Bases: GenericWriter
Writes outputs to a JSON Lines (JSONL) file, one JSON record per line.
Source code in src/unitorch/cli/writers/__init__.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self,
    output_file: str,
    nrows_per_sample: Optional[int] = None,
    header: Optional[bool] = None,
    columns: Optional[List[str]] = None,
):
    """Open ``output_file`` for JSONL output, appending when resuming.

    Args:
        output_file: Path of the JSONL file to write.
        nrows_per_sample: Number of output lines that make up one sample.
            When given and ``output_file`` already exists, the existing
            line count is divided by this value to get ``skip_n_samples``.
            When ``None``, no resume information is computed.
        header: Stored on the instance as ``self.header``; it is not used
            by this constructor.
        columns: Optional ordered list of column names, stored as
            ``self.columns``.

    Raises:
        ZeroDivisionError: If ``nrows_per_sample`` is ``0`` and
            ``output_file`` already exists.
    """
    self.header = header
    self.columns = columns

    if nrows_per_sample is None or not os.path.exists(output_file):
        self.skip_n_samples = 0
    else:
        # Count the existing lines inside a context manager so the read
        # handle is closed before the same path is reopened for writing.
        # The file is written as UTF-8 below, so it is read back the same way.
        with open(output_file, encoding="utf-8") as existing:
            n_existing_lines = sum(1 for _ in existing)
        self.skip_n_samples = n_existing_lines // nrows_per_sample

    # Append only when at least one complete sample is already on disk;
    # otherwise start from an empty file.
    mode = "a" if self.skip_n_samples > 0 else "w"
    self.output_file = open(output_file, mode, encoding="utf-8")
|
columns
instance-attribute
skip_n_samples
instance-attribute
skip_n_samples = (
0
if (nrows_per_sample is None or not exists(output_file))
else sum(1 for _ in (open(output_file)))
// nrows_per_sample
)
output_file
instance-attribute
output_file = open(output_file, mode, encoding='utf-8')
from_config
classmethod
from_config(config, **kwargs)
Source code in src/unitorch/cli/writers/__init__.py
@classmethod
@config_defaults_init("core/writer/jsonl")
def from_config(cls, config, **kwargs):
    """Alternate constructor driven by a configuration object.

    The body does nothing itself and returns ``None``. Any actual
    construction is presumably done by the ``config_defaults_init``
    decorator using the ``"core/writer/jsonl"`` section; confirm against
    the decorator's implementation.
    """
    return None
|
_write
_write(outputs: WriterOutputs)
Source code in src/unitorch/cli/writers/__init__.py
def _write(self, outputs: "WriterOutputs"):
    """Serialize ``outputs`` as JSON Lines and append them to the file.

    Args:
        outputs: Object exposing ``to_pandas()``, which returns the
            DataFrame to serialize.

    When ``self.columns`` is set, only the listed columns that exist in
    the DataFrame are written, in the order given by ``self.columns``.
    Listed names that are missing are skipped. The file is flushed after
    every call.
    """
    dataframe = outputs.to_pandas()
    if self.columns is not None:
        available = set(dataframe.columns)
        dataframe = dataframe[[c for c in self.columns if c in available]]

    payload = dataframe.to_json(orient="records", lines=True)
    # Some pandas versions omit the newline after the last record. Without
    # it, the first record of the next chunk would land on the same line.
    if payload and not payload.endswith("\n"):
        payload += "\n"

    self.output_file.write(payload)
    self.output_file.flush()
|
process_start
process_start(outputs: WriterOutputs)
Source code in src/unitorch/cli/writers/__init__.py
def process_start(self, outputs: "WriterOutputs"):
    """Write the first batch of outputs by delegating to ``_write``."""
    emit = self._write
    emit(outputs)
|
process_chunk
process_chunk(outputs: WriterOutputs)
Source code in src/unitorch/cli/writers/__init__.py
def process_chunk(self, outputs: "WriterOutputs"):
    """Write one chunk of outputs by delegating to ``_write``."""
    emit = self._write
    emit(outputs)
|
process_end
process_end()
Source code in src/unitorch/cli/writers/__init__.py
def process_end(self):
    """Close the output file handle held in ``self.output_file``."""
    handle = self.output_file
    handle.close()
|