unitorch.cli.writer¤

GeneralJsonlWriter¤

Tip

`core/writer/jsonl` is the configuration section for GeneralJsonlWriter.

Bases: GenericWriter

Class for writing data in JSONL format.

Initialize GeneralJsonlWriter.

Parameters:

Name	Type	Description	Default
`output_file`	`str`	The path to the output file.	required
`nrows_per_sample`	`int`	The number of rows per sample. Defaults to None.	`None`
`header`	`bool`	Whether to include a header in the output file. Defaults to None.	`None`
`columns`	`List[str]`	The list of columns to include in the output file. Defaults to None.	`None`

Source code in src/unitorch/cli/writers/__init__.py

def __init__(
    self,
    output_file: str,
    nrows_per_sample: Optional[int] = None,
    header: Optional[bool] = None,
    columns: Optional[List[str]] = None,
):
    """
    Initialize GeneralJsonlWriter.

    When ``nrows_per_sample`` is given and ``output_file`` already exists, the
    lines already present are counted and divided by ``nrows_per_sample`` to
    obtain ``skip_n_samples``. If that count is non-zero the file is opened in
    append mode; otherwise it is created or truncated.

    Args:
        output_file (str): The path to the output file.
        nrows_per_sample (int, optional): The number of rows per sample. Defaults to None.
        header (bool, optional): Whether to include a header in the output file.
            Stored as ``self.header``. Defaults to None.
        columns (List[str], optional): The list of columns to include in the output file. Defaults to None.
    """
    self.header = header
    self.columns = columns

    self.skip_n_samples = 0
    if nrows_per_sample is not None and os.path.exists(output_file):
        # Count existing lines inside a context manager so the read handle is
        # closed before the same path is reopened for writing below. Only line
        # breaks matter here, so undecodable bytes are tolerated.
        with open(output_file, "r", encoding="utf-8", errors="replace") as existing:
            self.skip_n_samples = sum(1 for _ in existing) // nrows_per_sample

    # Nothing to resume from -> start a fresh file; otherwise keep what is there.
    mode = "w" if self.skip_n_samples == 0 else "a"
    self.output_file = open(output_file, mode, encoding="utf-8")

from_core_configure `classmethod` ¤

from_core_configure(config, **kwargs)

Create an instance of GeneralJsonlWriter from a core configuration.

Parameters:

Name	Type	Description	Default
`config`		The core configuration.	required
`**kwargs`		Additional keyword arguments.	`{}`

Returns:

Name	Type	Description
`GeneralJsonlWriter`		An instance of GeneralJsonlWriter.

Source code in src/unitorch/cli/writers/__init__.py

@classmethod
@add_default_section_for_init("core/writer/jsonl")
def from_core_configure(cls, config, **kwargs):
    """
    Build a GeneralJsonlWriter from a core configuration.

    The body itself does nothing and returns None.
    NOTE(review): the instance is presumably constructed by the
    ``add_default_section_for_init`` decorator from the ``core/writer/jsonl``
    section -- confirm against the decorator's implementation.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments.

    Returns:
        GeneralJsonlWriter: An instance of GeneralJsonlWriter.
    """
    return None

process_chunk ¤

process_chunk(outputs: WriterOutputs)

Process a chunk of data during the writing process.

Parameters:

Name	Type	Description	Default
`outputs`	`WriterOutputs`	The writer outputs.	required

Source code in src/unitorch/cli/writers/__init__.py

def process_chunk(self, outputs: WriterOutputs):
    """
    Write one chunk of outputs to the output file as JSON Lines.

    The outputs are converted with ``to_pandas()``, optionally restricted to
    ``self.columns``, serialized one record per line, written and flushed.

    Args:
        outputs (WriterOutputs): The writer outputs.
    """
    frame = outputs.to_pandas()
    if self.columns is not None:
        # Keep only the requested columns that are actually present,
        # in the order given by self.columns.
        available = set(frame.columns)
        frame = frame[[name for name in self.columns if name in available]]
    payload = frame.to_json(orient="records", lines=True)
    # Older pandas releases do not terminate the last record with a newline;
    # without it the next chunk would be glued onto the same line.
    if payload and not payload.endswith("\n"):
        payload += "\n"
    self.output_file.write(payload)
    self.output_file.flush()

process_end ¤

process_end()

Process the end of the writing process.

Source code in src/unitorch/cli/writers/__init__.py

def process_end(self):
    """Finish the writing process by closing the output file handle."""
    handle = self.output_file
    handle.close()

process_start ¤

process_start(outputs: WriterOutputs)

Process the start of the writing process.

Parameters:

Name	Type	Description	Default
`outputs`	`WriterOutputs`	The writer outputs.	required

Source code in src/unitorch/cli/writers/__init__.py

def process_start(self, outputs: WriterOutputs):
    """
    Write the first batch of outputs to the output file as JSON Lines.

    The outputs are converted with ``to_pandas()``, optionally restricted to
    ``self.columns``, serialized one record per line, written and flushed.

    Args:
        outputs (WriterOutputs): The writer outputs.
    """
    frame = outputs.to_pandas()
    if self.columns is not None:
        # Keep only the requested columns that are actually present,
        # in the order given by self.columns.
        available = set(frame.columns)
        frame = frame[[name for name in self.columns if name in available]]
    payload = frame.to_json(orient="records", lines=True)
    # Older pandas releases do not terminate the last record with a newline;
    # without it the following chunk would be glued onto the same line.
    if payload and not payload.endswith("\n"):
        payload += "\n"
    self.output_file.write(payload)
    self.output_file.flush()

unitorch.cli.writer¤

GeneralJsonlWriter¤

from_core_configure classmethod ¤

process_chunk ¤

process_end ¤

process_start ¤

from_core_configure `classmethod` ¤