unitorch.cli.writer¤

GeneralCsvWriter¤

Tip

core/writer/csv is the section for configuration of GeneralCsvWriter.

Bases: GenericWriter

Class for writing data in CSV format.

Initialize GeneralCsvWriter.

Parameters:

Name	Type	Description	Default
`output_file`	`str`	The path to the output file.	required
`nrows_per_sample`	`int`	The number of rows per sample. Defaults to None.	`None`
`header`	`bool`	Whether to include a header in the output file. Defaults to None.	`None`
`columns`	`List[str]`	The list of columns to include in the output file. Defaults to None.	`None`
`sep`	`str`	The separator for the CSV file. Defaults to "\t" (the tab character).	`'\t'`
`quoting`	`int`	The quoting style for the CSV file. Defaults to 3.	`3`
`escapechar`	`str`	The escape character for the CSV file. Defaults to None.	`None`

Source code in src/unitorch/cli/writers/__init__.py

def __init__(
    self,
    output_file: str,
    nrows_per_sample: Optional[int] = None,
    header: Optional[bool] = None,
    columns: Optional[List[str]] = None,
    sep: Optional[str] = "\t",
    quoting: Optional[int] = 3,
    escapechar: Optional[str] = None,
):
    """
    Initialize GeneralCsvWriter.

    When ``output_file`` already exists and ``nrows_per_sample`` is given, the
    number of samples already present in the file is derived from its line
    count and stored in ``self.skip_n_samples``. If that number is non-zero
    the file is opened in append mode; otherwise it is created or truncated.

    Args:
        output_file (str): The path to the output file.
        nrows_per_sample (int, optional): The number of rows per sample. Defaults to None.
        header (bool, optional): Whether to include a header in the output file. Defaults to None.
        columns (List[str], optional): The list of columns to include in the output file. Defaults to None.
        sep (str, optional): The separator for the CSV file. Defaults to the tab character.
        quoting (int, optional): The quoting style for the CSV file. Defaults to 3.
        escapechar (str, optional): The escape character for the CSV file. Defaults to None.

    Raises:
        ZeroDivisionError: If ``nrows_per_sample`` is 0 and ``output_file`` already exists.
    """
    self.header = header
    self.columns = columns
    self.sep = sep
    self.quoting = quoting
    self.escapechar = escapechar

    self.skip_n_samples = 0
    if nrows_per_sample is not None and os.path.exists(output_file):
        # Count lines with a context manager so the read handle is closed
        # before the same path is reopened for writing, and decode with the
        # same encoding the file is written in.
        with open(output_file, encoding="utf-8") as existing:
            n_lines = sum(1 for _ in existing)
        has_header = int(header is True)
        # Clamp at zero: an existing file shorter than the header line count
        # (e.g. an empty file) would otherwise yield a negative value, which
        # selects append mode and suppresses the header in process_start.
        self.skip_n_samples = max(0, (n_lines - has_header) // nrows_per_sample)

    # Nothing to resume from -> start a fresh file; otherwise keep what exists.
    mode = "w" if self.skip_n_samples == 0 else "a"
    self.output_file = open(output_file, mode, encoding="utf-8")

from_core_configure `classmethod` ¤

from_core_configure(config, **kwargs)

Create an instance of GeneralCsvWriter from a core configuration.

Parameters:

Name	Type	Description	Default
`config`		The core configuration.	required
`**kwargs`		Additional keyword arguments.	`{}`

Returns:

Name	Type	Description
`GeneralCsvWriter`		An instance of GeneralCsvWriter.

Source code in src/unitorch/cli/writers/__init__.py

@classmethod
@add_default_section_for_init("core/writer/csv")
def from_core_configure(cls, config, **kwargs):
    """
    Create an instance of GeneralCsvWriter from a core configuration.

    The method body itself is empty and returns None; it is wrapped by
    ``add_default_section_for_init("core/writer/csv")``.
    NOTE(review): the decorator presumably builds the instance from the
    "core/writer/csv" configuration section -- confirm against the
    decorator's implementation.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments.

    Returns:
        GeneralCsvWriter: An instance of GeneralCsvWriter.
    """
    pass

process_chunk ¤

process_chunk(outputs: WriterOutputs)

Process a chunk of data during the writing process.

Parameters:

Name	Type	Description	Default
`outputs`	`WriterOutputs`	The writer outputs.	required

Source code in src/unitorch/cli/writers/__init__.py

def process_chunk(self, outputs: WriterOutputs):
    """
    Append one chunk of outputs to the output file as header-less CSV rows.

    The outputs are converted to a pandas DataFrame. When ``self.columns`` is
    set, only those configured columns that exist in the DataFrame are kept,
    in the configured order. The serialized rows are written and flushed
    immediately.

    Args:
        outputs (WriterOutputs): The writer outputs.
    """
    frame = outputs.to_pandas()

    wanted = self.columns
    if wanted is not None:
        available = set(frame.columns)
        # Configured columns missing from this chunk are skipped.
        selected = [name for name in wanted if name in available]
        frame = frame[selected]

    csv_options = {
        "index": False,
        "sep": self.sep,
        "quoting": self.quoting,
        "header": False,
        "escapechar": self.escapechar,
    }
    text = frame.to_csv(**csv_options)

    sink = self.output_file
    sink.write(text)
    sink.flush()

process_end ¤

process_end()

Process the end of the writing process.

Source code in src/unitorch/cli/writers/__init__.py

def process_end(self):
    """Finish the writing process by closing the output file handle."""
    handle = self.output_file
    handle.close()

process_start ¤

process_start(outputs: WriterOutputs)

Process the start of the writing process.

Parameters:

Name	Type	Description	Default
`outputs`	`WriterOutputs`	The writer outputs.	required

Source code in src/unitorch/cli/writers/__init__.py

def process_start(self, outputs: WriterOutputs):
    """
    Write the first batch of outputs to the output file.

    The outputs are converted to a pandas DataFrame. When ``self.columns`` is
    set, only those configured columns that exist in the DataFrame are kept,
    in the configured order. A header row is emitted only when ``self.header``
    is truthy and ``self.skip_n_samples`` is 0. The serialized text is written
    and flushed immediately.

    Args:
        outputs (WriterOutputs): The writer outputs.
    """
    frame = outputs.to_pandas()

    wanted = self.columns
    if wanted is not None:
        available = set(frame.columns)
        # Configured columns missing from this batch are skipped.
        selected = [name for name in wanted if name in available]
        frame = frame[selected]

    # Passed through to pandas unchanged (may be None/False/True).
    write_header = self.header and self.skip_n_samples == 0

    text = frame.to_csv(
        index=False,
        sep=self.sep,
        quoting=self.quoting,
        header=write_header,
        escapechar=self.escapechar,
    )

    sink = self.output_file
    sink.write(text)
    sink.flush()

unitorch.cli.writer¤

GeneralCsvWriter¤

from_core_configure classmethod ¤

process_chunk ¤

process_end ¤

process_start ¤

from_core_configure `classmethod` ¤