unitorch.cli.models.peft¤

BloomLoraForClassification¤

Tip

core/model/classification/peft/lora/bloom is the section for configuration of BloomLoraForClassification.

Bases: BloomLoraForClassification

Initialize the BloomLoraForClassification model.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	The path to the model configuration file.	required
`lora_r`	`int`	The number of Lora ranks. Defaults to 16.	`16`
`lora_alpha`	`int`	The Lora alpha value. Defaults to 32.	`32`
`lora_dropout`	`float`	The Lora dropout rate. Defaults to 0.05.	`0.05`
`fan_in_fan_out`	`bool`	Whether to use fan-in/fan-out weight initialization. Defaults to True.	`True`
`target_modules`	`Union[List[str], str]`	The target modules that LoRA is applied to. Defaults to ["query_key_value"].	`['query_key_value']`
`num_classes`	`int`	The number of classes. Defaults to 1.	`1`
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing during training. Defaults to False.	`False`

Source code in src/unitorch/cli/models/peft/modeling_bloom.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["query_key_value"],
    num_classes: Optional[int] = 1,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Initialize the BloomLoraForClassification model.

    All arguments are forwarded by keyword to the parent class constructor.

    Args:
        config_path (str): The path to the model configuration file.
        lora_r (int, optional): The LoRA rank. Defaults to 16.
        lora_alpha (int, optional): The LoRA alpha value. Defaults to 32.
        lora_dropout (float, optional): The LoRA dropout rate. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Whether to use fan-in/fan-out weight initialization. Defaults to True.
        target_modules (Union[List[str], str], optional): The target modules that LoRA is applied to. Defaults to ["query_key_value"].
        num_classes (int, optional): The number of classes. Defaults to 1.
        gradient_checkpointing (bool, optional): Whether to use gradient checkpointing during training. Defaults to False.
    """
    # The default list object is shared by every call of this constructor;
    # give the parent its own copy so in-place edits made later cannot leak
    # into other instances.
    if isinstance(target_modules, list):
        target_modules = list(target_modules)

    super().__init__(
        config_path=config_path,
        lora_r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
        num_classes=num_classes,
        gradient_checkpointing=gradient_checkpointing,
    )

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Perform forward pass of the BloomLoraForClassification model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	The input IDs.	required
`attention_mask`	`Tensor`	The attention mask.	`None`
`position_ids`	`Tensor`	The position IDs.	`None`

Returns:

Name	Type	Description
`ClassificationOutputs`		The output of the classification task.

Source code in src/unitorch/cli/models/peft/modeling_bloom.py

@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the parent class forward pass and wrap its result for the CLI.

    Args:
        input_ids (torch.Tensor): The input IDs.
        attention_mask (torch.Tensor, optional): The attention mask. Defaults to None.
        position_ids (torch.Tensor, optional): The position IDs. Defaults to None.

    Returns:
        ClassificationOutputs: The parent forward result, stored in the ``outputs`` field.
    """
    # The three tensors are handed to the parent by keyword, untouched.
    return ClassificationOutputs(
        outputs=super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
        )
    )

from_core_configure `classmethod` ¤

from_core_configure(config, **kwargs)

Create an instance of BloomLoraForClassification from a core configuration.

Parameters:

Name	Type	Description	Default
`config`		The core configuration.	required
`**kwargs`		Additional keyword arguments.	`{}`

Returns:

Name	Type	Description
`BloomLoraForClassification`		The initialized BloomLoraForClassification instance.

Source code in src/unitorch/cli/models/peft/modeling_bloom.py

@classmethod
@add_default_section_for_init("core/model/classification/peft/lora/bloom")
def from_core_configure(cls, config, **kwargs):
    """
    Create an instance of BloomLoraForClassification from a core configuration.

    Options are read from the ``core/model/classification/peft/lora/bloom``
    section: ``pretrained_name``, ``config_path``, ``lora_r``, ``lora_alpha``,
    ``lora_dropout``, ``fan_in_fan_out``, ``target_modules``,
    ``gradient_checkpointing``, ``num_classes``, ``pretrained_weight_path``
    and ``pretrained_lora_weight_path``.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments (not read by this method's body).

    Returns:
        BloomLoraForClassification: The initialized BloomLoraForClassification instance.
    """
    config.set_default_section("core/model/classification/peft/lora/bloom")
    pretrained_name = config.getoption("pretrained_name", "bloom-560m")
    config_path = config.getoption("config_path", None)
    # The explicit option and the ``pretrained_bloom_infos`` entry for
    # ``pretrained_name`` are both offered to pop_value, which presumably
    # picks the first usable one -- confirm against its definition.
    config_path = pop_value(
        config_path,
        nested_dict_value(pretrained_bloom_infos, pretrained_name, "config"),
    )
    config_path = cached_path(config_path)

    lora_r = config.getoption("lora_r", 16)
    lora_alpha = config.getoption("lora_alpha", 32)
    lora_dropout = config.getoption("lora_dropout", 0.05)
    fan_in_fan_out = config.getoption("fan_in_fan_out", True)
    target_modules = config.getoption("target_modules", ["query_key_value"])

    gradient_checkpointing = config.getoption("gradient_checkpointing", False)
    num_classes = config.getoption("num_classes", 1)

    inst = cls(
        config_path,
        lora_r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
        num_classes=num_classes,
        gradient_checkpointing=gradient_checkpointing,
    )

    # Flat list of weight locations: base weights first, LoRA weights after.
    weight_path = []
    pretrained_weight_path = config.getoption("pretrained_weight_path", None)
    pretrained_weight_path = pop_value(
        pretrained_weight_path,
        nested_dict_value(pretrained_bloom_infos, pretrained_name, "weight"),
        check_none=False,
    )
    if pretrained_weight_path is not None:
        if isinstance(pretrained_weight_path, str):
            weight_path.append(pretrained_weight_path)
        elif isinstance(pretrained_weight_path, list):
            weight_path.extend(pretrained_weight_path)

    pretrained_lora_weight_path = config.getoption(
        "pretrained_lora_weight_path", None
    )
    if pretrained_lora_weight_path is not None:
        # A sequence of LoRA paths is flattened, matching how the base weight
        # paths are handled above, instead of being nested as one element.
        if isinstance(pretrained_lora_weight_path, (list, tuple)):
            weight_path.extend(pretrained_lora_weight_path)
        else:
            weight_path.append(pretrained_lora_weight_path)

    # Weights are loaded only when at least one location was collected.
    if weight_path:
        inst.from_pretrained(
            weight_path=weight_path,
        )

    return inst

BloomLoraForGeneration¤

Tip

core/model/generation/peft/lora/bloom is the section for configuration of BloomLoraForGeneration.

Bases: BloomLoraForGeneration

BloomLora model for generation tasks.

Initialize the BloomLoraForGeneration model.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	The path to the model configuration file.	required
`lora_r`	`int`	The number of Lora ranks. Defaults to 16.	`16`
`lora_alpha`	`int`	The Lora alpha value. Defaults to 32.	`32`
`lora_dropout`	`float`	The Lora dropout rate. Defaults to 0.05.	`0.05`
`fan_in_fan_out`	`bool`	Whether to use fan-in/fan-out weight initialization. Defaults to True.	`True`
`target_modules`	`Union[List[str], str]`	The target modules that LoRA is applied to. Defaults to ["query_key_value"].	`['query_key_value']`
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing during training. Defaults to False.	`False`

Source code in src/unitorch/cli/models/peft/modeling_bloom.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["query_key_value"],
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Initialize the BloomLoraForGeneration model.

    All arguments are forwarded by keyword to the parent class constructor.

    Args:
        config_path (str): The path to the model configuration file.
        lora_r (int, optional): The LoRA rank. Defaults to 16.
        lora_alpha (int, optional): The LoRA alpha value. Defaults to 32.
        lora_dropout (float, optional): The LoRA dropout rate. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Whether to use fan-in/fan-out weight initialization. Defaults to True.
        target_modules (Union[List[str], str], optional): The target modules that LoRA is applied to. Defaults to ["query_key_value"].
        gradient_checkpointing (bool, optional): Whether to use gradient checkpointing during training. Defaults to False.
    """
    # The default list object is shared by every call of this constructor;
    # give the parent its own copy so in-place edits made later cannot leak
    # into other instances.
    if isinstance(target_modules, list):
        target_modules = list(target_modules)

    super().__init__(
        config_path=config_path,
        lora_r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
        gradient_checkpointing=gradient_checkpointing,
    )

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Perform forward pass of the BloomLoraForGeneration model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	The input IDs.	required
`attention_mask`	`Tensor`	The attention mask.	`None`
`position_ids`	`Tensor`	The position IDs.	`None`

Returns:

Name	Type	Description
`GenerationOutputs`		The output of the generation task.

Source code in src/unitorch/cli/models/peft/modeling_bloom.py

@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the parent class forward pass and wrap its result for the CLI.

    Args:
        input_ids (torch.Tensor): The input IDs.
        attention_mask (torch.Tensor, optional): The attention mask. Defaults to None.
        position_ids (torch.Tensor, optional): The position IDs. Defaults to None.

    Returns:
        GenerationOutputs: The parent forward result, stored in the ``sequences`` field.
    """
    # The three tensors are handed to the parent by keyword, untouched.
    return GenerationOutputs(
        sequences=super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
        )
    )

from_core_configure `classmethod` ¤

from_core_configure(config, **kwargs)

Create an instance of BloomLoraForGeneration from a core configuration.

Parameters:

Name	Type	Description	Default
`config`		The core configuration.	required
`**kwargs`		Additional keyword arguments.	`{}`

Returns:

Name	Type	Description
`BloomLoraForGeneration`		The initialized BloomLoraForGeneration instance.

Source code in src/unitorch/cli/models/peft/modeling_bloom.py

@classmethod
@add_default_section_for_init("core/model/generation/peft/lora/bloom")
def from_core_configure(cls, config, **kwargs):
    """
    Create an instance of BloomLoraForGeneration from a core configuration.

    Options are read from the ``core/model/generation/peft/lora/bloom``
    section: ``pretrained_name``, ``config_path``, ``lora_r``, ``lora_alpha``,
    ``lora_dropout``, ``fan_in_fan_out``, ``target_modules``,
    ``gradient_checkpointing``, ``pretrained_weight_path`` and
    ``pretrained_lora_weight_path``.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments (not read by this method's body).

    Returns:
        BloomLoraForGeneration: The initialized BloomLoraForGeneration instance.
    """
    config.set_default_section("core/model/generation/peft/lora/bloom")
    pretrained_name = config.getoption("pretrained_name", "bloom-560m")
    config_path = config.getoption("config_path", None)
    # The explicit option and the ``pretrained_bloom_infos`` entry for
    # ``pretrained_name`` are both offered to pop_value, which presumably
    # picks the first usable one -- confirm against its definition.
    config_path = pop_value(
        config_path,
        nested_dict_value(pretrained_bloom_infos, pretrained_name, "config"),
    )
    config_path = cached_path(config_path)

    lora_r = config.getoption("lora_r", 16)
    lora_alpha = config.getoption("lora_alpha", 32)
    lora_dropout = config.getoption("lora_dropout", 0.05)
    fan_in_fan_out = config.getoption("fan_in_fan_out", True)
    target_modules = config.getoption("target_modules", ["query_key_value"])

    gradient_checkpointing = config.getoption("gradient_checkpointing", False)

    inst = cls(
        config_path,
        lora_r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
        gradient_checkpointing=gradient_checkpointing,
    )

    # Flat list of weight locations: base weights first, LoRA weights after.
    weight_path = []
    pretrained_weight_path = config.getoption("pretrained_weight_path", None)
    pretrained_weight_path = pop_value(
        pretrained_weight_path,
        nested_dict_value(pretrained_bloom_infos, pretrained_name, "weight"),
        check_none=False,
    )
    if pretrained_weight_path is not None:
        if isinstance(pretrained_weight_path, str):
            weight_path.append(pretrained_weight_path)
        elif isinstance(pretrained_weight_path, list):
            weight_path.extend(pretrained_weight_path)

    pretrained_lora_weight_path = config.getoption(
        "pretrained_lora_weight_path", None
    )
    if pretrained_lora_weight_path is not None:
        # A sequence of LoRA paths is flattened, matching how the base weight
        # paths are handled above, instead of being nested as one element.
        if isinstance(pretrained_lora_weight_path, (list, tuple)):
            weight_path.extend(pretrained_lora_weight_path)
        else:
            weight_path.append(pretrained_lora_weight_path)

    # Weights are loaded only when at least one location was collected.
    if weight_path:
        inst.from_pretrained(
            weight_path=weight_path,
        )

    return inst

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate sequences using the Bloom model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input token IDs.	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	Decoder start token ID. Defaults to 1.	`1`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 2.	`2`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum generation sequence length. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum generation sequence length. Defaults to 48.	`48`
`repetition_penalty`	`float`	Repetition penalty. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to prevent repetition. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to perform early stopping. Defaults to True.	`True`
`length_penalty`	`float`	Length penalty. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Diversity penalty for diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k sampling parameter. Defaults to 50.	`50`
`top_p`	`float`	Top-p sampling parameter. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenerationOutputs`		The generation outputs.

Source code in src/unitorch/cli/models/peft/modeling_bloom.py

@add_default_section_for_function("core/model/generation/peft/lora/bloom")
@torch.no_grad()
@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate sequences using the Bloom model.

    Runs with gradients disabled and under autocast. Every argument is
    forwarded unchanged to the parent class ``generate``; this method only
    repackages the result.

    Args:
        input_ids (torch.Tensor): Input token IDs.
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): Decoder start token ID. Defaults to 1.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 2.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum generation sequence length. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum generation sequence length. Defaults to 48.
        repetition_penalty (float, optional): Repetition penalty. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to prevent repetition. Defaults to 0.
        early_stopping (bool, optional): Whether to perform early stopping. Defaults to True.
        length_penalty (float, optional): Length penalty. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Diversity penalty for diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k sampling parameter. Defaults to 50.
        top_p (float, optional): Top-p sampling parameter. Defaults to 1.0.

    Returns:
        GenerationOutputs: The generation outputs, carrying the ``sequences``
        and ``sequences_scores`` of the parent result.
    """
    # input_ids is positional; every generation setting goes by keyword.
    outputs = super().generate(
        input_ids,
        num_beams=num_beams,
        decoder_start_token_id=decoder_start_token_id,
        decoder_end_token_id=decoder_end_token_id,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )

    # Only these two fields of the parent result are exposed to the CLI.
    return GenerationOutputs(
        sequences=outputs.sequences,
        sequences_scores=outputs.sequences_scores,
    )

LlamaLoraForClassification¤

Tip

core/model/classification/peft/lora/llama is the section for configuration of LlamaLoraForClassification.

Bases: LlamaLoraForClassification

LlamaLora model for classification tasks.

Initialize the LlamaLoraForClassification model.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	The path to the model configuration file.	required
`quant_config_path`	`str`	The path to the quantization configuration file. Defaults to None.	`None`
`lora_r`	`int`	The number of Lora ranks. Defaults to 16.	`16`
`lora_alpha`	`int`	The Lora alpha value. Defaults to 32.	`32`
`lora_dropout`	`float`	The Lora dropout rate. Defaults to 0.05.	`0.05`
`fan_in_fan_out`	`bool`	Whether to use fan-in/fan-out weight initialization. Defaults to True.	`True`
`target_modules`	`Union[List[str], str]`	The target modules that LoRA is applied to. Defaults to ["q_proj", "v_proj"].	`['q_proj', 'v_proj']`
`num_classes`	`int`	The number of classes. Defaults to 1.	`1`
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing during training. Defaults to False.	`False`

Source code in src/unitorch/cli/models/peft/modeling_llama.py

def __init__(
    self,
    config_path: str,
    quant_config_path: Optional[str] = None,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    num_classes: Optional[int] = 1,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Initialize the LlamaLoraForClassification model.

    All arguments are forwarded by keyword to the parent class constructor.

    Args:
        config_path (str): The path to the model configuration file.
        quant_config_path (str, optional): The path to the quantization configuration file. Defaults to None.
        lora_r (int, optional): The LoRA rank. Defaults to 16.
        lora_alpha (int, optional): The LoRA alpha value. Defaults to 32.
        lora_dropout (float, optional): The LoRA dropout rate. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Whether to use fan-in/fan-out weight initialization. Defaults to True.
        target_modules (Union[List[str], str], optional): The target modules that LoRA is applied to. Defaults to ["q_proj", "v_proj"].
        num_classes (int, optional): The number of classes. Defaults to 1.
        gradient_checkpointing (bool, optional): Whether to use gradient checkpointing during training. Defaults to False.
    """
    # The default list object is shared by every call of this constructor;
    # give the parent its own copy so in-place edits made later cannot leak
    # into other instances.
    if isinstance(target_modules, list):
        target_modules = list(target_modules)

    super().__init__(
        config_path=config_path,
        quant_config_path=quant_config_path,
        lora_r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
        num_classes=num_classes,
        gradient_checkpointing=gradient_checkpointing,
    )

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Perform forward pass of the LlamaLoraForClassification model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	The input IDs.	required
`attention_mask`	`Tensor`	The attention mask.	`None`
`position_ids`	`Tensor`	The position IDs.	`None`

Returns:

Name	Type	Description
`ClassificationOutputs`		The output of the classification task.

Source code in src/unitorch/cli/models/peft/modeling_llama.py

@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the parent class forward pass and wrap its result for the CLI.

    Args:
        input_ids (torch.Tensor): The input IDs.
        attention_mask (torch.Tensor, optional): The attention mask. Defaults to None.
        position_ids (torch.Tensor, optional): The position IDs. Defaults to None.

    Returns:
        ClassificationOutputs: The parent forward result, stored in the ``outputs`` field.
    """
    # The three tensors are handed to the parent by keyword, untouched.
    return ClassificationOutputs(
        outputs=super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
        )
    )

from_core_configure `classmethod` ¤

from_core_configure(config, **kwargs)

Create an instance of LlamaLoraForClassification from a core configuration.

Parameters:

Name	Type	Description	Default
`config`		The core configuration.	required
`**kwargs`		Additional keyword arguments.	`{}`

Returns:

Name	Type	Description
`LlamaLoraForClassification`		The initialized LlamaLoraForClassification instance.

Source code in src/unitorch/cli/models/peft/modeling_llama.py

@classmethod
@add_default_section_for_init("core/model/classification/peft/lora/llama")
def from_core_configure(cls, config, **kwargs):
    """
    Create an instance of LlamaLoraForClassification from a core configuration.

    Options are read from the ``core/model/classification/peft/lora/llama``
    section: ``pretrained_name``, ``config_path``, ``quant_config_path``,
    ``lora_r``, ``lora_alpha``, ``lora_dropout``, ``fan_in_fan_out``,
    ``target_modules``, ``gradient_checkpointing``, ``num_classes``,
    ``pretrained_weight_path`` and ``pretrained_lora_weight_path``.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments (not read by this method's body).

    Returns:
        LlamaLoraForClassification: The initialized LlamaLoraForClassification instance.
    """
    config.set_default_section("core/model/classification/peft/lora/llama")
    pretrained_name = config.getoption("pretrained_name", "llama-7b")
    config_path = config.getoption("config_path", None)
    # The explicit option and the ``pretrained_llama_infos`` entry for
    # ``pretrained_name`` are both offered to pop_value, which presumably
    # picks the first usable one -- confirm against its definition.
    config_path = pop_value(
        config_path,
        nested_dict_value(pretrained_llama_infos, pretrained_name, "config"),
    )
    config_path = cached_path(config_path)

    # The quantization config is optional and has no registry fallback.
    quant_config_path = config.getoption("quant_config_path", None)
    if quant_config_path is not None:
        quant_config_path = cached_path(quant_config_path)

    lora_r = config.getoption("lora_r", 16)
    lora_alpha = config.getoption("lora_alpha", 32)
    lora_dropout = config.getoption("lora_dropout", 0.05)
    fan_in_fan_out = config.getoption("fan_in_fan_out", True)
    target_modules = config.getoption("target_modules", ["q_proj", "v_proj"])

    gradient_checkpointing = config.getoption("gradient_checkpointing", False)
    num_classes = config.getoption("num_classes", 1)

    inst = cls(
        config_path,
        quant_config_path=quant_config_path,
        lora_r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
        num_classes=num_classes,
        gradient_checkpointing=gradient_checkpointing,
    )

    # Flat list of weight locations: base weights first, LoRA weights after.
    weight_path = []
    pretrained_weight_path = config.getoption("pretrained_weight_path", None)
    pretrained_weight_path = pop_value(
        pretrained_weight_path,
        nested_dict_value(pretrained_llama_infos, pretrained_name, "weight"),
        check_none=False,
    )
    if pretrained_weight_path is not None:
        if isinstance(pretrained_weight_path, str):
            weight_path.append(pretrained_weight_path)
        elif isinstance(pretrained_weight_path, list):
            weight_path.extend(pretrained_weight_path)

    pretrained_lora_weight_path = config.getoption(
        "pretrained_lora_weight_path", None
    )
    if pretrained_lora_weight_path is not None:
        # A sequence of LoRA paths is flattened, matching how the base weight
        # paths are handled above, instead of being nested as one element.
        if isinstance(pretrained_lora_weight_path, (list, tuple)):
            weight_path.extend(pretrained_lora_weight_path)
        else:
            weight_path.append(pretrained_lora_weight_path)

    # Weights are loaded only when at least one location was collected.
    if weight_path:
        inst.from_pretrained(
            weight_path=weight_path,
        )

    return inst

LlamaLoraForGeneration¤

Tip

core/model/generation/peft/lora/llama is the section for configuration of LlamaLoraForGeneration.

Bases: LlamaLoraForGeneration

LlamaLora model for generation tasks.

Initialize the LlamaLoraForGeneration model.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	The path to the model configuration file.	required
`quant_config_path`	`str`	The path to the quantization configuration file. Defaults to None.	`None`
`lora_r`	`int`	The number of Lora ranks. Defaults to 16.	`16`
`lora_alpha`	`int`	The Lora alpha value. Defaults to 32.	`32`
`lora_dropout`	`float`	The Lora dropout rate. Defaults to 0.05.	`0.05`
`fan_in_fan_out`	`bool`	Whether to use fan-in/fan-out weight initialization. Defaults to True.	`True`
`target_modules`	`Union[List[str], str]`	The target modules that LoRA is applied to. Defaults to ["q_proj", "v_proj"].	`['q_proj', 'v_proj']`
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing during training. Defaults to False.	`False`

Source code in src/unitorch/cli/models/peft/modeling_llama.py

def __init__(
    self,
    config_path: str,
    quant_config_path: Optional[str] = None,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Initialize the LlamaLoraForGeneration model.

    All arguments are forwarded by keyword to the parent class constructor.

    Args:
        config_path (str): The path to the model configuration file.
        quant_config_path (str, optional): The path to the quantization configuration file. Defaults to None.
        lora_r (int, optional): The LoRA rank. Defaults to 16.
        lora_alpha (int, optional): The LoRA alpha value. Defaults to 32.
        lora_dropout (float, optional): The LoRA dropout rate. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Whether to use fan-in/fan-out weight initialization. Defaults to True.
        target_modules (Union[List[str], str], optional): The target modules that LoRA is applied to. Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Whether to use gradient checkpointing during training. Defaults to False.
    """
    # The default list object is shared by every call of this constructor;
    # give the parent its own copy so in-place edits made later cannot leak
    # into other instances.
    if isinstance(target_modules, list):
        target_modules = list(target_modules)

    super().__init__(
        config_path=config_path,
        quant_config_path=quant_config_path,
        lora_r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
        gradient_checkpointing=gradient_checkpointing,
    )

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Perform forward pass of the LlamaLoraForGeneration model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	The input IDs.	required
`attention_mask`	`Tensor`	The attention mask.	`None`
`position_ids`	`Tensor`	The position IDs.	`None`

Returns:

Name	Type	Description
`GenerationOutputs`		The output of the generation task.

Source code in src/unitorch/cli/models/peft/modeling_llama.py

@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the parent class forward pass and wrap its result for the CLI.

    Args:
        input_ids (torch.Tensor): The input IDs.
        attention_mask (torch.Tensor, optional): The attention mask. Defaults to None.
        position_ids (torch.Tensor, optional): The position IDs. Defaults to None.

    Returns:
        GenerationOutputs: The parent forward result, stored in the ``sequences`` field.
    """
    # The three tensors are handed to the parent by keyword, untouched.
    return GenerationOutputs(
        sequences=super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
        )
    )

from_core_configure `classmethod` ¤

from_core_configure(config, **kwargs)

Create an instance of LlamaLoraForGeneration from a core configuration.

Parameters:

Name	Type	Description	Default
`config`		The core configuration.	required
`**kwargs`		Additional keyword arguments.	`{}`

Returns:

Name	Type	Description
`LlamaLoraForGeneration`		The initialized LlamaLoraForGeneration instance.

Source code in src/unitorch/cli/models/peft/modeling_llama.py

@classmethod
@add_default_section_for_init("core/model/generation/peft/lora/llama")
def from_core_configure(cls, config, **kwargs):
    """
    Build a LlamaLoraForGeneration instance from a core configuration.

    Options are read from the "core/model/generation/peft/lora/llama" section.
    When any weight path is configured (base weights and/or LoRA weights),
    the weights are loaded through `from_pretrained` before returning.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments (not used in this method body).

    Returns:
        LlamaLoraForGeneration: The initialized LlamaLoraForGeneration instance.
    """
    config.set_default_section("core/model/generation/peft/lora/llama")
    pretrained_name = config.getoption("pretrained_name", "llama-7b")

    # Model config: the `config_path` option is combined with the registry
    # entry for `pretrained_name` via pop_value (presumably the first usable
    # value wins -- confirm against pop_value), then resolved by cached_path.
    config_path = cached_path(
        pop_value(
            config.getoption("config_path", None),
            nested_dict_value(pretrained_llama_infos, pretrained_name, "config"),
        )
    )

    # The quantization config is optional and only resolved when provided.
    quant_config_path = config.getoption("quant_config_path", None)
    if quant_config_path is not None:
        quant_config_path = cached_path(quant_config_path)

    inst = cls(
        config_path,
        quant_config_path=quant_config_path,
        lora_r=config.getoption("lora_r", 16),
        lora_alpha=config.getoption("lora_alpha", 32),
        lora_dropout=config.getoption("lora_dropout", 0.05),
        fan_in_fan_out=config.getoption("fan_in_fan_out", True),
        target_modules=config.getoption("target_modules", ["q_proj", "v_proj"]),
        gradient_checkpointing=config.getoption("gradient_checkpointing", False),
    )

    # Base weights may be a single path or a list of paths; any other value
    # (including None) contributes nothing.
    base_weights = pop_value(
        config.getoption("pretrained_weight_path", None),
        nested_dict_value(pretrained_llama_infos, pretrained_name, "weight"),
        check_none=False,
    )
    weight_path = []
    if isinstance(base_weights, str):
        weight_path.append(base_weights)
    elif isinstance(base_weights, list):
        weight_path.extend(base_weights)

    # LoRA weights, when configured, are appended after the base weights.
    lora_weights = config.getoption("pretrained_lora_weight_path", None)
    if lora_weights is not None:
        weight_path.append(lora_weights)

    if weight_path:
        inst.from_pretrained(weight_path=weight_path)

    return inst

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate sequences using the Llama model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input token IDs.	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	Decoder start token ID. Defaults to 1.	`1`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 2.	`2`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum generation sequence length. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum generation sequence length. Defaults to 48.	`48`
`repetition_penalty`	`float`	Repetition penalty. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to prevent repetition. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to perform early stopping. Defaults to True.	`True`
`length_penalty`	`float`	Length penalty. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Diversity penalty for diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k sampling parameter. Defaults to 50.	`50`
`top_p`	`float`	Top-p sampling parameter. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenerationOutputs`		The generation outputs.

Source code in src/unitorch/cli/models/peft/modeling_llama.py

@add_default_section_for_function("core/model/generation/peft/lora/llama")
@torch.no_grad()
@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate sequences with the Llama model and wrap them as GenerationOutputs.

    Runs under `torch.no_grad()` and autocast; every option is forwarded
    unchanged to the parent class `generate`.

    Args:
        input_ids (torch.Tensor): Input token IDs.
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): Decoder start token ID. Defaults to 1.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 2.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum generation sequence length. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum generation sequence length. Defaults to 48.
        repetition_penalty (float, optional): Repetition penalty. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to prevent repetition. Defaults to 0.
        early_stopping (bool, optional): Whether to perform early stopping. Defaults to True.
        length_penalty (float, optional): Length penalty. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Diversity penalty for diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k sampling parameter. Defaults to 50.
        top_p (float, optional): Top-p sampling parameter. Defaults to 1.0.

    Returns:
        GenerationOutputs: Holds the `sequences` and `sequences_scores` taken
            from the parent class result.
    """
    # Gather the decoding options once; input_ids stays positional, matching
    # how the parent generate is invoked.
    generation_options = dict(
        num_beams=num_beams,
        decoder_start_token_id=decoder_start_token_id,
        decoder_end_token_id=decoder_end_token_id,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    result = super().generate(input_ids, **generation_options)

    return GenerationOutputs(
        sequences=result.sequences,
        sequences_scores=result.sequences_scores,
    )

unitorch.cli.models.peft¤

BloomLoraForClassification¤

forward ¤

from_core_configure classmethod ¤

BloomLoraForGeneration¤

forward ¤

from_core_configure classmethod ¤

generate ¤

LlamaLoraForClassification¤

forward ¤

from_core_configure classmethod ¤

LlamaLoraForGeneration¤

forward ¤

from_core_configure classmethod ¤

generate ¤

from_core_configure `classmethod` ¤

from_core_configure `classmethod` ¤

from_core_configure `classmethod` ¤

from_core_configure `classmethod` ¤