unitorch.models.peft¤

ClipLoraForMatching¤

Bases: GenericPeftModel, PeftWeightLoaderMixin

Source code in src/unitorch/models/peft/modeling_clip.py

def __init__(
    self,
    config_path: str,
    projection_dim: Optional[int] = 512,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
):
    """
    Build a LoRA-wrapped ClipForMatching model with a scalar scoring head.

    Args:
        config_path (str): First positional argument given to ClipForMatching.
        projection_dim (int, optional): Forwarded to ClipForMatching as `projection_dim`. Defaults to 512.
        lora_r (int, optional): Forwarded to LoraConfig as `r`. Defaults to 16.
        lora_alpha (int, optional): Forwarded to LoraConfig as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Forwarded to LoraConfig as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to LoraConfig as `fan_in_fan_out`. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to LoraConfig as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
    """
    super().__init__()

    lora_settings = dict(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.peft_config = LoraConfig(**lora_settings)

    backbone = ClipForMatching(config_path, projection_dim=projection_dim)
    self.peft_model = PeftModelForSequenceClassification(backbone, self.peft_config)

    # One-in / one-out linear layer applied to the similarity score in forward().
    self.classifier = nn.Linear(1, 1)

    self.init_weights()
    # Set after init_weights() so the classifier weight ends up at exactly 5.0.
    self.classifier.weight.data.fill_(5.0)

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^(?!peft_model\\.base_model\\.model\\.).*": "peft_model.base_model.model."
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "q_proj.bias": "q_proj.base_layer.bias",
    "v_proj.weight": "v_proj.base_layer.weight",
    "v_proj.bias": "v_proj.base_layer.bias",
}

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints = ['lora', 'classifier']

replace_keys_in_peft_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_peft_state_dict = {
    ".weight": ".base_layer.weight",
    ".bias": ".base_layer.bias",
}

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

peft_model `instance-attribute` ¤

peft_model = PeftModelForSequenceClassification(
    ClipForMatching(
        config_path, projection_dim=projection_dim
    ),
    peft_config,
)

classifier `instance-attribute` ¤

classifier = Linear(1, 1)

forward ¤

forward(
    input_ids: Tensor,
    pixel_values: Tensor,
    attention_mask: Tensor,
    position_ids: Tensor,
)

Source code in src/unitorch/models/peft/modeling_clip.py

def forward(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    attention_mask: torch.Tensor,
    position_ids: torch.Tensor,
):
    """
    Score text/image pairs with the LoRA-wrapped matching model.

    All four arguments are forwarded by keyword to `self.peft_model`, which
    returns a (text, image) pair of embeddings.

    Returns:
        The result of `self.classifier` applied to the cosine similarity of
        each text/image embedding pair (similarity kept as a trailing
        dimension of size 1).
    """
    text_features, image_features = self.peft_model(
        input_ids=input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        position_ids=position_ids,
    )

    # L2-normalise both sides so the dot product below is a cosine similarity.
    text_unit = text_features / text_features.norm(dim=-1, keepdim=True)
    image_unit = image_features / image_features.norm(dim=-1, keepdim=True)
    cosine = (text_unit * image_unit).sum(dim=-1, keepdim=True)

    return self.classifier(cosine)

ClipLoraForTextMatching¤

Bases: GenericPeftModel, PeftWeightLoaderMixin

Source code in src/unitorch/models/peft/modeling_clip.py

def __init__(
    self,
    config_path: str,
    projection_dim: Optional[int] = 512,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
):
    """
    Build a LoRA-wrapped ClipForTextMatching model with a scalar scoring head.

    Args:
        config_path (str): First positional argument given to ClipForTextMatching.
        projection_dim (int, optional): Forwarded to ClipForTextMatching as `projection_dim`. Defaults to 512.
        lora_r (int, optional): Forwarded to LoraConfig as `r`. Defaults to 16.
        lora_alpha (int, optional): Forwarded to LoraConfig as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Forwarded to LoraConfig as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to LoraConfig as `fan_in_fan_out`. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to LoraConfig as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
    """
    super().__init__()

    lora_settings = dict(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.peft_config = LoraConfig(**lora_settings)

    backbone = ClipForTextMatching(config_path, projection_dim=projection_dim)
    self.peft_model = PeftModelForSequenceClassification(backbone, self.peft_config)

    # One-in / one-out linear layer applied to the similarity score in forward().
    self.classifier = nn.Linear(1, 1)

    self.init_weights()
    # Set after init_weights() so the classifier weight ends up at exactly 5.0.
    self.classifier.weight.data.fill_(5.0)

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^(?!peft_model\\.base_model\\.model\\.).*": "peft_model.base_model.model."
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "q_proj.bias": "q_proj.base_layer.bias",
    "v_proj.weight": "v_proj.base_layer.weight",
    "v_proj.bias": "v_proj.base_layer.bias",
}

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints = ['lora', 'classifier']

replace_keys_in_peft_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_peft_state_dict = {
    ".weight": ".base_layer.weight",
    ".bias": ".base_layer.bias",
}

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

peft_model `instance-attribute` ¤

peft_model = PeftModelForSequenceClassification(
    ClipForTextMatching(
        config_path, projection_dim=projection_dim
    ),
    peft_config,
)

classifier `instance-attribute` ¤

classifier = Linear(1, 1)

forward ¤

forward(
    query_input_ids: Tensor,
    query_attention_mask: Tensor,
    query_position_ids: Tensor,
    doc_input_ids: Tensor,
    doc_attention_mask: Tensor,
    doc_position_ids: Tensor,
)

Source code in src/unitorch/models/peft/modeling_clip.py

def forward(
    self,
    query_input_ids: torch.Tensor,
    query_attention_mask: torch.Tensor,
    query_position_ids: torch.Tensor,
    doc_input_ids: torch.Tensor,
    doc_attention_mask: torch.Tensor,
    doc_position_ids: torch.Tensor,
):
    """
    Score query/document pairs with the LoRA-wrapped text matching model.

    All six arguments are forwarded by keyword to `self.peft_model`, which
    returns a (query, doc) pair of embeddings.

    Returns:
        The result of `self.classifier` applied to the cosine similarity of
        each query/doc embedding pair (similarity kept as a trailing
        dimension of size 1).
    """
    query_features, doc_features = self.peft_model(
        query_input_ids=query_input_ids,
        query_attention_mask=query_attention_mask,
        query_position_ids=query_position_ids,
        doc_input_ids=doc_input_ids,
        doc_attention_mask=doc_attention_mask,
        doc_position_ids=doc_position_ids,
    )

    # L2-normalise both sides so the dot product below is a cosine similarity.
    query_unit = query_features / query_features.norm(dim=-1, keepdim=True)
    doc_unit = doc_features / doc_features.norm(dim=-1, keepdim=True)
    cosine = (query_unit * doc_unit).sum(dim=-1, keepdim=True)

    return self.classifier(cosine)

LlamaLoraForClassification¤

Bases: GenericPeftModel

Source code in src/unitorch/models/peft/modeling_llama.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    num_classes: Optional[int] = 1,
    hidden_dropout_prob: Optional[float] = 0.1,
    freeze_classifer: Optional[bool] = True,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Build a LlamaModel with a LoRA adapter, dropout and a linear classifier.

    Args:
        config_path (str): JSON file read with `LlamaConfig.from_json_file`.
        lora_r (int, optional): Forwarded to LoraConfig as `r`. Defaults to 16.
        lora_alpha (int, optional): Forwarded to LoraConfig as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Forwarded to LoraConfig as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to LoraConfig as `fan_in_fan_out`. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to LoraConfig as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
        num_classes (int, optional): Output size of the classifier layer. Defaults to 1.
        hidden_dropout_prob (float, optional): Probability given to `nn.Dropout`. Defaults to 0.1.
        freeze_classifer (bool, optional): When truthy, the classifier parameters get
            `requires_grad = False`. Defaults to True.
        gradient_checkpointing (bool, optional): Stored on the loaded config as
            `gradient_checkpointing`. Defaults to False.
    """
    super().__init__()

    self.config = LlamaConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing

    lora_settings = dict(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.peft_config = LoraConfig(**lora_settings)

    self.model = LlamaModel(self.config)
    self.model.add_adapter(self.peft_config)

    self.dropout = nn.Dropout(hidden_dropout_prob)
    self.classifier = nn.Linear(self.config.hidden_size, num_classes)
    if freeze_classifer:
        # Turns off requires_grad on every classifier parameter.
        self.classifier.requires_grad_(False)

    self.init_weights()

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "v_proj.weight": "v_proj.base_layer.weight",
}

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints = ['lora', 'classifier']

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

model `instance-attribute` ¤

model = LlamaModel(config)

dropout `instance-attribute` ¤

dropout = Dropout(hidden_dropout_prob)

classifier `instance-attribute` ¤

classifier = Linear(hidden_size, num_classes)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the classification model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`
`position_ids`	`Tensor`	Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, num_classes).

Source code in src/unitorch/models/peft/modeling_llama.py

def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the backbone and classify from the hidden state at the last position.

    Args:
        input_ids (torch.Tensor): Input tensor of shape (batch_size, sequence_length).
        attention_mask (torch.Tensor, optional): Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.
        position_ids (torch.Tensor, optional): Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.

    Returns:
        torch.Tensor: Output logits of shape (batch_size, num_classes).
    """
    # Element 0 of the backbone output holds the per-position hidden states.
    hidden_states = self.model(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
    )[0]

    # Pool by taking the final sequence position of every sample.
    last_position = hidden_states[:, -1]
    return self.classifier(self.dropout(last_position))

LlamaLoraForGeneration¤

Bases: GenericPeftModel

Source code in src/unitorch/models/peft/modeling_llama.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Build a LlamaForCausalLM with a LoRA adapter attached.

    Args:
        config_path (str): JSON file read with `LlamaConfig.from_json_file`.
        lora_r (int, optional): Forwarded to LoraConfig as `r`. Defaults to 16.
        lora_alpha (int, optional): Forwarded to LoraConfig as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Forwarded to LoraConfig as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to LoraConfig as `fan_in_fan_out`. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to LoraConfig as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Stored on the loaded config as
            `gradient_checkpointing`. Defaults to False.
    """
    super().__init__()

    self.config = LlamaConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing

    lora_settings = dict(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.peft_config = LoraConfig(**lora_settings)

    self.base_model = LlamaForCausalLM(self.config)
    self.base_model.add_adapter(self.peft_config)

    self.init_weights()

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^model.": "base_model.",
    "^lm_head.": "base_model.",
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "v_proj.weight": "v_proj.base_layer.weight",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

base_model `instance-attribute` ¤

base_model = LlamaForCausalLM(config)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`
`position_ids`	`Tensor`	Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).

Source code in src/unitorch/models/peft/modeling_llama.py

def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Forward pass of the generation model.

    Args:
        input_ids (torch.Tensor): Input tensor of shape (batch_size, sequence_length).
        attention_mask (torch.Tensor, optional): Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.
        position_ids (torch.Tensor, optional): Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.

    Returns:
        torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).
    """
    # return_dict=True so the logits can be read by attribute below.
    lm_kwargs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        return_dict=True,
    )
    return self.base_model(**lm_kwargs).logits

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 1.	`1`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 2.	`2`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 48.	`48`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_llama.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate continuations for `input_ids` and return only the new tokens.

    The prompt tokens are stripped from the result, and every returned
    sequence is padded on the right with `decoder_start_token_id` up to
    `max_gen_seq_length`.

    Args:
        input_ids: Input tensor of shape (batch_size, sequence_length).
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): Passed to the base model as `bos_token_id`;
            also used as the fill value for padding. Defaults to 1.
        decoder_end_token_id (int or List[int], optional): Passed to the base model as
            `eos_token_id`. Defaults to 2.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum number of generated tokens
            (prompt length is added before calling the base model). Defaults to 0.
        max_gen_seq_length (int, optional): Maximum number of generated tokens
            (prompt length is added before calling the base model). Defaults to 48.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: `sequences` (long tensor of generated token ids; the
        `num_return_sequences` axis is dropped when it equals 1) and
        `sequences_scores` as reported by the base model.
    """
    prompt_length = input_ids.size(1)

    generated = self.base_model.generate(
        input_ids=input_ids,
        max_length=max_gen_seq_length + prompt_length,
        min_length=min_gen_seq_length + prompt_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Regroup the flat output so that axis 1 enumerates the returned sequences.
    raw_sequences = generated.sequences
    total_length = raw_sequences.size(-1)
    grouped = raw_sequences.reshape(-1, num_return_sequences, total_length)

    # Fixed-width buffer pre-filled with the start token id, which acts as padding.
    padded = torch.full(
        (grouped.size(0), num_return_sequences, max_gen_seq_length),
        float(decoder_start_token_id),
        device=grouped.device,
    )
    # Copy only the newly generated tokens (everything after the prompt).
    new_token_count = total_length - prompt_length
    padded[:, :, :new_token_count].copy_(grouped[:, :, prompt_length:total_length])

    if num_return_sequences == 1:
        padded = padded.reshape(-1, max_gen_seq_length)

    return GenericOutputs(
        sequences=padded.long(),
        sequences_scores=generated.sequences_scores,
    )

LlavaMistralClipLoraForClassification¤

Bases: GenericPeftModel

Source code in src/unitorch/models/peft/modeling_llava.py

def __init__(
    self,
    config_path: str,
    image_token_index: Optional[int] = 32000,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    num_classes: Optional[int] = 1,
    hidden_dropout_prob: Optional[float] = 0.1,
    freeze_multi_modal_projector: Optional[bool] = True,
    freeze_classifer: Optional[bool] = True,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Build a CLIP vision tower + Mistral backbone (with LoRA adapter) classifier.

    Args:
        config_path (str): JSON file read with `LlavaNextConfig.from_json_file`.
        image_token_index (int, optional): Token id stored as `self.image_token_index`. Defaults to 32000.
        lora_r (int, optional): Forwarded to LoraConfig as `r`. Defaults to 16.
        lora_alpha (int, optional): Forwarded to LoraConfig as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Forwarded to LoraConfig as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to LoraConfig as `fan_in_fan_out`. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to LoraConfig as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
        num_classes (int, optional): Output size of the classifier layer. Defaults to 1.
        hidden_dropout_prob (float, optional): Probability given to `nn.Dropout`. Defaults to 0.1.
        freeze_multi_modal_projector (bool, optional): When truthy, the projector parameters get
            `requires_grad = False`. Defaults to True.
        freeze_classifer (bool, optional): When truthy, the classifier parameters get
            `requires_grad = False`. Defaults to True.
        gradient_checkpointing (bool, optional): Stored on the loaded config as
            `gradient_checkpointing`. Defaults to False.
    """
    super().__init__()

    self.config = LlavaNextConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing

    lora_settings = dict(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.peft_config = LoraConfig(**lora_settings)

    self.vision_tower = CLIPVisionModel(self.config.vision_config)
    self.multi_modal_projector = LlavaNextMultiModalProjector(self.config)

    # Learned "newline" vector, drawn from N(0, 1) and scaled by 1/sqrt(hidden_size).
    newline_scale = 1 / math.sqrt(self.config.text_config.hidden_size)
    newline_init = torch.randn(
        self.config.text_config.hidden_size, dtype=self.dtype
    )
    self.image_newline = nn.Parameter(newline_init * newline_scale)

    self.language_model = MistralModel(self.config.text_config)
    self.language_model.add_adapter(self.peft_config)

    self.dropout = nn.Dropout(hidden_dropout_prob)
    self.classifier = nn.Linear(self.config.text_config.hidden_size, num_classes)
    self.init_weights()

    # The vision tower is always frozen; classifier and projector only on request.
    frozen_modules = []
    if freeze_classifer:
        frozen_modules.append(self.classifier)
    frozen_modules.append(self.vision_tower)
    if freeze_multi_modal_projector:
        frozen_modules.append(self.multi_modal_projector)
    for module in frozen_modules:
        module.requires_grad_(False)

    self.image_token_index = image_token_index

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "(language_model.*?)q_proj\\.weight": "\\1q_proj.base_layer.weight",
    "(language_model.*?)v_proj\\.weight": "\\1v_proj.base_layer.weight",
    "language_model.": "",
}

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints = [
    "lora",
    "multi_modal_projector",
    "classifier",
]

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

vision_tower `instance-attribute` ¤

vision_tower = CLIPVisionModel(vision_config)

multi_modal_projector `instance-attribute` ¤

multi_modal_projector = LlavaNextMultiModalProjector(config)

image_newline `instance-attribute` ¤

image_newline = Parameter(
    randn(hidden_size, dtype=dtype) * embed_std
)

language_model `instance-attribute` ¤

language_model = MistralModel(text_config)

dropout `instance-attribute` ¤

dropout = Dropout(hidden_dropout_prob)

classifier `instance-attribute` ¤

classifier = Linear(hidden_size, num_classes)

image_token_index `instance-attribute` ¤

image_token_index = image_token_index

forward ¤

forward(
    input_ids: Tensor,
    pixel_values: Tensor,
    attention_mask: Optional[Tensor] = None,
)

Forward pass of the classification model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`
`pixel_values`	`Tensor`	Image input passed to the vision tower.	required

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, num_classes).

Source code in src/unitorch/models/peft/modeling_llava.py

def forward(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
):
    """
    Forward pass of the classification model.

    Image embeddings from the vision tower are projected, followed by the
    `image_newline` vector, and spliced into the text embedding sequence at
    the position of the image placeholder token. The merged sequence is run
    through the language model and the hidden state at the last position is
    classified.

    Args:
        input_ids (torch.Tensor): Input tensor of shape (batch_size, sequence_length).
            Positions equal to `self.image_token_index` mark where the image
            embeddings are inserted. The tensor is left unmodified.
        pixel_values (torch.Tensor): Image input passed to the vision tower.
        attention_mask (torch.Tensor, optional): Attention mask tensor of shape
            (batch_size, sequence_length). Defaults to None, in which case all
            text positions are attended.

    Returns:
        torch.Tensor: Output logits of shape (batch_size, num_classes).
    """
    # Second-to-last hidden layer of the vision tower, without its first token.
    vision_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
    image_embeds = vision_outputs.hidden_states[-2][:, 1:]
    image_embeds = self.multi_modal_projector(image_embeds)
    # Append the learned newline vector after the image patches of every sample.
    image_embeds = torch.cat(
        [
            image_embeds,
            self.image_newline.expand(
                image_embeds.shape[0], 1, image_embeds.shape[-1]
            ),
        ],
        dim=1,
    )

    image_seq_length = image_embeds.size(1)
    is_image_token = input_ids == self.image_token_index
    batch_indices, text_indices = torch.where(~is_image_token)
    # Each image placeholder expands to image_seq_length slots, so every token
    # after it is shifted right by (image_seq_length - 1).
    image_masks = is_image_token.long() * (image_seq_length - 1)
    new_positions = torch.cumsum(image_masks + 1, dim=1) - 1
    new_text_indices = new_positions[batch_indices, text_indices]

    # Replace placeholders with id 0 for the embedding lookup. Done out of
    # place so the caller's tensor keeps its image tokens.
    input_ids = input_ids.masked_fill(is_image_token, 0)
    text_embeds = self.language_model.get_input_embeddings()(input_ids)

    batch_size, text_seq_length, text_dim = text_embeds.size()

    if attention_mask is None:
        attention_mask = torch.ones(batch_size, text_seq_length).to(
            text_embeds.device
        )

    merged_length = text_seq_length + image_seq_length - 1
    final_embeds = torch.zeros(batch_size, merged_length, text_dim).to(
        text_embeds.device
    )
    # 0 marks slots taken by text tokens, 1 marks slots reserved for the image.
    overwrite_masks = torch.ones(batch_size, merged_length).to(text_embeds.device)
    overwrite_masks[batch_indices, new_text_indices] = 0
    final_embeds[overwrite_masks == 0] = text_embeds[
        batch_indices, text_indices
    ].to(final_embeds)
    # NOTE: the number of image slots only matches image_embeds when every row
    # of input_ids contains exactly one image placeholder token.
    final_embeds[overwrite_masks == 1] = (
        image_embeds.contiguous().view(-1, text_dim).to(final_embeds)
    )
    final_masks = torch.zeros(batch_size, merged_length).to(attention_mask)
    final_masks[overwrite_masks == 0] = attention_mask[
        batch_indices, text_indices
    ].to(final_masks)
    final_masks[overwrite_masks == 1] = 1
    # Positions count attended slots only; masked slots get -1. Cast to long so
    # the ids stay integral even when the mask is a float tensor (e.g. the
    # default mask built above).
    position_ids = (
        (final_masks.cumsum(dim=1) - 1).masked_fill(final_masks == 0, -1).long()
    )

    outputs = self.language_model(
        inputs_embeds=final_embeds,
        attention_mask=final_masks,
        position_ids=position_ids,
    )[0]
    # Pool by taking the hidden state at the last position of the merged sequence.
    pooled_output = outputs[:, -1]
    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)
    return logits

LlavaMistralClipLoraForGeneration¤

Bases: GenericPeftModel

Source code in src/unitorch/models/peft/modeling_llava.py

def __init__(
    self,
    config_path: str,
    image_token_index: Optional[int] = 32000,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    freeze_multi_modal_projector: Optional[bool] = True,
    gradient_checkpointing: Optional[bool] = False,
    pad_token_id: Optional[int] = 0,
):
    """
    Build a CLIP vision tower + MistralForCausalLM (with LoRA adapter) generator.

    Args:
        config_path (str): JSON file read with `LlavaNextConfig.from_json_file`.
        image_token_index (int, optional): Token id stored as `self.image_token_index`. Defaults to 32000.
        lora_r (int, optional): Forwarded to LoraConfig as `r`. Defaults to 16.
        lora_alpha (int, optional): Forwarded to LoraConfig as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Forwarded to LoraConfig as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to LoraConfig as `fan_in_fan_out`. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to LoraConfig as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
        freeze_multi_modal_projector (bool, optional): When truthy, the projector parameters get
            `requires_grad = False`. Defaults to True.
        gradient_checkpointing (bool, optional): Stored on the loaded config as
            `gradient_checkpointing`. Defaults to False.
        pad_token_id (int, optional): Stored on the loaded config as `pad_token_id`. Defaults to 0.
    """
    super().__init__()

    self.config = LlavaNextConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing
    self.config.pad_token_id = pad_token_id

    lora_settings = dict(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.peft_config = LoraConfig(**lora_settings)

    self.vision_tower = CLIPVisionModel(self.config.vision_config)
    self.multi_modal_projector = LlavaNextMultiModalProjector(self.config)

    # Learned "newline" vector, drawn from N(0, 1) and scaled by 1/sqrt(hidden_size).
    newline_scale = 1 / math.sqrt(self.config.text_config.hidden_size)
    newline_init = torch.randn(
        self.config.text_config.hidden_size, dtype=self.dtype
    )
    self.image_newline = nn.Parameter(newline_init * newline_scale)

    self.language_model = MistralForCausalLM(self.config.text_config)
    self.language_model.add_adapter(self.peft_config)
    self.init_weights()

    # The vision tower is always frozen; the projector only on request.
    frozen_modules = [self.vision_tower]
    if freeze_multi_modal_projector:
        frozen_modules.append(self.multi_modal_projector)
    for module in frozen_modules:
        module.requires_grad_(False)

    self.image_token_index = image_token_index

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "(language_model.*?)q_proj\\.weight": "\\1q_proj.base_layer.weight",
    "(language_model.*?)v_proj\\.weight": "\\1v_proj.base_layer.weight",
    "language_model\\.": "model.",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

vision_tower `instance-attribute` ¤

vision_tower = CLIPVisionModel(vision_config)

multi_modal_projector `instance-attribute` ¤

multi_modal_projector = LlavaNextMultiModalProjector(config)

image_newline `instance-attribute` ¤

image_newline = Parameter(
    randn(hidden_size, dtype=dtype) * embed_std
)

language_model `instance-attribute` ¤

language_model = MistralForCausalLM(text_config)

image_token_index `instance-attribute` ¤

image_token_index = image_token_index

forward ¤

forward(
    input_ids: Tensor,
    pixel_values: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`pixel_values`	`Tensor`	Image input passed to the vision tower.	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`
`position_ids`	`Tensor`	Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).

Source code in src/unitorch/models/peft/modeling_llava.py

def forward(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Forward pass of the generation model.

    Image features from the vision tower are projected, extended with one
    ``image_newline`` vector, and spliced into the text embedding sequence at
    the image placeholder position. The merged sequence is run through the
    language model and the logits at the original text positions are
    gathered back into a tensor aligned with ``input_ids``.

    Args:
        input_ids (torch.Tensor): Token IDs of shape (batch_size, sequence_length).
            Entries equal to ``self.image_token_index`` mark where image
            embeddings are inserted. The tensor is not modified.
        pixel_values (torch.Tensor): Image input passed to the vision tower.
        attention_mask (torch.Tensor, optional): Attention mask tensor of shape
            (batch_size, sequence_length). Defaults to None (all ones).
        position_ids (torch.Tensor, optional): Accepted for interface
            compatibility but ignored; position IDs are recomputed from the
            merged attention mask. Defaults to None.

    Returns:
        torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).
            Rows at image placeholder positions are left as zeros.
    """
    vision_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
    # Second-to-last hidden state with the first position dropped
    # (presumably the class token -- confirm against the vision tower).
    image_embeds = vision_outputs.hidden_states[-2][:, 1:]
    image_embeds = self.multi_modal_projector(image_embeds)
    image_embeds = torch.cat(
        [
            image_embeds,
            self.image_newline.expand(
                image_embeds.shape[0], 1, image_embeds.shape[-1]
            ),
        ],
        dim=1,
    )

    image_seq_length = image_embeds.size(1)
    batch_indices, text_indices = torch.where(input_ids != self.image_token_index)
    # Each placeholder expands to image_seq_length slots, so every later
    # text token shifts right by (image_seq_length - 1).
    image_masks = (input_ids == self.image_token_index).long() * (
        image_seq_length - 1
    )
    new_positions = torch.cumsum(image_masks + 1, dim=1) - 1
    new_text_indices = new_positions[batch_indices, text_indices]

    # Out-of-place replacement: the original code assigned into `input_ids`
    # and silently overwrote the caller's tensor.
    input_ids = input_ids.masked_fill(input_ids == self.image_token_index, 0)
    text_embeds = self.language_model.get_input_embeddings()(input_ids)

    batch_size, text_seq_length, text_dim = text_embeds.size()

    if attention_mask is None:
        attention_mask = torch.ones(batch_size, text_seq_length).to(
            text_embeds.device
        )

    # NOTE: the merged length (text + image - 1) and the flat image copy
    # below only line up when each row holds exactly one placeholder.
    final_embeds = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1, text_dim
    ).to(text_embeds.device)
    # 0 marks a text slot, 1 marks an image slot in the merged sequence.
    overwrite_masks = torch.ones(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(text_embeds.device)
    overwrite_masks[batch_indices, new_text_indices] = 0
    final_embeds[overwrite_masks == 0] = text_embeds[
        batch_indices, text_indices
    ].to(final_embeds)
    final_embeds[overwrite_masks == 1] = (
        image_embeds.contiguous().view(-1, text_dim).to(final_embeds)
    )
    final_masks = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(attention_mask)
    final_masks[overwrite_masks == 0] = attention_mask[
        batch_indices, text_indices
    ].to(final_masks)
    final_masks[overwrite_masks == 1] = 1
    # Positions count attended tokens only; masked slots get -1.
    position_ids = (final_masks.cumsum(dim=1) - 1).masked_fill(final_masks == 0, -1)

    outputs = self.language_model(
        inputs_embeds=final_embeds,
        attention_mask=final_masks,
        position_ids=position_ids,
    )
    # Gather the logits at the text slots back to the input layout.
    logits = torch.zeros(batch_size, text_seq_length, outputs.logits.size(-1)).to(
        outputs.logits.device
    )
    logits[batch_indices, text_indices] = outputs.logits[
        batch_indices, new_text_indices
    ]
    return logits

generate ¤

generate(
    input_ids: Tensor,
    pixel_values: Tensor,
    attention_mask: Optional[Tensor] = None,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 1.	`1`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 2.	`2`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 48.	`48`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_llava.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate text using the generation model.

    Image features are projected, extended with one ``image_newline`` vector
    and spliced into the text embeddings at the image placeholder position;
    the merged embeddings are passed to ``self.language_model.generate``.

    Args:
        input_ids (torch.Tensor): Token IDs of shape (batch_size, sequence_length).
            Entries equal to ``self.image_token_index`` mark where image
            embeddings are inserted. The tensor is not modified.
        pixel_values (torch.Tensor): Image input passed to the vision tower.
        attention_mask (torch.Tensor, optional): Attention mask of shape
            (batch_size, sequence_length). Defaults to None (all ones).
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token,
            also used to pad the returned sequences. Defaults to 1.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 2.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum length of generated sequences. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum length of generated sequences. Defaults to 48.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: ``sequences`` (long tensor padded to ``max_gen_seq_length``
            with ``decoder_start_token_id``; the ``num_return_sequences`` axis is
            flattened away when it equals 1) and ``sequences_scores``.
    """
    vision_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
    # Second-to-last hidden state with the first position dropped
    # (presumably the class token -- confirm against the vision tower).
    image_embeds = vision_outputs.hidden_states[-2][:, 1:]
    image_embeds = self.multi_modal_projector(image_embeds)
    image_embeds = torch.cat(
        [
            image_embeds,
            self.image_newline.expand(
                image_embeds.shape[0], 1, image_embeds.shape[-1]
            ),
        ],
        dim=1,
    )

    image_seq_length = image_embeds.size(1)
    batch_indices, text_indices = torch.where(input_ids != self.image_token_index)
    # Each placeholder expands to image_seq_length slots, so every later
    # text token shifts right by (image_seq_length - 1).
    image_masks = (input_ids == self.image_token_index).long() * (
        image_seq_length - 1
    )
    new_positions = torch.cumsum(image_masks + 1, dim=1) - 1
    new_text_indices = new_positions[batch_indices, text_indices]

    # Out-of-place replacement: the original code assigned into `input_ids`
    # and silently overwrote the caller's tensor.
    input_ids = input_ids.masked_fill(input_ids == self.image_token_index, 0)
    text_embeds = self.language_model.get_input_embeddings()(input_ids)

    batch_size = text_embeds.size(0)
    text_seq_length, image_seq_length = text_embeds.size(1), image_embeds.size(1)
    text_dim = text_embeds.size(2)

    if attention_mask is None:
        attention_mask = torch.ones(batch_size, text_seq_length).to(
            text_embeds.device
        )

    # NOTE: the merged length (text + image - 1) and the flat image copy
    # below only line up when each row holds exactly one placeholder.
    final_embeds = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1, text_dim
    ).to(text_embeds.device)
    # 0 marks a text slot, 1 marks an image slot in the merged sequence.
    overwrite_masks = torch.ones(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(text_embeds.device)
    overwrite_masks[batch_indices, new_text_indices] = 0
    final_embeds[overwrite_masks == 0] = text_embeds[
        batch_indices, text_indices
    ].to(final_embeds)
    final_embeds[overwrite_masks == 1] = (
        image_embeds.contiguous().view(-1, text_dim).to(final_embeds)
    )
    final_masks = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(attention_mask)
    final_masks[overwrite_masks == 0] = attention_mask[
        batch_indices, text_indices
    ].to(final_masks)
    final_masks[overwrite_masks == 1] = 1
    input_seq_length = final_embeds.size(1)
    # Length limits are expressed relative to the merged prompt length.
    outputs = self.language_model.generate(
        inputs_embeds=final_embeds,
        attention_mask=final_masks,
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    sequences = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    # Fixed-width output buffer pre-filled with decoder_start_token_id.
    outputs.sequences = (
        torch.zeros(sequences.size(0), num_return_sequences, max_gen_seq_length).to(
            device=sequences.device
        )
        + decoder_start_token_id
    )
    # Keep only what follows the first input_seq_length positions.
    outputs.sequences[:, :, : sequences.size(-1) - input_seq_length].copy_(
        sequences[:, :, input_seq_length : sequences.size(-1)]
    )

    if num_return_sequences == 1:
        outputs.sequences = outputs.sequences.reshape(-1, max_gen_seq_length)

    return GenericOutputs(
        sequences=outputs.sequences.long(),
        sequences_scores=outputs.sequences_scores,
    )

LlavaLlamaSiglipLoraForGeneration¤

Bases: GenericPeftModel

Source code in src/unitorch/models/peft/modeling_llava.py

def __init__(
    self,
    config_path: str,
    image_token_index: Optional[int] = 128077,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    freeze_multi_modal_projector: Optional[bool] = True,
    gradient_checkpointing: Optional[bool] = False,
    pad_token_id: Optional[int] = 0,
):
    """
    Build a Llava model (Siglip vision tower + Llama language model) with a LoRA adapter on the language model.

    Args:
        config_path (str): Path to the JSON file loaded with `LlavaConfig.from_json_file`.
        image_token_index (int, optional): Token ID stored as `self.image_token_index`. Defaults to 128077.
        lora_r (int, optional): `r` passed to `LoraConfig`. Defaults to 16.
        lora_alpha (int, optional): `lora_alpha` passed to `LoraConfig`. Defaults to 32.
        lora_dropout (float, optional): `lora_dropout` passed to `LoraConfig`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): `fan_in_fan_out` passed to `LoraConfig`. Defaults to True.
        target_modules (List[str] or str, optional): `target_modules` passed to `LoraConfig`.
            Defaults to ["q_proj", "v_proj"].
        freeze_multi_modal_projector (bool, optional): Whether to disable gradients for the
            multi-modal projector. Defaults to True.
        gradient_checkpointing (bool, optional): Value written to `config.gradient_checkpointing`.
            Defaults to False.
        pad_token_id (int, optional): Value written to `config.pad_token_id`. Defaults to 0.
    """
    super().__init__()
    self.config = LlavaConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing
    self.config.pad_token_id = pad_token_id
    self.peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.vision_tower = SiglipVisionModel(self.config.vision_config)
    self.multi_modal_projector = LlavaMultiModalProjector(self.config)
    self.language_model = LlamaForCausalLM(self.config.text_config)

    # The adapter is attached to the language model only, before init_weights().
    self.language_model.add_adapter(self.peft_config)
    self.init_weights()

    # The vision tower is always frozen.
    for param in self.vision_tower.parameters():
        param.requires_grad = False

    # The projector is frozen only when requested.
    if freeze_multi_modal_projector:
        for param in self.multi_modal_projector.parameters():
            param.requires_grad = False

    self.image_token_index = image_token_index

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "(language_model.*?)q_proj\\.weight": "\\1q_proj.base_layer.weight",
    "(language_model.*?)v_proj\\.weight": "\\1v_proj.base_layer.weight",
    "language_model\\.": "model.",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

vision_tower `instance-attribute` ¤

vision_tower = SiglipVisionModel(vision_config)

multi_modal_projector `instance-attribute` ¤

multi_modal_projector = LlavaMultiModalProjector(config)

language_model `instance-attribute` ¤

language_model = LlamaForCausalLM(text_config)

image_token_index `instance-attribute` ¤

image_token_index = image_token_index

forward ¤

forward(
    input_ids: Tensor,
    pixel_values: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`
`position_ids`	`Tensor`	Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).

Source code in src/unitorch/models/peft/modeling_llava.py

def forward(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Forward pass of the generation model.

    Image features from the vision tower are projected and spliced into the
    text embedding sequence at the image placeholder position. The merged
    sequence is run through the language model and the logits at the
    original text positions are gathered back into a tensor aligned with
    ``input_ids``.

    Args:
        input_ids (torch.Tensor): Token IDs of shape (batch_size, sequence_length).
            Entries equal to ``self.image_token_index`` mark where image
            embeddings are inserted. The tensor is not modified.
        pixel_values (torch.Tensor): Image input passed to the vision tower.
        attention_mask (torch.Tensor, optional): Attention mask tensor of shape
            (batch_size, sequence_length). Defaults to None (all ones).
        position_ids (torch.Tensor, optional): Accepted for interface
            compatibility but ignored; position IDs are recomputed from the
            merged attention mask. Defaults to None.

    Returns:
        torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).
            Rows at image placeholder positions are left as zeros.
    """
    vision_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
    # Features come from the second-to-last hidden state; all positions are kept.
    image_embeds = vision_outputs.hidden_states[-2]
    image_embeds = self.multi_modal_projector(image_embeds)
    image_seq_length = image_embeds.size(1)
    batch_indices, text_indices = torch.where(input_ids != self.image_token_index)
    # Each placeholder expands to image_seq_length slots, so every later
    # text token shifts right by (image_seq_length - 1).
    image_masks = (input_ids == self.image_token_index).long() * (
        image_seq_length - 1
    )
    new_positions = torch.cumsum(image_masks + 1, dim=1) - 1
    new_text_indices = new_positions[batch_indices, text_indices]

    # Out-of-place replacement: the original code assigned into `input_ids`
    # and silently overwrote the caller's tensor.
    input_ids = input_ids.masked_fill(input_ids == self.image_token_index, 0)
    text_embeds = self.language_model.get_input_embeddings()(input_ids)

    batch_size, text_seq_length, text_dim = text_embeds.size()

    if attention_mask is None:
        attention_mask = torch.ones(batch_size, text_seq_length).to(
            text_embeds.device
        )

    # NOTE: the merged length (text + image - 1) and the flat image copy
    # below only line up when each row holds exactly one placeholder.
    final_embeds = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1, text_dim
    ).to(text_embeds.device)
    # 0 marks a text slot, 1 marks an image slot in the merged sequence.
    overwrite_masks = torch.ones(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(text_embeds.device)
    overwrite_masks[batch_indices, new_text_indices] = 0
    final_embeds[overwrite_masks == 0] = text_embeds[
        batch_indices, text_indices
    ].to(final_embeds)
    final_embeds[overwrite_masks == 1] = (
        image_embeds.contiguous().view(-1, text_dim).to(final_embeds)
    )
    final_masks = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(attention_mask)
    final_masks[overwrite_masks == 0] = attention_mask[
        batch_indices, text_indices
    ].to(final_masks)
    final_masks[overwrite_masks == 1] = 1
    # Positions count attended tokens only; masked slots get -1.
    position_ids = (final_masks.cumsum(dim=1) - 1).masked_fill(final_masks == 0, -1)

    outputs = self.language_model(
        inputs_embeds=final_embeds,
        attention_mask=final_masks,
        position_ids=position_ids,
    )
    # Gather the logits at the text slots back to the input layout.
    logits = torch.zeros(batch_size, text_seq_length, outputs.logits.size(-1)).to(
        outputs.logits.device
    )
    logits[batch_indices, text_indices] = outputs.logits[
        batch_indices, new_text_indices
    ]
    return logits

generate ¤

generate(
    input_ids: Tensor,
    pixel_values: Tensor,
    attention_mask: Optional[Tensor] = None,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 128000,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = None,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 128000.	`128000`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to None.	`None`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 48.	`48`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_llava.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 128000,
    decoder_end_token_id: Optional[Union[int, List[int]]] = None,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate text using the generation model.

    Image features are projected and spliced into the text embeddings at the
    image placeholder position; the merged embeddings are passed to
    ``self.language_model.generate``.

    Args:
        input_ids (torch.Tensor): Token IDs of shape (batch_size, sequence_length).
            Entries equal to ``self.image_token_index`` mark where image
            embeddings are inserted. The tensor is not modified.
        pixel_values (torch.Tensor): Image input passed to the vision tower.
        attention_mask (torch.Tensor, optional): Attention mask of shape
            (batch_size, sequence_length). Defaults to None (all ones).
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token,
            also used to pad the returned sequences. Defaults to 128000.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s).
            Defaults to None.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum length of generated sequences. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum length of generated sequences. Defaults to 48.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: ``sequences`` (long tensor padded to ``max_gen_seq_length``
            with ``decoder_start_token_id``; the ``num_return_sequences`` axis is
            flattened away when it equals 1) and ``sequences_scores``.
    """
    vision_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
    # Features come from the second-to-last hidden state; all positions are kept.
    image_embeds = vision_outputs.hidden_states[-2]
    image_embeds = self.multi_modal_projector(image_embeds)
    image_seq_length = image_embeds.size(1)
    batch_indices, text_indices = torch.where(input_ids != self.image_token_index)
    # Each placeholder expands to image_seq_length slots, so every later
    # text token shifts right by (image_seq_length - 1).
    image_masks = (input_ids == self.image_token_index).long() * (
        image_seq_length - 1
    )
    new_positions = torch.cumsum(image_masks + 1, dim=1) - 1
    new_text_indices = new_positions[batch_indices, text_indices]

    # Out-of-place replacement: the original code assigned into `input_ids`
    # and silently overwrote the caller's tensor.
    input_ids = input_ids.masked_fill(input_ids == self.image_token_index, 0)
    text_embeds = self.language_model.get_input_embeddings()(input_ids)

    batch_size = text_embeds.size(0)
    text_seq_length, image_seq_length = text_embeds.size(1), image_embeds.size(1)
    text_dim = text_embeds.size(2)

    if attention_mask is None:
        attention_mask = torch.ones(batch_size, text_seq_length).to(
            text_embeds.device
        )

    # NOTE: the merged length (text + image - 1) and the flat image copy
    # below only line up when each row holds exactly one placeholder.
    final_embeds = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1, text_dim
    ).to(text_embeds.device)
    # 0 marks a text slot, 1 marks an image slot in the merged sequence.
    overwrite_masks = torch.ones(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(text_embeds.device)
    overwrite_masks[batch_indices, new_text_indices] = 0
    final_embeds[overwrite_masks == 0] = text_embeds[
        batch_indices, text_indices
    ].to(final_embeds)
    final_embeds[overwrite_masks == 1] = (
        image_embeds.contiguous().view(-1, text_dim).to(final_embeds)
    )
    final_masks = torch.zeros(
        batch_size, text_seq_length + image_seq_length - 1
    ).to(attention_mask)
    final_masks[overwrite_masks == 0] = attention_mask[
        batch_indices, text_indices
    ].to(final_masks)
    final_masks[overwrite_masks == 1] = 1
    input_seq_length = final_embeds.size(1)
    # Length limits are expressed relative to the merged prompt length.
    outputs = self.language_model.generate(
        inputs_embeds=final_embeds,
        attention_mask=final_masks,
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    sequences = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    # Fixed-width output buffer pre-filled with decoder_start_token_id.
    outputs.sequences = (
        torch.zeros(sequences.size(0), num_return_sequences, max_gen_seq_length).to(
            device=sequences.device
        )
        + decoder_start_token_id
    )
    # Keep only what follows the first input_seq_length positions.
    outputs.sequences[:, :, : sequences.size(-1) - input_seq_length].copy_(
        sequences[:, :, input_seq_length : sequences.size(-1)]
    )

    if num_return_sequences == 1:
        outputs.sequences = outputs.sequences.reshape(-1, max_gen_seq_length)

    return GenericOutputs(
        sequences=outputs.sequences.long(),
        sequences_scores=outputs.sequences_scores,
    )

MistralLoraForClassification¤

Bases: GenericPeftModel

Source code in src/unitorch/models/peft/modeling_mistral.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    num_classes: Optional[int] = 1,
    hidden_dropout_prob: Optional[float] = 0.1,
    freeze_classifer: Optional[bool] = True,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Build a Mistral backbone with a LoRA adapter, followed by dropout and a linear classification head.

    Args:
        config_path (str): Path to the JSON file loaded with `MistralConfig.from_json_file`.
        lora_r (int, optional): `r` passed to `LoraConfig`. Defaults to 16.
        lora_alpha (int, optional): `lora_alpha` passed to `LoraConfig`. Defaults to 32.
        lora_dropout (float, optional): `lora_dropout` passed to `LoraConfig`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): `fan_in_fan_out` passed to `LoraConfig`. Defaults to True.
        target_modules (List[str] or str, optional): `target_modules` passed to `LoraConfig`.
            Defaults to ["q_proj", "v_proj"].
        num_classes (int, optional): Output size of the classifier. Defaults to 1.
        hidden_dropout_prob (float, optional): Dropout probability applied before the classifier.
            Defaults to 0.1.
        freeze_classifer (bool, optional): Whether to disable gradients for the classifier
            parameters (the parameter name is spelled this way). Defaults to True.
        gradient_checkpointing (bool, optional): Value written to `config.gradient_checkpointing`.
            Defaults to False.
    """
    super().__init__()
    self.config = MistralConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing
    self.peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.model = MistralModel(self.config)
    # The adapter is attached to the backbone before the head is created.
    self.model.add_adapter(self.peft_config)
    self.dropout = nn.Dropout(hidden_dropout_prob)
    self.classifier = nn.Linear(self.config.hidden_size, num_classes)
    # With the default freeze_classifer=True the head receives no gradients.
    if freeze_classifer:
        for param in self.classifier.parameters():
            param.requires_grad = False
    self.init_weights()

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "v_proj.weight": "v_proj.base_layer.weight",
}

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints = ['lora', 'classifier']

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

model `instance-attribute` ¤

model = MistralModel(config)

dropout `instance-attribute` ¤

dropout = Dropout(hidden_dropout_prob)

classifier `instance-attribute` ¤

classifier = Linear(hidden_size, num_classes)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the classification model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`
`position_ids`	`Tensor`	Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, num_classes).

Source code in src/unitorch/models/peft/modeling_mistral.py

def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the backbone and classify from the hidden state at the last sequence position.

    Args:
        input_ids (torch.Tensor): Token IDs of shape (batch_size, sequence_length).
        attention_mask (torch.Tensor, optional): Attention mask of shape (batch_size, sequence_length). Defaults to None.
        position_ids (torch.Tensor, optional): Position IDs of shape (batch_size, sequence_length). Defaults to None.

    Returns:
        torch.Tensor: Logits of shape (batch_size, num_classes).
    """
    backbone_outputs = self.model(
        input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
    )
    # First element of the backbone output holds the per-token hidden states.
    hidden_states = backbone_outputs[0]
    # The final position is used as the pooled representation.
    last_token_state = hidden_states[:, -1]
    return self.classifier(self.dropout(last_token_state))

MistralLoraForGeneration¤

Bases: GenericPeftModel

Source code in src/unitorch/models/peft/modeling_mistral.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
    pad_token_id: Optional[int] = 0,
):
    """
    Build a Mistral causal language model with a LoRA adapter attached.

    Args:
        config_path (str): Path to the JSON file loaded with `MistralConfig.from_json_file`.
        lora_r (int, optional): `r` passed to `LoraConfig`. Defaults to 16.
        lora_alpha (int, optional): `lora_alpha` passed to `LoraConfig`. Defaults to 32.
        lora_dropout (float, optional): `lora_dropout` passed to `LoraConfig`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): `fan_in_fan_out` passed to `LoraConfig`. Defaults to True.
        target_modules (List[str] or str, optional): `target_modules` passed to `LoraConfig`.
            Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Value written to `config.gradient_checkpointing`.
            Defaults to False.
        pad_token_id (int, optional): Value written to `config.pad_token_id`. Defaults to 0.
    """
    super().__init__()
    self.config = MistralConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing
    self.config.pad_token_id = pad_token_id
    self.peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    self.base_model = MistralForCausalLM(self.config)
    # The adapter is attached before init_weights() runs.
    self.base_model.add_adapter(self.peft_config)
    self.init_weights()

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^model.": "base_model.",
    "^lm_head.": "base_model.",
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "v_proj.weight": "v_proj.base_layer.weight",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

base_model `instance-attribute` ¤

base_model = MistralForCausalLM(config)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`
`position_ids`	`Tensor`	Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).

Source code in src/unitorch/models/peft/modeling_mistral.py

def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the wrapped causal language model and return its logits.

    Args:
        input_ids (torch.Tensor): Input tensor of shape (batch_size, sequence_length).
        attention_mask (torch.Tensor, optional): Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.
        position_ids (torch.Tensor, optional): Position IDs tensor of shape (batch_size, sequence_length). Defaults to None.

    Returns:
        torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).
    """
    model_inputs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
    )
    # return_dict=True so the logits can be read by attribute below.
    return self.base_model(**model_inputs, return_dict=True).logits

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 1.	`1`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 2.	`2`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 48.	`48`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_mistral.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate continuations for ``input_ids`` and return them as fixed-width tensors.

    The prompt tokens are stripped from every generated sequence and the
    remaining tokens are right-padded with ``decoder_start_token_id`` up to
    ``max_gen_seq_length``.

    Args:
        input_ids: Input tensor of shape (batch_size, sequence_length).
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token; also used as
            the fill value for unused positions of the returned sequences. Defaults to 1.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 2.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum number of newly generated tokens. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum number of newly generated tokens. Defaults to 48.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: ``sequences`` (int64) with last dimension ``max_gen_seq_length``; it is
            flattened to two dimensions when ``num_return_sequences == 1`` and otherwise keeps a
            ``num_return_sequences`` axis. ``sequences_scores`` is taken from the underlying
            ``generate`` output unchanged.
    """
    input_seq_length = input_ids.size(1)
    # The length limits of the underlying API count the prompt, so it is added here.
    outputs = self.base_model.generate(
        input_ids=input_ids,
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Group the returned rows so that axis 1 indexes the returned sequences.
    sequences = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    generated_length = sequences.size(-1) - input_seq_length

    # Allocate the padded buffer directly as int64 on the target device.
    # Building it as float32 on the CPU and casting afterwards costs an extra
    # transfer and silently rounds token ids above 2**24.
    padded = torch.full(
        (sequences.size(0), num_return_sequences, max_gen_seq_length),
        decoder_start_token_id,
        dtype=torch.long,
        device=sequences.device,
    )
    # Keep only the newly generated tokens (everything after the prompt).
    padded[:, :, :generated_length].copy_(sequences[:, :, input_seq_length:])

    if num_return_sequences == 1:
        padded = padded.reshape(-1, max_gen_seq_length)

    # NOTE(review): `sequences_scores` is read unconditionally; presumably only
    # beam-search style outputs carry it -- confirm behavior for num_beams=1.
    return GenericOutputs(
        sequences=padded,
        sequences_scores=outputs.sequences_scores,
    )

QWen3LoraForGeneration¤

Bases: GenericPeftModel

QWen3 LoRA model for text generation.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	Path to the model configuration file.	required
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing. Defaults to False.	`False`

Source code in src/unitorch/models/peft/modeling_qwen.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Build a Qwen3 causal LM from a JSON config and attach a LoRA adapter to it.

    Args:
        config_path (str): Path to the model configuration file.
        lora_r (int, optional): Forwarded to ``LoraConfig`` as ``r``. Defaults to 16.
        lora_alpha (int, optional): Forwarded to ``LoraConfig``. Defaults to 32.
        lora_dropout (float, optional): Forwarded to ``LoraConfig``. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to ``LoraConfig``. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to ``LoraConfig``.
            Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Value stored on the model config's
            ``gradient_checkpointing`` attribute. Defaults to False.
    """
    super().__init__()

    # Load the model configuration and record the checkpointing flag on it.
    qwen_config = Qwen3Config.from_json_file(config_path)
    qwen_config.gradient_checkpointing = gradient_checkpointing
    self.config = qwen_config

    # Collect the LoRA hyper-parameters in one place before building the config.
    lora_options = {
        "r": lora_r,
        "lora_alpha": lora_alpha,
        "lora_dropout": lora_dropout,
        "fan_in_fan_out": fan_in_fan_out,
        "target_modules": target_modules,
    }
    self.peft_config = LoraConfig(**lora_options)

    # The network is instantiated from the config, then the adapter is attached.
    self.model = Qwen3ForCausalLM(qwen_config)
    self.model.add_adapter(self.peft_config)

    self.init_weights()

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^(?!model\\.model\\.).*": "model."
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "q_proj.bias": "q_proj.base_layer.bias",
    "v_proj.weight": "v_proj.base_layer.weight",
    "v_proj.bias": "v_proj.base_layer.bias",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

model `instance-attribute` ¤

model = Qwen3ForCausalLM(config)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`attention_mask`	`Tensor`	Attention mask tensor. Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).

Source code in src/unitorch/models/peft/modeling_qwen.py

def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
):
    """
    Run the wrapped causal language model and return its logits.

    Args:
        input_ids (torch.Tensor): Input tensor of shape (batch_size, sequence_length).
        attention_mask (torch.Tensor, optional): Attention mask tensor. Defaults to None.

    Returns:
        torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).
    """
    model_inputs = dict(input_ids=input_ids, attention_mask=attention_mask)
    # return_dict=True so the logits can be read by attribute below.
    return self.model(**model_inputs, return_dict=True).logits

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
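`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 151643.	`151643`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 151645.	`151645`
`decoder_pad_token_id`	`int`	The ID of the padding token passed to the underlying generate call. Defaults to 151643.	`151643`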
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 512.	`512`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_qwen.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate continuations for ``input_ids`` and return them as fixed-width tensors.

    The prompt tokens are stripped from every generated sequence and the
    remaining tokens are right-padded with ``decoder_start_token_id`` up to
    ``max_gen_seq_length``.

    Args:
        input_ids: Input tensor of shape (batch_size, sequence_length).
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token; also used as
            the fill value for unused positions of the returned sequences. Defaults to 151643.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 151645.
        decoder_pad_token_id (int, optional): Token ID passed to the underlying ``generate`` call
            as ``pad_token_id``. Defaults to 151643.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum number of newly generated tokens. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum number of newly generated tokens. Defaults to 512.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: ``sequences`` (int64) with last dimension ``max_gen_seq_length``; it is
            flattened to two dimensions when ``num_return_sequences == 1`` and otherwise keeps a
            ``num_return_sequences`` axis. ``sequences_scores`` is taken from the underlying
            ``generate`` output unchanged.
    """
    input_seq_length = input_ids.size(1)
    # The length limits of the underlying API count the prompt, so it is added here.
    outputs = self.model.generate(
        input_ids=input_ids,
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        pad_token_id=decoder_pad_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Group the returned rows so that axis 1 indexes the returned sequences.
    sequences = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    generated_length = sequences.size(-1) - input_seq_length

    # Allocate the padded buffer directly as int64 on the target device.
    # Building it as float32 on the CPU and casting afterwards costs an extra
    # transfer and silently rounds token ids above 2**24.
    padded = torch.full(
        (sequences.size(0), num_return_sequences, max_gen_seq_length),
        decoder_start_token_id,
        dtype=torch.long,
        device=sequences.device,
    )
    # Keep only the newly generated tokens (everything after the prompt).
    padded[:, :, :generated_length].copy_(sequences[:, :, input_seq_length:])

    if num_return_sequences == 1:
        padded = padded.reshape(-1, max_gen_seq_length)

    # NOTE(review): `sequences_scores` is read unconditionally; presumably only
    # beam-search style outputs carry it -- confirm behavior for num_beams=1.
    return GenericOutputs(
        sequences=padded,
        sequences_scores=outputs.sequences_scores,
    )

QWen3DPOLoraForGeneration¤

Bases: GenericPeftModel

QWen3 DPO LoRA model for preference optimization.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	Path to the model configuration file.	required
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing. Defaults to False.	`False`
`dpo_beta`	`float`	DPO beta coefficient. Defaults to 0.1.	`0.1`

Source code in src/unitorch/models/peft/modeling_qwen.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
    dpo_beta: Optional[float] = 0.1,
):
    """
    Build a Qwen3 causal LM with a LoRA adapter for DPO training.

    Args:
        config_path (str): Path to the model configuration file.
        lora_r (int, optional): Forwarded to ``LoraConfig`` as ``r``. Defaults to 16.
        lora_alpha (int, optional): Forwarded to ``LoraConfig``. Defaults to 32.
        lora_dropout (float, optional): Forwarded to ``LoraConfig``. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Forwarded to ``LoraConfig``. Defaults to True.
        target_modules (List[str] or str, optional): Forwarded to ``LoraConfig``.
            Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Value stored on the model config's
            ``gradient_checkpointing`` attribute. Defaults to False.
        dpo_beta (float, optional): DPO beta coefficient. Defaults to 0.1.
    """
    super().__init__()

    # Load the model configuration and record the checkpointing flag on it.
    qwen_config = Qwen3Config.from_json_file(config_path)
    qwen_config.gradient_checkpointing = gradient_checkpointing
    self.config = qwen_config

    # Collect the LoRA hyper-parameters in one place before building the config.
    lora_options = {
        "r": lora_r,
        "lora_alpha": lora_alpha,
        "lora_dropout": lora_dropout,
        "fan_in_fan_out": fan_in_fan_out,
        "target_modules": target_modules,
    }
    self.peft_config = LoraConfig(**lora_options)

    # The network is instantiated from the config, then the adapter is attached.
    self.model = Qwen3ForCausalLM(qwen_config)
    self.model.add_adapter(self.peft_config)

    self.init_weights()

    # Scale applied to the log-ratio difference inside the DPO loss.
    self.dpo_beta = dpo_beta

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^(?!model\\.model\\.).*": "model."
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "q_proj.bias": "q_proj.base_layer.bias",
    "v_proj.weight": "v_proj.base_layer.weight",
    "v_proj.bias": "v_proj.base_layer.bias",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

model `instance-attribute` ¤

model = Qwen3ForCausalLM(config)

dpo_beta `instance-attribute` ¤

dpo_beta = dpo_beta

forward ¤

forward(
    input_ids: Tensor,
    win_input_ids: Tensor,
    lose_input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    win_attention_mask: Optional[Tensor] = None,
    lose_attention_mask: Optional[Tensor] = None,
)

Forward pass computing DPO loss.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Prompt input IDs.	required
`win_input_ids`	`Tensor`	Winning response IDs.	required
`lose_input_ids`	`Tensor`	Losing response IDs.	required
`attention_mask`	`Tensor`	Prompt attention mask. Defaults to None.	`None`
`win_attention_mask`	`Tensor`	Win response attention mask. Defaults to None.	`None`
`lose_attention_mask`	`Tensor`	Lose response attention mask. Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: DPO loss scalar.

Source code in src/unitorch/models/peft/modeling_qwen.py

def forward(
    self,
    input_ids: torch.Tensor,
    win_input_ids: torch.Tensor,
    lose_input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    win_attention_mask: Optional[torch.Tensor] = None,
    lose_attention_mask: Optional[torch.Tensor] = None,
):
    """
    Forward pass computing DPO loss.

    The prompt is concatenated (along dim 1) with the winning and the losing
    response, both sequences are scored with the adapters enabled (policy) and
    with the adapters disabled (reference, no gradients), and the loss is
    ``-logsigmoid(dpo_beta * ((win - lose) - (ref_win - ref_lose)))`` averaged
    over all elements.

    Args:
        input_ids (torch.Tensor): Prompt input IDs.
        win_input_ids (torch.Tensor): Winning response IDs.
        lose_input_ids (torch.Tensor): Losing response IDs.
        attention_mask (torch.Tensor, optional): Prompt attention mask. Defaults to None.
        win_attention_mask (torch.Tensor, optional): Win response attention mask. When None,
            every winning-response token is treated as valid. Defaults to None.
        lose_attention_mask (torch.Tensor, optional): Lose response attention mask. When None,
            every losing-response token is treated as valid. Defaults to None.

    Returns:
        torch.Tensor: DPO loss scalar.
    """
    input_seq_length = input_ids.size(1)

    def _full_mask(response_ids, response_mask):
        # Mask covering prompt + response.
        if response_mask is None:
            # Previously a missing response mask crashed when it was sliced
            # below; fall back to "all tokens valid" instead.
            prompt_mask = (
                torch.ones_like(input_ids) if attention_mask is None else attention_mask
            )
            return torch.cat(
                [prompt_mask, prompt_mask.new_ones(response_ids.shape)], dim=1
            )
        if attention_mask is not None:
            return torch.cat([attention_mask, response_mask], dim=1)
        # No prompt mask given: the response mask is used as provided.
        return response_mask

    def _response_logprobs(logits, labels, labels_mask):
        # Masked per-token log-probabilities of `labels`.
        # Position t predicts token t + 1, hence the one-step shift of the logits.
        logits = logits[:, input_seq_length - 1 : -1, :]
        nll = F.cross_entropy(
            logits.reshape(-1, logits.size(-1)),
            labels.reshape(-1),
            reduction="none",
        ).reshape(labels.size(0), -1)
        return -nll * labels_mask

    # Masks are resolved first, while the response tensors still hold only the responses.
    win_attention_mask = _full_mask(win_input_ids, win_attention_mask)
    lose_attention_mask = _full_mask(lose_input_ids, lose_attention_mask)
    win_input_ids = torch.cat([input_ids, win_input_ids], dim=1)
    lose_input_ids = torch.cat([input_ids, lose_input_ids], dim=1)

    win_labels = win_input_ids[:, input_seq_length:]
    lose_labels = lose_input_ids[:, input_seq_length:]
    win_labels_mask = win_attention_mask[:, input_seq_length:]
    lose_labels_mask = lose_attention_mask[:, input_seq_length:]

    # Policy pass (adapters enabled, gradients tracked).
    win_outputs = self.model(
        input_ids=win_input_ids,
        attention_mask=win_attention_mask,
        return_dict=True,
    )
    lose_outputs = self.model(
        input_ids=lose_input_ids,
        attention_mask=lose_attention_mask,
        return_dict=True,
    )
    win_logprobs = _response_logprobs(win_outputs.logits, win_labels, win_labels_mask)
    lose_logprobs = _response_logprobs(
        lose_outputs.logits, lose_labels, lose_labels_mask
    )

    # Reference pass: same network with the adapters switched off.
    with torch.no_grad():
        self.model.disable_adapters()
        try:
            ref_win_outputs = self.model(
                input_ids=win_input_ids,
                attention_mask=win_attention_mask,
                return_dict=True,
            )
            ref_lose_outputs = self.model(
                input_ids=lose_input_ids,
                attention_mask=lose_attention_mask,
                return_dict=True,
            )
            ref_win_logprobs = _response_logprobs(
                ref_win_outputs.logits, win_labels, win_labels_mask
            )
            ref_lose_logprobs = _response_logprobs(
                ref_lose_outputs.logits, lose_labels, lose_labels_mask
            )
        finally:
            # Always restore the adapters, even if the reference pass fails.
            self.model.enable_adapters()

    # NOTE(review): the log-ratios are kept per token (not summed over each
    # response), so both responses must share the same padded length --
    # confirm this is intended rather than the sequence-level DPO objective.
    logratios = win_logprobs - lose_logprobs
    ref_logratios = ref_win_logprobs - ref_lose_logprobs
    logits = logratios - ref_logratios
    loss = -F.logsigmoid(self.dpo_beta * logits).mean()
    return loss

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
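`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 151643.	`151643`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 151645.	`151645`
`decoder_pad_token_id`	`int`	The ID of the padding token passed to the underlying generate call. Defaults to 151643.	`151643`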
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 512.	`512`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_qwen.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate continuations for ``input_ids`` and return them as fixed-width tensors.

    The prompt tokens are stripped from every generated sequence and the
    remaining tokens are right-padded with ``decoder_start_token_id`` up to
    ``max_gen_seq_length``.

    Args:
        input_ids: Input tensor of shape (batch_size, sequence_length).
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token; also used as
            the fill value for unused positions of the returned sequences. Defaults to 151643.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 151645.
        decoder_pad_token_id (int, optional): Token ID passed to the underlying ``generate`` call
            as ``pad_token_id``. Defaults to 151643.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum number of newly generated tokens. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum number of newly generated tokens. Defaults to 512.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: ``sequences`` (int64) with last dimension ``max_gen_seq_length``; it is
            flattened to two dimensions when ``num_return_sequences == 1`` and otherwise keeps a
            ``num_return_sequences`` axis. ``sequences_scores`` is taken from the underlying
            ``generate`` output unchanged.
    """
    input_seq_length = input_ids.size(1)
    # The length limits of the underlying API count the prompt, so it is added here.
    outputs = self.model.generate(
        input_ids=input_ids,
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        pad_token_id=decoder_pad_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Group the returned rows so that axis 1 indexes the returned sequences.
    sequences = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    generated_length = sequences.size(-1) - input_seq_length

    # Allocate the padded buffer directly as int64 on the target device.
    # Building it as float32 on the CPU and casting afterwards costs an extra
    # transfer and silently rounds token ids above 2**24.
    padded = torch.full(
        (sequences.size(0), num_return_sequences, max_gen_seq_length),
        decoder_start_token_id,
        dtype=torch.long,
        device=sequences.device,
    )
    # Keep only the newly generated tokens (everything after the prompt).
    padded[:, :, :generated_length].copy_(sequences[:, :, input_seq_length:])

    if num_return_sequences == 1:
        padded = padded.reshape(-1, max_gen_seq_length)

    # NOTE(review): `sequences_scores` is read unconditionally; presumably only
    # beam-search style outputs carry it -- confirm behavior for num_beams=1.
    return GenericOutputs(
        sequences=padded,
        sequences_scores=outputs.sequences_scores,
    )

QWen3GRPOLoraForGeneration¤

Bases: GenericPeftModel

QWen3 GRPO LoRA model for group relative policy optimization.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	Path to the model configuration file.	required
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing. Defaults to False.	`False`

Source code in src/unitorch/models/peft/modeling_qwen.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
):
    """
    QWen3 GRPO LoRA model for group relative policy optimization.

    Builds a `Qwen3ForCausalLM` from a JSON configuration file and attaches a
    LoRA adapter configured from the `lora_*` / `target_modules` arguments.

    Args:
        config_path (str): Path to the model configuration file.
        lora_r (int, optional): Passed to `LoraConfig` as `r` (the LoRA rank). Defaults to 16.
        lora_alpha (int, optional): Passed to `LoraConfig` as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Passed to `LoraConfig` as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Passed to `LoraConfig` as `fan_in_fan_out`. Defaults to True.
        target_modules (str or List[str], optional): Passed to `LoraConfig` as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Whether to use gradient checkpointing. Defaults to False.
    """
    super().__init__()
    # Load the base-model configuration and record the checkpointing flag on it.
    self.config = Qwen3Config.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing
    # NOTE(review): `target_modules` has a list default that is shared across
    # calls; it is only passed through here, never mutated in this method.
    self.peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    # The model is instantiated from the config only; no checkpoint is read here.
    self.model = Qwen3ForCausalLM(self.config)
    # The adapter must be attached before init_weights() runs.
    self.model.add_adapter(self.peft_config)
    # init_weights is not defined in this block; presumably inherited from
    # GenericPeftModel -- confirm against the base class.
    self.init_weights()

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^(?!model\\.model\\.).*": "model."
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "q_proj.bias": "q_proj.base_layer.bias",
    "v_proj.weight": "v_proj.base_layer.weight",
    "v_proj.bias": "v_proj.base_layer.bias",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

model `instance-attribute` ¤

model = Qwen3ForCausalLM(config)

forward ¤

forward(
    input_ids: Tensor,
    sampled_ids: Tensor,
    sampled_rewards: Tensor,
    attention_mask: Optional[Tensor] = None,
    sampled_attention_mask: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
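`input_ids`	`Tensor`	Prompt token IDs of shape (batch_size, sequence_length).	required
`sampled_ids`	`Tensor`	Sampled completion token IDs of shape (batch_size, num_samples, sampled_sequence_length).	required
`sampled_rewards`	`Tensor`	Reward of each sampled completion, of shape (batch_size, num_samples).	required
`attention_mask`	`Tensor`	Attention mask for `input_ids` of shape (batch_size, sequence_length). Defaults to None.	`None`
`sampled_attention_mask`	`Tensor`	Attention mask for `sampled_ids`, with the same shape as `sampled_ids`. Defaults to None.	`None`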

Returns:

Type	Description
	torch.Tensor: Scalar GRPO loss (advantage-weighted log-ratio term plus a squared log-ratio penalty), averaged over all sampled completions.

Source code in src/unitorch/models/peft/modeling_qwen.py

def forward(
    self,
    input_ids: torch.Tensor,
    sampled_ids: torch.Tensor,
    sampled_rewards: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    sampled_attention_mask: Optional[torch.Tensor] = None,
):
    """
    Compute the GRPO training loss for prompts and their sampled completions.

    Each prompt is repeated once per sampled completion, concatenated with that
    completion and scored twice: once with the LoRA adapter enabled (policy) and
    once with it disabled (reference, no gradients). The loss combines the
    advantage-weighted difference of mean token log-probabilities with a squared
    log-ratio penalty.

    Args:
        input_ids (torch.Tensor): Prompt token IDs of shape (batch_size, sequence_length).
        sampled_ids (torch.Tensor): Sampled completion token IDs of shape
            (batch_size, num_samples, sampled_sequence_length).
        sampled_rewards (torch.Tensor): Reward per sampled completion of shape
            (batch_size, num_samples). Rewards are normalised within each group.
        attention_mask (torch.Tensor, optional): Mask for `input_ids`. When None,
            every prompt position is treated as valid. Defaults to None.
        sampled_attention_mask (torch.Tensor, optional): Mask for `sampled_ids`
            (same shape). When None, every sampled position is treated as valid.
            Defaults to None.

    Returns:
        torch.Tensor: Scalar loss.
    """
    # Group-relative advantages: normalise rewards within each prompt's group.
    # No gradient flows through them.
    mean_rewards = sampled_rewards.mean(dim=-1, keepdim=True)
    std_rewards = sampled_rewards.std(dim=-1, keepdim=True)
    adv_rewards = (sampled_rewards - mean_rewards) / (std_rewards + 1e-5)
    adv_rewards = adv_rewards.detach().reshape(-1)

    # Missing masks default to all-ones so the label mask below is always
    # defined (previously a None mask crashed when it was sliced).
    mask_dtype = next(
        (
            mask.dtype
            for mask in (attention_mask, sampled_attention_mask)
            if mask is not None
        ),
        input_ids.dtype,
    )
    if attention_mask is None:
        attention_mask = torch.ones_like(input_ids, dtype=mask_dtype)
    if sampled_attention_mask is None:
        sampled_attention_mask = torch.ones_like(sampled_ids, dtype=mask_dtype)

    # Repeat input_ids and attention_mask once per sampled completion.
    num_samples = sampled_ids.size(1)
    sampled_seq_length = sampled_ids.size(-1)
    input_seq_length = input_ids.size(1)
    all_input_ids = torch.cat(
        [
            input_ids.repeat_interleave(num_samples, dim=0),
            sampled_ids.reshape(-1, sampled_seq_length),
        ],
        dim=1,
    )  # [B*G, L_in + L_out]
    all_attention_mask = torch.cat(
        [
            attention_mask.repeat_interleave(num_samples, dim=0),
            sampled_attention_mask.reshape(-1, sampled_seq_length),
        ],
        dim=1,
    )  # [B*G, L_in + L_out]

    labels = all_input_ids[:, input_seq_length:]
    labels_mask = all_attention_mask[:, input_seq_length:]

    def _masked_token_logprobs(full_logits):
        # Position t predicts token t + 1, so the logits for the completion
        # start one step before the first completion token.
        logits = full_logits[:, input_seq_length - 1 : -1, :]
        nll_loss = F.cross_entropy(
            logits.reshape(-1, logits.size(-1)),
            labels.reshape(-1),
            reduction="none",
        ).reshape(labels.size(0), -1)
        return -nll_loss * labels_mask

    all_outputs = self.model(
        input_ids=all_input_ids,
        attention_mask=all_attention_mask,
        return_dict=True,
    )
    logprobs = _masked_token_logprobs(all_outputs.logits)

    with torch.no_grad():
        # The reference policy is the same network with the adapter switched off.
        self.model.disable_adapters()
        try:
            ref_outputs = self.model(
                input_ids=all_input_ids,
                attention_mask=all_attention_mask,
                return_dict=True,
            )
        finally:
            # Always restore the adapter, even if the reference pass fails.
            self.model.enable_adapters()
        ref_logprobs = _masked_token_logprobs(ref_outputs.logits)

    # Mean log-probability per completion over its unmasked tokens.
    token_counts = labels_mask.sum(-1) + 1e-5
    logprob_mean = logprobs.sum(-1) / token_counts
    ref_logprob_mean = ref_logprobs.sum(-1) / token_counts

    log_ratio = logprob_mean - ref_logprob_mean.detach()
    kl_div = 0.02 * (log_ratio**2)
    loss = (-adv_rewards * log_ratio + kl_div).mean()

    return loss

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 151643.	`151643`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 151645.	`151645`
`decoder_pad_token_id`	`int`	The ID of the padding token. Defaults to 151643.	`151643`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 512.	`512`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_qwen.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate text using the generation model.

    The prompt is stripped from every returned sequence and the remainder is
    right-padded with `decoder_start_token_id` to exactly `max_gen_seq_length`.

    Args:
        input_ids: Input tensor of shape (batch_size, sequence_length).
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token; also used to fill
            unused output positions. Defaults to 151643.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 151645.
        decoder_pad_token_id (int, optional): The ID of the padding token passed to the underlying
            `generate` call. Defaults to 151643.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum length of generated sequences. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum length of generated sequences. Defaults to 512.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: `sequences` of shape (batch_size, max_gen_seq_length) when
            `num_return_sequences == 1`, otherwise
            (batch_size, num_return_sequences, max_gen_seq_length), together with
            `sequences_scores` (None when the decoding strategy does not produce them).
    """
    input_seq_length = input_ids.size(1)
    outputs = self.model.generate(
        input_ids=input_ids,
        # The underlying lengths count the prompt, so offset both bounds by it.
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        pad_token_id=decoder_pad_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # [B * R, L_total] -> [B, R, L_total]
    generated = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    gen_length = generated.size(-1) - input_seq_length

    # Fixed-size integer buffer pre-filled with the start token. Building it as
    # an integer tensor avoids round-tripping token ids through floats.
    sequences = torch.full(
        (generated.size(0), num_return_sequences, max_gen_seq_length),
        decoder_start_token_id,
        dtype=torch.long,
        device=generated.device,
    )
    sequences[:, :, :gen_length].copy_(generated[:, :, input_seq_length:])

    if num_return_sequences == 1:
        sequences = sequences.reshape(-1, max_gen_seq_length)

    # `sequences_scores` is only present on beam-search outputs; fall back to
    # None so greedy/sampling decoding (num_beams=1) does not raise.
    # NOTE(review): assumes GenericOutputs accepts a None field -- confirm.
    return GenericOutputs(
        sequences=sequences,
        sequences_scores=getattr(outputs, "sequences_scores", None),
    )

QWen3VLLoraForGeneration¤

Bases: GenericPeftModel

QWen3-VL LoRA model for multimodal generation.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	Path to the model configuration file.	required
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing. Defaults to False.	`False`

Source code in src/unitorch/models/peft/modeling_qwen_vl.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
):
    """
    QWen3-VL LoRA model for multimodal generation.

    Builds a `Qwen3VLForConditionalGeneration` from a JSON configuration file
    and attaches a LoRA adapter configured from the `lora_*` /
    `target_modules` arguments.

    Args:
        config_path (str): Path to the model configuration file.
        lora_r (int, optional): Passed to `LoraConfig` as `r` (the LoRA rank). Defaults to 16.
        lora_alpha (int, optional): Passed to `LoraConfig` as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Passed to `LoraConfig` as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Passed to `LoraConfig` as `fan_in_fan_out`. Defaults to True.
        target_modules (str or List[str], optional): Passed to `LoraConfig` as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Whether to use gradient checkpointing. Defaults to False.
    """
    super().__init__()
    # Load the base-model configuration and record the checkpointing flag on it.
    self.config = Qwen3VLConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing
    # NOTE(review): `target_modules` has a list default that is shared across
    # calls; it is only passed through here, never mutated in this method.
    self.peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    # The model is instantiated from the config only; no checkpoint is read here.
    self.model = Qwen3VLForConditionalGeneration(self.config)
    # The adapter must be attached before init_weights() runs.
    self.model.add_adapter(self.peft_config)
    # init_weights is not defined in this block; presumably inherited from
    # GenericPeftModel -- confirm against the base class.
    self.init_weights()

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^visual.": "model.model.",
    "^model(?!\\.model).": "model.model.language_",
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "q_proj.bias": "q_proj.base_layer.bias",
    "v_proj.weight": "v_proj.base_layer.weight",
    "v_proj.bias": "v_proj.base_layer.bias",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

model `instance-attribute` ¤

model = Qwen3VLForConditionalGeneration(config)

forward ¤

forward(
    input_ids: Tensor,
    pixel_values: Tensor,
    image_grid_thw: Tensor,
    attention_mask: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input token IDs of shape (batch_size, sequence_length).	required
`pixel_values`	`Tensor`	Image pixel values; reshaped to (-1, last_dim) before being passed to the model.	required
`image_grid_thw`	`Tensor`	Image grid tensor; reshaped to (-1, last_dim) before being passed to the model.	required
`attention_mask`	`Tensor`	Attention mask tensor of shape (batch_size, sequence_length). Defaults to None.	`None`

Returns:

Type	Description
	torch.Tensor: Output logits of shape (batch_size, sequence_length, vocab_size).

Source code in src/unitorch/models/peft/modeling_qwen_vl.py

def forward(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    image_grid_thw: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
):
    """
    Run the wrapped model on text and image inputs and return its logits.

    Args:
        input_ids (torch.Tensor): Input token IDs of shape (batch_size, sequence_length).
        pixel_values (torch.Tensor): Image pixel values; all leading dimensions are
            collapsed so the model receives a 2-D tensor of shape (-1, last_dim).
        image_grid_thw (torch.Tensor): Image grid tensor; collapsed to 2-D in the
            same way as `pixel_values`.
        attention_mask (torch.Tensor, optional): Attention mask tensor of shape
            (batch_size, sequence_length). Defaults to None.

    Returns:
        torch.Tensor: The `logits` field of the model output.
    """
    # Collapse any leading batch/image dimensions; only the last axis is kept.
    flat_grid = image_grid_thw.view(-1, image_grid_thw.shape[-1])
    flat_pixels = pixel_values.view(-1, pixel_values.shape[-1])

    model_inputs = {
        "input_ids": input_ids,
        "pixel_values": flat_pixels,
        "image_grid_thw": flat_grid,
        "attention_mask": attention_mask,
        "return_dict": True,
    }
    return self.model(**model_inputs).logits

generate ¤

generate(
    input_ids: Tensor,
    pixel_values: Tensor,
    image_grid_thw: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
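`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`pixel_values`	`Tensor`	Image pixel values; reshaped to (-1, last_dim) before being passed to the model.	required
`image_grid_thw`	`Tensor`	Image grid tensor; reshaped to (-1, last_dim) before being passed to the model.	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 151643.	`151643`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 151645.	`151645`
`decoder_pad_token_id`	`int`	The ID of the padding token. Defaults to 151643.	`151643`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 512.	`512`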
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_qwen_vl.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    image_grid_thw: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate text from text and image inputs using the generation model.

    The prompt is stripped from every returned sequence and the remainder is
    right-padded with `decoder_start_token_id` to exactly `max_gen_seq_length`.

    Args:
        input_ids: Input tensor of shape (batch_size, sequence_length).
        pixel_values: Image pixel values; reshaped to (-1, last_dim) before the model call.
        image_grid_thw: Image grid tensor; reshaped to (-1, last_dim) before the model call.
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token; also used to fill
            unused output positions. Defaults to 151643.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 151645.
        decoder_pad_token_id (int, optional): The ID of the padding token passed to the underlying
            `generate` call. Defaults to 151643.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum length of generated sequences. Defaults to 0.
        max_gen_seq_length (int, optional): Maximum length of generated sequences. Defaults to 512.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: `sequences` of shape (batch_size, max_gen_seq_length) when
            `num_return_sequences == 1`, otherwise
            (batch_size, num_return_sequences, max_gen_seq_length), together with
            `sequences_scores` (None when the decoding strategy does not produce them).
    """
    input_seq_length = input_ids.size(1)
    # Collapse any leading batch/image dimensions; only the last axis is kept.
    image_grid_thw = image_grid_thw.view(-1, image_grid_thw.size(-1))
    pixel_values = pixel_values.view(-1, pixel_values.size(-1))
    outputs = self.model.generate(
        input_ids=input_ids,
        pixel_values=pixel_values,
        image_grid_thw=image_grid_thw,
        # The underlying lengths count the prompt, so offset both bounds by it.
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        pad_token_id=decoder_pad_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # [B * R, L_total] -> [B, R, L_total]
    generated = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    gen_length = generated.size(-1) - input_seq_length

    # Fixed-size integer buffer pre-filled with the start token. Building it as
    # an integer tensor avoids round-tripping token ids through floats.
    sequences = torch.full(
        (generated.size(0), num_return_sequences, max_gen_seq_length),
        decoder_start_token_id,
        dtype=torch.long,
        device=generated.device,
    )
    sequences[:, :, :gen_length].copy_(generated[:, :, input_seq_length:])

    if num_return_sequences == 1:
        sequences = sequences.reshape(-1, max_gen_seq_length)

    # `sequences_scores` is only present on beam-search outputs; fall back to
    # None so greedy/sampling decoding (num_beams=1) does not raise.
    # NOTE(review): assumes GenericOutputs accepts a None field -- confirm.
    return GenericOutputs(
        sequences=sequences,
        sequences_scores=getattr(outputs, "sequences_scores", None),
    )

QWen3VLDPOLoraForGeneration¤

Bases: GenericPeftModel

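QWen3-VL DPO LoRA model for multimodal generation, trained with a direct preference optimization loss over preferred ("win") and rejected ("lose") completions.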

Parameters:

Name	Type	Description	Default
`config_path`	`str`	Path to the model configuration file.	required
`gradient_checkpointing`	`bool`	Whether to use gradient checkpointing. Defaults to False.	`False`

Source code in src/unitorch/models/peft/modeling_qwen_vl.py

def __init__(
    self,
    config_path: str,
    lora_r: Optional[int] = 16,
    lora_alpha: Optional[int] = 32,
    lora_dropout: Optional[float] = 0.05,
    fan_in_fan_out: Optional[bool] = True,
    target_modules: Optional[Union[List[str], str]] = ["q_proj", "v_proj"],
    gradient_checkpointing: Optional[bool] = False,
    dpo_beta: Optional[float] = 0.1,
):
    """
    QWen3-VL LoRA model for multimodal generation with a DPO training loss.

    Builds a `Qwen3VLForConditionalGeneration` from a JSON configuration file,
    attaches a LoRA adapter configured from the `lora_*` / `target_modules`
    arguments, and stores `dpo_beta` on the instance.

    Args:
        config_path (str): Path to the model configuration file.
        lora_r (int, optional): Passed to `LoraConfig` as `r` (the LoRA rank). Defaults to 16.
        lora_alpha (int, optional): Passed to `LoraConfig` as `lora_alpha`. Defaults to 32.
        lora_dropout (float, optional): Passed to `LoraConfig` as `lora_dropout`. Defaults to 0.05.
        fan_in_fan_out (bool, optional): Passed to `LoraConfig` as `fan_in_fan_out`. Defaults to True.
        target_modules (str or List[str], optional): Passed to `LoraConfig` as `target_modules`.
            Defaults to ["q_proj", "v_proj"].
        gradient_checkpointing (bool, optional): Whether to use gradient checkpointing. Defaults to False.
        dpo_beta (float, optional): Stored as `self.dpo_beta`. Defaults to 0.1.
    """
    super().__init__()
    # Load the base-model configuration and record the checkpointing flag on it.
    self.config = Qwen3VLConfig.from_json_file(config_path)
    self.config.gradient_checkpointing = gradient_checkpointing
    # NOTE(review): `target_modules` has a list default that is shared across
    # calls; it is only passed through here, never mutated in this method.
    self.peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        fan_in_fan_out=fan_in_fan_out,
        target_modules=target_modules,
    )
    # The model is instantiated from the config only; no checkpoint is read here.
    self.model = Qwen3VLForConditionalGeneration(self.config)
    # The adapter must be attached before init_weights() runs.
    self.model.add_adapter(self.peft_config)
    # init_weights is not defined in this block; presumably inherited from
    # GenericPeftModel -- confirm against the base class.
    self.init_weights()
    # Plain float attribute, so it is unaffected by init_weights() above.
    self.dpo_beta = dpo_beta

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

prefix_keys_in_state_dict = {
    "^visual.": "model.model.",
    "^model(?!\\.model).": "model.model.language_",
}

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict = {
    "q_proj.weight": "q_proj.base_layer.weight",
    "q_proj.bias": "q_proj.base_layer.bias",
    "v_proj.weight": "v_proj.base_layer.weight",
    "v_proj.bias": "v_proj.base_layer.bias",
}

config `instance-attribute` ¤

config = from_json_file(config_path)

peft_config `instance-attribute` ¤

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    fan_in_fan_out=fan_in_fan_out,
    target_modules=target_modules,
)

model `instance-attribute` ¤

model = Qwen3VLForConditionalGeneration(config)

dpo_beta `instance-attribute` ¤

dpo_beta = dpo_beta

forward ¤

forward(
    input_ids: Tensor,
    pixel_values: Tensor,
    image_grid_thw: Tensor,
    win_input_ids: Tensor,
    lose_input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    win_attention_mask: Optional[Tensor] = None,
    lose_attention_mask: Optional[Tensor] = None,
)

Forward pass of the generation model.

Parameters:

Name	Type	Description	Default
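`input_ids`	`Tensor`	Prompt token IDs of shape (batch_size, sequence_length).	required
`pixel_values`	`Tensor`	Image pixel values; reshaped to (-1, last_dim) before being passed to the model.	required
`image_grid_thw`	`Tensor`	Image grid tensor; reshaped to (-1, last_dim) before being passed to the model.	required
`win_input_ids`	`Tensor`	Token IDs of the preferred completion of shape (batch_size, win_sequence_length); appended to `input_ids`.	required
`lose_input_ids`	`Tensor`	Token IDs of the rejected completion of shape (batch_size, lose_sequence_length); appended to `input_ids`.	required
`attention_mask`	`Tensor`	Attention mask for `input_ids` of shape (batch_size, sequence_length). Defaults to None.	`None`
`win_attention_mask`	`Tensor`	Attention mask for `win_input_ids`. Defaults to None.	`None`
`lose_attention_mask`	`Tensor`	Attention mask for `lose_input_ids`. Defaults to None.	`None`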

Returns:

Type	Description
	torch.Tensor: Scalar DPO loss, i.e. the mean of `-logsigmoid(dpo_beta * (policy log-ratio - reference log-ratio))`.

Source code in src/unitorch/models/peft/modeling_qwen_vl.py

def forward(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    image_grid_thw: torch.Tensor,
    win_input_ids: torch.Tensor,
    lose_input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    win_attention_mask: Optional[torch.Tensor] = None,
    lose_attention_mask: Optional[torch.Tensor] = None,
):
    """
    Compute the DPO loss for a prompt with a preferred and a rejected completion.

    The prompt is concatenated with each completion and scored twice: with the
    LoRA adapter enabled (policy) and with it disabled (reference, no
    gradients). Both passes receive the same image inputs. Token log-probabilities
    are summed over each completion's unmasked tokens, so the win and lose
    completions may have different lengths.

    Args:
        input_ids (torch.Tensor): Prompt token IDs of shape (batch_size, sequence_length).
        pixel_values (torch.Tensor): Image pixel values; reshaped to (-1, last_dim).
        image_grid_thw (torch.Tensor): Image grid tensor; reshaped to (-1, last_dim).
        win_input_ids (torch.Tensor): Preferred completion token IDs of shape
            (batch_size, win_sequence_length).
        lose_input_ids (torch.Tensor): Rejected completion token IDs of shape
            (batch_size, lose_sequence_length).
        attention_mask (torch.Tensor, optional): Mask for `input_ids`. When None,
            every prompt position is treated as valid. Defaults to None.
        win_attention_mask (torch.Tensor, optional): Mask for `win_input_ids`. When
            None, every position is treated as valid. Defaults to None.
        lose_attention_mask (torch.Tensor, optional): Mask for `lose_input_ids`. When
            None, every position is treated as valid. Defaults to None.

    Returns:
        torch.Tensor: Scalar loss,
            mean(-logsigmoid(dpo_beta * ((win - lose) - (ref_win - ref_lose)))).
    """
    # Collapse any leading batch/image dimensions; only the last axis is kept.
    image_grid_thw = image_grid_thw.view(-1, image_grid_thw.size(-1))
    pixel_values = pixel_values.view(-1, pixel_values.size(-1))
    input_seq_length = input_ids.size(1)

    # Missing masks default to all-ones so that every mask can be concatenated
    # and sliced consistently (previously a None mask crashed, and a completion
    # mask without a prompt mask was sliced at the wrong offset).
    mask_dtype = next(
        (
            mask.dtype
            for mask in (attention_mask, win_attention_mask, lose_attention_mask)
            if mask is not None
        ),
        input_ids.dtype,
    )
    if attention_mask is None:
        attention_mask = torch.ones_like(input_ids, dtype=mask_dtype)
    if win_attention_mask is None:
        win_attention_mask = torch.ones_like(win_input_ids, dtype=mask_dtype)
    if lose_attention_mask is None:
        lose_attention_mask = torch.ones_like(lose_input_ids, dtype=mask_dtype)

    win_input_ids = torch.cat([input_ids, win_input_ids], dim=1)
    lose_input_ids = torch.cat([input_ids, lose_input_ids], dim=1)
    win_attention_mask = torch.cat([attention_mask, win_attention_mask], dim=1)
    lose_attention_mask = torch.cat([attention_mask, lose_attention_mask], dim=1)

    def _sequence_logprobs(full_ids, full_mask):
        # One forward pass with the image inputs; returns the summed
        # log-probability of the completion tokens, shape [batch_size].
        outputs = self.model(
            input_ids=full_ids,
            pixel_values=pixel_values,
            image_grid_thw=image_grid_thw,
            attention_mask=full_mask,
            return_dict=True,
        )
        # Position t predicts token t + 1, so the logits for the completion
        # start one step before the first completion token.
        logits = outputs.logits[:, input_seq_length - 1 : -1, :]
        labels = full_ids[:, input_seq_length:]
        labels_mask = full_mask[:, input_seq_length:]
        nll_loss = F.cross_entropy(
            logits.reshape(-1, logits.size(-1)),
            labels.reshape(-1),
            reduction="none",
        ).reshape(labels.size(0), -1)
        return (-nll_loss * labels_mask).sum(-1)

    win_logprobs = _sequence_logprobs(win_input_ids, win_attention_mask)
    lose_logprobs = _sequence_logprobs(lose_input_ids, lose_attention_mask)

    with torch.no_grad():
        # The reference policy is the same network with the adapter switched off.
        self.model.disable_adapters()
        try:
            ref_win_logprobs = _sequence_logprobs(win_input_ids, win_attention_mask)
            ref_lose_logprobs = _sequence_logprobs(
                lose_input_ids, lose_attention_mask
            )
        finally:
            # Always restore the adapter, even if a reference pass fails.
            self.model.enable_adapters()

    logratios = win_logprobs - lose_logprobs
    ref_logratios = ref_win_logprobs - ref_lose_logprobs
    logits = logratios - ref_logratios
    loss = -F.logsigmoid(self.dpo_beta * logits).mean()
    return loss

generate ¤

generate(
    input_ids: Tensor,
    pixel_values: Tensor,
    image_grid_thw: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Generate text using the generation model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	Input tensor of shape (batch_size, sequence_length).	required
`num_beams`	`int`	Number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`int`	The ID of the decoder start token. Defaults to 151643.	`151643`
`decoder_end_token_id`	`int or List[int]`	The ID(s) of the decoder end token(s). Defaults to 151645.	`151645`
`decoder_pad_token_id`	`int`	The ID of the padding token. Defaults to 151643.	`151643`
`num_return_sequences`	`int`	Number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`int`	Minimum length of generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`int`	Maximum length of generated sequences. Defaults to 512.	`512`
`repetition_penalty`	`float`	Penalty for repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`int`	Size of n-grams to avoid repeating. Defaults to 0.	`0`
`early_stopping`	`bool`	Whether to stop generation early. Defaults to True.	`True`
`length_penalty`	`float`	Penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`int`	Number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`float`	Penalty for diverse sequences in diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`bool`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`float`	Sampling temperature. Defaults to 1.0.	`1.0`
`top_k`	`int`	Top-k value for sampling. Defaults to 50.	`50`
`top_p`	`float`	Top-p value for sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`		Generated sequences and their scores.

Source code in src/unitorch/models/peft/modeling_qwen_vl.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    image_grid_thw: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """
    Generate text using the generation model.

    The prompt tokens are stripped from the returned sequences, and every
    sequence is right-padded to exactly ``max_gen_seq_length`` tokens.

    Args:
        input_ids: Input tensor of shape (batch_size, sequence_length).
        pixel_values: Image input tensor. It is flattened to 2-D
            ``(-1, last_dim)`` before being handed to the model.
        image_grid_thw: Image grid tensor. It is flattened to 2-D
            ``(-1, last_dim)`` before being handed to the model.
        num_beams (int, optional): Number of beams for beam search. Defaults to 5.
        decoder_start_token_id (int, optional): The ID of the decoder start token.
            Also used as the fill value for the padded output. Defaults to 151643.
        decoder_end_token_id (int or List[int], optional): The ID(s) of the decoder end token(s). Defaults to 151645.
        decoder_pad_token_id (int, optional): The ID of the padding token passed to the model. Defaults to 151643.
        num_return_sequences (int, optional): Number of generated sequences to return. Defaults to 1.
        min_gen_seq_length (int, optional): Minimum length of generated sequences (prompt excluded). Defaults to 0.
        max_gen_seq_length (int, optional): Maximum length of generated sequences (prompt excluded). Defaults to 512.
        repetition_penalty (float, optional): Penalty for repeated tokens. Defaults to 1.0.
        no_repeat_ngram_size (int, optional): Size of n-grams to avoid repeating. Defaults to 0.
        early_stopping (bool, optional): Whether to stop generation early. Defaults to True.
        length_penalty (float, optional): Penalty for longer sequences. Defaults to 1.0.
        num_beam_groups (int, optional): Number of beam groups for diverse beam search. Defaults to 1.
        diversity_penalty (float, optional): Penalty for diverse sequences in diverse beam search. Defaults to 0.0.
        do_sample (bool, optional): Whether to use sampling for generation. Defaults to False.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        top_k (int, optional): Top-k value for sampling. Defaults to 50.
        top_p (float, optional): Top-p value for sampling. Defaults to 1.0.

    Returns:
        GenericOutputs: ``sequences`` is a long tensor of shape
            (batch_size, max_gen_seq_length) when ``num_return_sequences == 1``,
            otherwise (batch_size, num_return_sequences, max_gen_seq_length).
            ``sequences_scores`` holds the scores reported by the underlying
            ``generate`` call, or ``None`` when that call does not report them.
    """
    input_seq_length = input_ids.size(1)
    # Collapse any leading batch/image dimensions so both visual inputs are 2-D.
    image_grid_thw = image_grid_thw.view(-1, image_grid_thw.size(-1))
    pixel_values = pixel_values.view(-1, pixel_values.size(-1))
    outputs = self.model.generate(
        input_ids=input_ids,
        pixel_values=pixel_values,
        image_grid_thw=image_grid_thw,
        # Length limits passed to the model include the prompt tokens.
        max_length=max_gen_seq_length + input_seq_length,
        min_length=min_gen_seq_length + input_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        pad_token_id=decoder_pad_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Group the returned rows as (batch, num_return_sequences, total_length).
    sequences = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )
    # Keep only the newly generated tokens; the prompt prefix is dropped.
    generated = sequences[:, :, input_seq_length:]

    # Fixed-size integer buffer created directly on the target device
    # (no float32 round trip), pre-filled with the start token id.
    padded = torch.full(
        (sequences.size(0), num_return_sequences, max_gen_seq_length),
        decoder_start_token_id,
        dtype=torch.long,
        device=sequences.device,
    )
    padded[:, :, : generated.size(-1)].copy_(generated)

    if num_return_sequences == 1:
        padded = padded.reshape(-1, max_gen_seq_length)

    return GenericOutputs(
        sequences=padded,
        # Only beam-search outputs carry `sequences_scores`; fall back to None
        # instead of raising AttributeError for greedy/sampling decoding.
        sequences_scores=getattr(outputs, "sequences_scores", None),
    )

unitorch.models.peft¤

ClipLoraForMatching¤

prefix_keys_in_state_dict class-attribute instance-attribute ¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

modules_to_save_checkpoints class-attribute instance-attribute ¤

replace_keys_in_peft_state_dict class-attribute instance-attribute ¤

peft_config instance-attribute ¤

peft_model instance-attribute ¤

classifier instance-attribute ¤

forward ¤

ClipLoraForTextMatching¤

prefix_keys_in_state_dict class-attribute instance-attribute ¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

modules_to_save_checkpoints class-attribute instance-attribute ¤

replace_keys_in_peft_state_dict class-attribute instance-attribute ¤

peft_config instance-attribute ¤

peft_model instance-attribute ¤

classifier instance-attribute ¤

forward ¤

LlamaLoraForClassification¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

modules_to_save_checkpoints class-attribute instance-attribute ¤

config instance-attribute ¤

peft_config instance-attribute ¤

model instance-attribute ¤

dropout instance-attribute ¤

classifier instance-attribute ¤

forward ¤

LlamaLoraForGeneration¤

prefix_keys_in_state_dict class-attribute instance-attribute ¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

config instance-attribute ¤

peft_config instance-attribute ¤

base_model instance-attribute ¤

forward ¤

generate ¤

LlavaMistralClipLoraForClassification¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

modules_to_save_checkpoints class-attribute instance-attribute ¤

config instance-attribute ¤

peft_config instance-attribute ¤

vision_tower instance-attribute ¤

multi_modal_projector instance-attribute ¤

image_newline instance-attribute ¤

language_model instance-attribute ¤

dropout instance-attribute ¤

classifier instance-attribute ¤

image_token_index instance-attribute ¤

forward ¤

LlavaMistralClipLoraForGeneration¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

config instance-attribute ¤

peft_config instance-attribute ¤

vision_tower instance-attribute ¤

multi_modal_projector instance-attribute ¤

image_newline instance-attribute ¤

language_model instance-attribute ¤

image_token_index instance-attribute ¤

forward ¤

generate ¤

LlavaLlamaSiglipLoraForGeneration¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

config instance-attribute ¤

peft_config instance-attribute ¤

vision_tower instance-attribute ¤

multi_modal_projector instance-attribute ¤

language_model instance-attribute ¤

image_token_index instance-attribute ¤

forward ¤

generate ¤

MistralLoraForClassification¤

replace_keys_in_state_dict class-attribute instance-attribute ¤

modules_to_save_checkpoints class-attribute instance-attribute ¤

config instance-attribute ¤

peft_config instance-attribute ¤

model instance-attribute ¤

dropout instance-attribute ¤

classifier instance-attribute ¤

forward ¤

MistralLoraForGeneration¤

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

replace_keys_in_peft_state_dict `class-attribute` `instance-attribute` ¤

peft_config `instance-attribute` ¤

peft_model `instance-attribute` ¤

classifier `instance-attribute` ¤

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

replace_keys_in_peft_state_dict `class-attribute` `instance-attribute` ¤

peft_config `instance-attribute` ¤

peft_model `instance-attribute` ¤

classifier `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

model `instance-attribute` ¤

dropout `instance-attribute` ¤

classifier `instance-attribute` ¤

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

base_model `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

vision_tower `instance-attribute` ¤

multi_modal_projector `instance-attribute` ¤

image_newline `instance-attribute` ¤

language_model `instance-attribute` ¤

dropout `instance-attribute` ¤

classifier `instance-attribute` ¤

image_token_index `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

vision_tower `instance-attribute` ¤

multi_modal_projector `instance-attribute` ¤

image_newline `instance-attribute` ¤

language_model `instance-attribute` ¤

image_token_index `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

vision_tower `instance-attribute` ¤

multi_modal_projector `instance-attribute` ¤

language_model `instance-attribute` ¤

image_token_index `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

modules_to_save_checkpoints `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

model `instance-attribute` ¤

dropout `instance-attribute` ¤

classifier `instance-attribute` ¤

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

base_model `instance-attribute` ¤

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

model `instance-attribute` ¤

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

model `instance-attribute` ¤

dpo_beta `instance-attribute` ¤

prefix_keys_in_state_dict `class-attribute` `instance-attribute` ¤

replace_keys_in_state_dict `class-attribute` `instance-attribute` ¤

config `instance-attribute` ¤

peft_config `instance-attribute` ¤

model `instance-attribute` ¤