unitorch.cli.models.qwen¤

QWenProcessor¤

Tip

core/process/qwen is the section for configuration of QWenProcessor.

Bases: QWenProcessor

Processor for Qwen language models.

Source code in src/unitorch/cli/models/qwen/processing.py

def __init__(
    self,
    tokenizer_file: str,
    tokenizer_config: Optional[str] = None,
    special_tokens_map: Optional[str] = None,
    chat_template: Optional[str] = None,
    max_seq_length: Optional[int] = 128,
    max_gen_seq_length: Optional[int] = 128,
):
    """Initialise the processor by forwarding every argument to the parent.

    Args:
        tokenizer_file: Tokenizer file handed to the parent initializer.
        tokenizer_config: Optional tokenizer configuration file.
        special_tokens_map: Optional special-tokens map file.
        chat_template: Optional chat template file.
        max_seq_length: Default input length limit (128 unless overridden).
        max_gen_seq_length: Default generation length limit (128 unless
            overridden).
    """
    parent_kwargs = {
        "tokenizer_file": tokenizer_file,
        "tokenizer_config": tokenizer_config,
        "special_tokens_map": special_tokens_map,
        "chat_template": chat_template,
        "max_seq_length": max_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
    }
    super().__init__(**parent_kwargs)

from_config `classmethod` ¤

from_config(config, **kwargs)

Source code in src/unitorch/cli/models/qwen/processing.py

@classmethod
@config_defaults_init("core/process/qwen")
def from_config(cls, config, **kwargs):
    """Resolve the processor's file arguments from the CLI config.

    Each option is read from the ``core/process/qwen`` section and combined
    through ``pop_value`` with the entry registered for ``pretrained_name``
    in ``pretrained_qwen_infos``; resolved locations go through
    ``cached_path``.

    Args:
        config: CLI configuration object exposing ``set_default_section``
            and ``getoption``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        A dict with the keys ``tokenizer_file``, ``tokenizer_config``,
        ``special_tokens_map`` and ``chat_template``.
        NOTE(review): presumably consumed by ``config_defaults_init`` to
        build the instance -- confirm against the decorator.
    """
    config.set_default_section("core/process/qwen")
    model_name = config.getoption("pretrained_name", "qwen3-4b-thinking")

    # The tokenizer file is resolved unconditionally and always cached.
    resolved = {
        "tokenizer_file": cached_path(
            pop_value(
                config.getoption("tokenizer_file", None),
                nested_dict_value(pretrained_qwen_infos, model_name, "tokenizer"),
            )
        )
    }

    # The remaining files may be absent: they are resolved with
    # check_none=False and only cached when a value was actually found.
    for option in ("tokenizer_config", "special_tokens_map", "chat_template"):
        location = pop_value(
            config.getoption(option, None),
            nested_dict_value(pretrained_qwen_infos, model_name, option),
            check_none=False,
        )
        if location is not None:
            location = cached_path(location)
        resolved[option] = location

    return resolved

_chat_template ¤

_chat_template(messages: List[Dict[str, Any]])

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/chat_template")
def _chat_template(self, messages: List[Dict[str, Any]]):
    """Render chat messages through the parent's ``chat_template``.

    Args:
        messages: Chat messages passed unchanged to the parent method.

    Returns:
        Whatever the parent ``chat_template`` returns.
    """
    rendered = super().chat_template(messages=messages)
    return rendered

_generation_inputs ¤

_generation_inputs(
    text: str, max_seq_length: Optional[int] = None
)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/generation/inputs")
def _generation_inputs(self, text: str, max_seq_length: Optional[int] = None):
    """Encode a prompt for generation and expose only its token ids.

    Args:
        text: Prompt text passed to the parent ``generation_inputs``.
        max_seq_length: Optional length limit forwarded to the parent.

    Returns:
        ``TensorInputs`` carrying just ``input_ids``.
    """
    encoded = super().generation_inputs(text=text, max_seq_length=max_seq_length)
    # Only the ids are forwarded here; the attention mask is not included.
    return TensorInputs(input_ids=encoded.input_ids)

_generation_labels ¤

_generation_labels(
    text: str, max_gen_seq_length: Optional[int] = None
)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/generation/labels")
def _generation_labels(self, text: str, max_gen_seq_length: Optional[int] = None):
    """Encode target text into generation references and masks.

    Args:
        text: Target text passed to the parent ``generation_labels``.
        max_gen_seq_length: Optional length limit forwarded to the parent.

    Returns:
        ``GenerationTargets`` with ``refs`` taken from the encoded
        ``input_ids`` and ``masks`` from the ``attention_mask``.
    """
    encoded = super().generation_labels(
        text=text, max_gen_seq_length=max_gen_seq_length
    )
    targets = GenerationTargets(refs=encoded.input_ids, masks=encoded.attention_mask)
    return targets

_generation ¤

_generation(
    text: str,
    text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/generation")
def _generation(
    self,
    text: str,
    text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Encode a prompt/target pair into model inputs and targets.

    Args:
        text: Prompt text.
        text_pair: Text paired with the prompt; forwarded as ``text_pair``.
        max_seq_length: Optional input length limit forwarded to the parent.
        max_gen_seq_length: Optional generation length limit forwarded to
            the parent.

    Returns:
        A ``(TensorInputs, GenerationTargets)`` tuple built from the parent's
        ``generation`` output.
    """
    encoded = super().generation(
        text=text,
        text_pair=text_pair,
        max_seq_length=max_seq_length,
        max_gen_seq_length=max_gen_seq_length,
    )
    model_inputs = TensorInputs(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
    )
    model_targets = GenerationTargets(
        refs=encoded.input_ids_label,
        masks=encoded.attention_mask_label,
    )
    return model_inputs, model_targets

_dpo_generation ¤

_dpo_generation(
    text: str,
    win_text_pair: str,
    lose_text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/dpo/generation")
def _dpo_generation(
    self,
    text: str,
    win_text_pair: str,
    lose_text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Encode a prompt together with its "win" and "lose" continuations.

    Args:
        text: Prompt text, encoded with the parent ``generation_inputs``.
        win_text_pair: "Win" continuation, encoded with ``generation_labels``.
        lose_text_pair: "Lose" continuation, encoded with
            ``generation_labels``.
        max_seq_length: Optional length limit for the prompt.
        max_gen_seq_length: Optional length limit for both continuations.

    Returns:
        ``TensorInputs`` holding ids and attention masks for the prompt and
        for both continuations (``win_*`` / ``lose_*`` fields).
    """
    prompt = super().generation_inputs(text=text, max_seq_length=max_seq_length)
    chosen = super().generation_labels(
        text=win_text_pair, max_gen_seq_length=max_gen_seq_length
    )
    rejected = super().generation_labels(
        text=lose_text_pair, max_gen_seq_length=max_gen_seq_length
    )
    fields = {
        "input_ids": prompt.input_ids,
        "attention_mask": prompt.attention_mask,
        "win_input_ids": chosen.input_ids,
        "win_attention_mask": chosen.attention_mask,
        "lose_input_ids": rejected.input_ids,
        "lose_attention_mask": rejected.attention_mask,
    }
    return TensorInputs(**fields)

_messages_generation ¤

_messages_generation(
    messages: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/messages/generation")
def _messages_generation(
    self,
    messages: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
):
    """Encode a chat conversation into model inputs and targets.

    Args:
        messages: Chat messages forwarded to the parent
            ``messages_generation``.
        max_seq_length: Optional length limit forwarded to the parent.

    Returns:
        A ``(TensorInputs, GenerationTargets)`` tuple built from the parent's
        output fields.
    """
    encoded = super().messages_generation(
        messages=messages, max_seq_length=max_seq_length
    )
    model_inputs = TensorInputs(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
    )
    model_targets = GenerationTargets(
        refs=encoded.input_ids_label,
        masks=encoded.attention_mask_label,
    )
    return model_inputs, model_targets

_messages_dpo_generation ¤

_messages_dpo_generation(
    messages: List[Dict[str, Any]],
    win_messages: List[Dict[str, Any]],
    lose_messages: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/messages/dpo/generation")
def _messages_dpo_generation(
    self,
    messages: List[Dict[str, Any]],
    win_messages: List[Dict[str, Any]],
    lose_messages: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Encode a chat prompt with "win" and "lose" chat continuations.

    Each messages argument may also be a single dict, which is wrapped into
    a one-element list. Every conversation is rendered with the parent's
    ``chat_template`` before being tokenized.

    Args:
        messages: Prompt conversation.
        win_messages: "Win" conversation.
        lose_messages: "Lose" conversation.
        max_seq_length: Optional length limit for the prompt.
        max_gen_seq_length: Optional length limit for both continuations.

    Returns:
        ``TensorInputs`` holding ids and attention masks for the prompt and
        for both continuations (``win_*`` / ``lose_*`` fields).
    """
    # Wrap a lone message dict so the template always receives a list.
    messages = [messages] if isinstance(messages, dict) else messages
    win_messages = [win_messages] if isinstance(win_messages, dict) else win_messages
    lose_messages = (
        [lose_messages] if isinstance(lose_messages, dict) else lose_messages
    )

    prompt_text = super().chat_template(messages=messages)
    prompt = super().generation_inputs(
        text=prompt_text, max_seq_length=max_seq_length
    )

    win_text = super().chat_template(messages=win_messages)
    chosen = super().generation_labels(
        text=win_text, max_gen_seq_length=max_gen_seq_length
    )

    lose_text = super().chat_template(messages=lose_messages)
    rejected = super().generation_labels(
        text=lose_text, max_gen_seq_length=max_gen_seq_length
    )

    fields = {
        "input_ids": prompt.input_ids,
        "attention_mask": prompt.attention_mask,
        "win_input_ids": chosen.input_ids,
        "win_attention_mask": chosen.attention_mask,
        "lose_input_ids": rejected.input_ids,
        "lose_attention_mask": rejected.attention_mask,
    }
    return TensorInputs(**fields)

_messages_grpo_generation ¤

_messages_grpo_generation(
    messages: List[Dict[str, Any]],
    messages_labels: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/process/qwen/messages/grpo/generation")
def _messages_grpo_generation(
    self,
    messages: List[Dict[str, Any]],
    messages_labels: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Encode a chat prompt and a reference answer as a two-sample GRPO batch.

    Both conversations are rendered with the parent's ``chat_template`` and
    tokenized. The single encoded answer is duplicated so that the
    ``sampled_*`` fields carry two identical samples, each with a fixed
    reward of 1.0.

    Args:
        messages: Prompt conversation; a single message dict is also
            accepted and wrapped into a list.
        messages_labels: Answer conversation; a single message dict is also
            accepted and wrapped into a list.
        max_seq_length: Optional length limit for the prompt.
        max_gen_seq_length: Optional length limit for the answer.

    Returns:
        ``TensorInputs`` with the prompt ids/mask plus ``sampled_ids``,
        ``sampled_attention_mask`` and ``sampled_rewards`` stacked along a
        new leading dimension of size 2.
    """
    # Accept a lone message dict so this process takes the same input forms
    # as the DPO messages process.
    if isinstance(messages, dict):
        messages = [messages]
    if isinstance(messages_labels, dict):
        messages_labels = [messages_labels]

    inputs = super().generation_inputs(
        text=super().chat_template(messages=messages),
        max_seq_length=max_seq_length,
    )
    labels = super().generation_labels(
        text=super().chat_template(messages=messages_labels),
        max_gen_seq_length=max_gen_seq_length,
    )
    sample_ids = labels.input_ids
    sample_attention_mask = labels.attention_mask
    # Constant reward; explicitly float32 regardless of torch's default dtype.
    sampled_rewards = torch.tensor(1.0).float()
    return TensorInputs(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        # The same sample is stacked twice to form the group dimension.
        sampled_ids=torch.stack([sample_ids, sample_ids], dim=0),
        sampled_attention_mask=torch.stack(
            [sample_attention_mask, sample_attention_mask], dim=0
        ),
        sampled_rewards=torch.stack([sampled_rewards, sampled_rewards], dim=0),
    )

_detokenize ¤

_detokenize(outputs: GenerationOutputs)

Source code in src/unitorch/cli/models/qwen/processing.py

@register_process("core/postprocess/qwen/detokenize")
def _detokenize(
    self,
    outputs: GenerationOutputs,
):
    """Decode generated sequences into cleaned text and attach them to results.

    Args:
        outputs: Generation outputs providing ``to_pandas()`` and a
            ``sequences`` tensor.

    Returns:
        ``WriterOutputs`` wrapping the results frame with an added
        ``decoded`` column.

    Raises:
        AssertionError: If the results frame is non-empty and its row count
            differs from the number of sequences.
        ValueError: If the decoded items are neither strings nor lists.
    """
    results = outputs.to_pandas()
    assert results.shape[0] == 0 or results.shape[0] == outputs.sequences.shape[0]

    decoded = super().detokenize(sequences=outputs.sequences)

    def cleanup_string(text: str) -> str:
        # Strip any thinking-mode CoT blocks emitted by Qwen3 reasoning models.
        # NOTE(review): only balanced <think>...</think> pairs are removed; a
        # lone closing </think> tag would be left in the text.
        text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        # Collapse newlines into spaces for TSV/CSV output compatibility.
        text = text.replace("\n", " ")
        return text.strip()

    # An empty batch has no first element to inspect; skip the cleanup
    # instead of failing with IndexError on decoded[0].
    if len(decoded) > 0:
        first = decoded[0]
        if isinstance(first, list):
            # Several returned sequences per input: clean each one.
            decoded = [list(map(cleanup_string, sequence)) for sequence in decoded]
        elif isinstance(first, str):
            decoded = list(map(cleanup_string, decoded))
        else:
            raise ValueError(f"Unsupported type for Qwen detokenize: {type(first)}")
    results["decoded"] = decoded
    return WriterOutputs(results)

QWenVLProcessor¤

Tip

core/process/qwen_vl is the section for configuration of QWenVLProcessor.

Bases: QWenVLProcessor

Processor for Qwen vision-language models.

Source code in src/unitorch/cli/models/qwen/processing_vl.py

def __init__(
    self,
    tokenizer_file: str,
    vision_config_path: str,
    tokenizer_config: Optional[str] = None,
    special_tokens_map: Optional[str] = None,
    chat_template: Optional[str] = None,
    max_seq_length: Optional[int] = 128,
    max_gen_seq_length: Optional[int] = 128,
):
    """Initialise the processor by forwarding every argument to the parent.

    Args:
        tokenizer_file: Tokenizer file handed to the parent initializer.
        vision_config_path: Vision configuration file handed to the parent.
        tokenizer_config: Optional tokenizer configuration file.
        special_tokens_map: Optional special-tokens map file.
        chat_template: Optional chat template file.
        max_seq_length: Default input length limit (128 unless overridden).
        max_gen_seq_length: Default generation length limit (128 unless
            overridden).
    """
    parent_kwargs = {
        "tokenizer_file": tokenizer_file,
        "vision_config_path": vision_config_path,
        "tokenizer_config": tokenizer_config,
        "special_tokens_map": special_tokens_map,
        "chat_template": chat_template,
        "max_seq_length": max_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
    }
    super().__init__(**parent_kwargs)

from_config `classmethod` ¤

from_config(config, **kwargs)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@classmethod
@config_defaults_init("core/process/qwen_vl")
def from_config(cls, config, **kwargs):
    """Resolve the processor's file arguments from the CLI config.

    Each option is read from the ``core/process/qwen_vl`` section and
    combined through ``pop_value`` with the entry registered for
    ``pretrained_name`` in ``pretrained_qwen_infos``; resolved locations go
    through ``cached_path``.

    Args:
        config: CLI configuration object exposing ``set_default_section``
            and ``getoption``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        A dict with the keys ``tokenizer_file``, ``vision_config_path``,
        ``tokenizer_config``, ``special_tokens_map`` and ``chat_template``.
        NOTE(review): presumably consumed by ``config_defaults_init`` to
        build the instance -- confirm against the decorator.
    """
    config.set_default_section("core/process/qwen_vl")
    model_name = config.getoption("pretrained_name", "qwen3-vl-8b-instruct")

    # Tokenizer and vision config are resolved unconditionally and cached.
    resolved = {}
    resolved["tokenizer_file"] = cached_path(
        pop_value(
            config.getoption("tokenizer_file", None),
            nested_dict_value(pretrained_qwen_infos, model_name, "tokenizer"),
        )
    )
    resolved["vision_config_path"] = cached_path(
        pop_value(
            config.getoption("vision_config_path", None),
            nested_dict_value(pretrained_qwen_infos, model_name, "vision_config"),
        )
    )

    # The remaining files may be absent: they are resolved with
    # check_none=False and only cached when a value was actually found.
    for option in ("tokenizer_config", "special_tokens_map", "chat_template"):
        location = pop_value(
            config.getoption(option, None),
            nested_dict_value(pretrained_qwen_infos, model_name, option),
            check_none=False,
        )
        if location is not None:
            location = cached_path(location)
        resolved[option] = location

    return resolved

_chat_template ¤

_chat_template(messages: List[Dict[str, Any]])

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/process/qwen_vl/chat_template")
def _chat_template(self, messages: List[Dict[str, Any]]):
    """Render chat messages through the parent's ``chat_template``.

    Args:
        messages: Chat messages passed unchanged to the parent method.

    Returns:
        Whatever the parent ``chat_template`` returns.
    """
    rendered = super().chat_template(messages=messages)
    return rendered

_generation_inputs ¤

_generation_inputs(
    text: str,
    images: Union[Image, str, List[Image], List[str]],
    max_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/process/qwen_vl/generation/inputs")
def _generation_inputs(
    self,
    text: str,
    images: Union[Image.Image, str, List[Image.Image], List[str]],
    max_seq_length: Optional[int] = None,
):
    """Encode a prompt and its image(s) for generation.

    Args:
        text: Prompt text passed to the parent ``generation_inputs``.
        images: One image or a list of images, given as ``Image.Image``
            objects or strings; forwarded unchanged to the parent.
        max_seq_length: Optional length limit forwarded to the parent.

    Returns:
        ``TensorInputs`` with ``input_ids``, ``pixel_values`` and
        ``image_grid_thw`` (no attention mask is included).
    """
    encoded = super().generation_inputs(
        text=text, images=images, max_seq_length=max_seq_length
    )
    fields = {
        "input_ids": encoded.input_ids,
        "pixel_values": encoded.pixel_values,
        "image_grid_thw": encoded.image_grid_thw,
    }
    return TensorInputs(**fields)

_generation_labels ¤

_generation_labels(
    text: str, max_gen_seq_length: Optional[int] = None
)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/process/qwen_vl/generation/labels")
def _generation_labels(self, text: str, max_gen_seq_length: Optional[int] = None):
    """Encode target text into generation references and masks.

    Args:
        text: Target text passed to the parent ``generation_labels``.
        max_gen_seq_length: Optional length limit forwarded to the parent.

    Returns:
        ``GenerationTargets`` with ``refs`` taken from the encoded
        ``input_ids`` and ``masks`` from the ``attention_mask``.
    """
    encoded = super().generation_labels(
        text=text, max_gen_seq_length=max_gen_seq_length
    )
    targets = GenerationTargets(refs=encoded.input_ids, masks=encoded.attention_mask)
    return targets

_generation ¤

_generation(
    text: str,
    images: Union[Image, str, List[Image], List[str]],
    text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/process/qwen_vl/generation")
def _generation(
    self,
    text: str,
    images: Union[Image.Image, str, List[Image.Image], List[str]],
    text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Encode a prompt, its image(s) and a paired text into inputs and targets.

    Args:
        text: Prompt text.
        images: One image or a list of images, forwarded unchanged.
        text_pair: Text paired with the prompt; forwarded as ``text_pair``.
        max_seq_length: Optional input length limit forwarded to the parent.
        max_gen_seq_length: Optional generation length limit forwarded to
            the parent.

    Returns:
        A ``(TensorInputs, GenerationTargets)`` tuple built from the parent's
        ``generation`` output.
    """
    encoded = super().generation(
        text=text,
        text_pair=text_pair,
        images=images,
        max_seq_length=max_seq_length,
        max_gen_seq_length=max_gen_seq_length,
    )
    model_inputs = TensorInputs(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        pixel_values=encoded.pixel_values,
        image_grid_thw=encoded.image_grid_thw,
    )
    model_targets = GenerationTargets(
        refs=encoded.input_ids_label,
        masks=encoded.attention_mask_label,
    )
    return model_inputs, model_targets

_dpo_generation ¤

_dpo_generation(
    text: str,
    images: Union[Image, str, List[Image], List[str]],
    win_text_pair: str,
    lose_text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/process/qwen_vl/dpo/generation")
def _dpo_generation(
    self,
    text: str,
    images: Union[Image.Image, str, List[Image.Image], List[str]],
    win_text_pair: str,
    lose_text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Encode an image prompt together with "win" and "lose" continuations.

    Args:
        text: Prompt text, encoded with the parent ``generation_inputs``.
        images: One image or a list of images accompanying the prompt.
        win_text_pair: "Win" continuation, encoded with ``generation_labels``.
        lose_text_pair: "Lose" continuation, encoded with
            ``generation_labels``.
        max_seq_length: Optional length limit for the prompt.
        max_gen_seq_length: Optional length limit for both continuations.

    Returns:
        ``TensorInputs`` holding the prompt ids/mask, the image tensors and
        ids/masks for both continuations (``win_*`` / ``lose_*`` fields).
    """
    prompt = super().generation_inputs(
        text=text, images=images, max_seq_length=max_seq_length
    )
    chosen = super().generation_labels(
        text=win_text_pair, max_gen_seq_length=max_gen_seq_length
    )
    rejected = super().generation_labels(
        text=lose_text_pair, max_gen_seq_length=max_gen_seq_length
    )
    fields = {
        "input_ids": prompt.input_ids,
        "attention_mask": prompt.attention_mask,
        "pixel_values": prompt.pixel_values,
        "image_grid_thw": prompt.image_grid_thw,
        "win_input_ids": chosen.input_ids,
        "win_attention_mask": chosen.attention_mask,
        "lose_input_ids": rejected.input_ids,
        "lose_attention_mask": rejected.attention_mask,
    }
    return TensorInputs(**fields)

_messages_generation ¤

_messages_generation(
    messages: List[Dict[str, Any]],
    images: Union[Image, str, List[Image], List[str]],
    max_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/process/qwen_vl/messages/generation")
def _messages_generation(
    self,
    messages: List[Dict[str, Any]],
    images: Union[Image.Image, str, List[Image.Image], List[str]],
    max_seq_length: Optional[int] = None,
):
    """Encode a chat conversation with image(s) into inputs and targets.

    Args:
        messages: Chat messages forwarded to the parent
            ``messages_generation``.
        images: One image or a list of images, forwarded unchanged.
        max_seq_length: Optional length limit forwarded to the parent.

    Returns:
        A ``(TensorInputs, GenerationTargets)`` tuple built from the parent's
        output fields.
    """
    encoded = super().messages_generation(
        messages=messages, images=images, max_seq_length=max_seq_length
    )
    model_inputs = TensorInputs(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        pixel_values=encoded.pixel_values,
        image_grid_thw=encoded.image_grid_thw,
    )
    model_targets = GenerationTargets(
        refs=encoded.input_ids_label,
        masks=encoded.attention_mask_label,
    )
    return model_inputs, model_targets

_messages_dpo_generation ¤

_messages_dpo_generation(
    messages: List[Dict[str, Any]],
    images: Union[Image, str, List[Image], List[str]],
    win_messages: List[Dict[str, Any]],
    lose_messages: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/process/qwen_vl/messages/dpo/generation")
def _messages_dpo_generation(
    self,
    messages: List[Dict[str, Any]],
    images: Union[Image.Image, str, List[Image.Image], List[str]],
    win_messages: List[Dict[str, Any]],
    lose_messages: List[Dict[str, Any]],
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Encode an image chat prompt with "win" and "lose" chat continuations.

    Each messages argument may also be a single dict, which is wrapped into
    a one-element list. Every conversation is rendered with the parent's
    ``chat_template`` before being tokenized.

    Args:
        messages: Prompt conversation.
        images: One image or a list of images accompanying the prompt.
        win_messages: "Win" conversation.
        lose_messages: "Lose" conversation.
        max_seq_length: Optional length limit for the prompt.
        max_gen_seq_length: Optional length limit for both continuations.

    Returns:
        ``TensorInputs`` holding the prompt ids/mask, the image tensors and
        ids/masks for both continuations (``win_*`` / ``lose_*`` fields).
    """
    # Wrap a lone message dict so the template always receives a list.
    messages = [messages] if isinstance(messages, dict) else messages
    win_messages = [win_messages] if isinstance(win_messages, dict) else win_messages
    lose_messages = (
        [lose_messages] if isinstance(lose_messages, dict) else lose_messages
    )

    prompt_text = super().chat_template(messages=messages)
    prompt = super().generation_inputs(
        text=prompt_text, images=images, max_seq_length=max_seq_length
    )

    win_text = super().chat_template(messages=win_messages)
    chosen = super().generation_labels(
        text=win_text, max_gen_seq_length=max_gen_seq_length
    )

    lose_text = super().chat_template(messages=lose_messages)
    rejected = super().generation_labels(
        text=lose_text, max_gen_seq_length=max_gen_seq_length
    )

    fields = {
        "input_ids": prompt.input_ids,
        "attention_mask": prompt.attention_mask,
        "pixel_values": prompt.pixel_values,
        "image_grid_thw": prompt.image_grid_thw,
        "win_input_ids": chosen.input_ids,
        "win_attention_mask": chosen.attention_mask,
        "lose_input_ids": rejected.input_ids,
        "lose_attention_mask": rejected.attention_mask,
    }
    return TensorInputs(**fields)

_detokenize ¤

_detokenize(outputs: GenerationOutputs)

Source code in src/unitorch/cli/models/qwen/processing_vl.py

@register_process("core/postprocess/qwen_vl/detokenize")
def _detokenize(
    self,
    outputs: GenerationOutputs,
):
    """Decode generated sequences into text and attach them to the results.

    Args:
        outputs: Generation outputs providing ``to_pandas()`` and a
            ``sequences`` tensor.

    Returns:
        ``WriterOutputs`` wrapping the results frame with an added
        ``decoded`` column.

    Raises:
        AssertionError: If the results frame is non-empty and its row count
            differs from the number of sequences.
        ValueError: If the decoded items are neither strings nor lists.
    """
    results = outputs.to_pandas()
    assert results.shape[0] == 0 or results.shape[0] == outputs.sequences.shape[0]

    decoded = super().detokenize(sequences=outputs.sequences)

    def cleanup_string(text: str) -> str:
        # Replace newlines with spaces; surrounding whitespace is kept as is.
        return text.replace("\n", " ")

    # An empty batch has no first element to inspect; skip the cleanup
    # instead of failing with IndexError on decoded[0].
    if len(decoded) > 0:
        first = decoded[0]
        if isinstance(first, list):
            # Several returned sequences per input: clean each one.
            decoded = [list(map(cleanup_string, sequence)) for sequence in decoded]
        elif isinstance(first, str):
            decoded = list(map(cleanup_string, decoded))
        else:
            raise ValueError(f"Unsupported type for Qwen detokenize: {type(first)}")
    results["decoded"] = decoded
    return WriterOutputs(results)

QWen3ForGeneration¤

Tip

core/model/generation/qwen3 is the section for configuration of QWen3ForGeneration.

Bases: QWen3ForGeneration

Qwen3 model for text generation.

Source code in src/unitorch/cli/models/qwen/modeling.py

def __init__(
    self,
    config_path: str,
    gradient_checkpointing: Optional[bool] = False,
):
    """Initialise the model by forwarding both arguments to the parent.

    Args:
        config_path: Model configuration file handed to the parent.
        gradient_checkpointing: Flag forwarded to the parent initializer
            (defaults to False).
    """
    parent_kwargs = {
        "config_path": config_path,
        "gradient_checkpointing": gradient_checkpointing,
    }
    super().__init__(**parent_kwargs)

from_config `classmethod` ¤

from_config(config, **kwargs)

Source code in src/unitorch/cli/models/qwen/modeling.py

@classmethod
@config_defaults_init("core/model/generation/qwen3")
def from_config(cls, config, **kwargs):
    """Build the model from the CLI config, optionally loading weights.

    Options are read from the ``core/model/generation/qwen3`` section.
    The model is constructed from the resolved config file; base weights
    and LoRA weights are loaded only when a location resolves to non-None.

    Args:
        config: CLI configuration object exposing ``set_default_section``
            and ``getoption``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The constructed model instance.
    """
    config.set_default_section("core/model/generation/qwen3")
    model_name = config.getoption("pretrained_name", "qwen3-4b-thinking")
    lora_name = config.getoption("pretrained_lora_name", None)

    config_path = cached_path(
        pop_value(
            config.getoption("config_path", None),
            nested_dict_value(pretrained_qwen_infos, model_name, "config"),
        )
    )
    use_checkpointing = config.getoption("gradient_checkpointing", False)
    model = cls(config_path, use_checkpointing)

    # Base weights: explicit option combined with the registry entry.
    base_weights = pop_value(
        config.getoption("pretrained_weight_path", None),
        nested_dict_value(pretrained_qwen_infos, model_name, "weight"),
        check_none=False,
    )
    if base_weights is not None:
        model.from_pretrained(base_weights)

    # LoRA weights: explicit option combined with the extensions registry.
    lora_weights = pop_value(
        config.getoption("pretrained_lora_weight_path", None),
        nested_dict_value(pretrained_qwen_extensions_infos, lora_name),
        check_none=False,
    )
    lora_scale = config.getoption("pretrained_lora_weight", 1.0)
    lora_alpha = config.getoption("pretrained_lora_alpha", 32.0)
    if lora_weights is None:
        return model

    model.load_lora_weights(
        lora_weights,
        lora_weights=lora_scale,
        lora_alphas=lora_alpha,
        save_base_state=False,
    )
    return model

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
)

Source code in src/unitorch/cli/models/qwen/modeling.py

@autocast(
    device_type=("cuda" if torch.cuda.is_available() else "cpu"),
    dtype=(torch.bfloat16 if is_bfloat16_available() else torch.float32),
)
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
):
    """Run the parent's forward pass under autocast and wrap its result.

    Args:
        input_ids: Token id tensor forwarded to the parent.
        attention_mask: Optional attention mask forwarded to the parent.

    Returns:
        ``GenerationOutputs`` whose ``sequences`` field holds whatever the
        parent ``forward`` returned.
    """
    parent_result = super().forward(
        input_ids=input_ids, attention_mask=attention_mask
    )
    return GenerationOutputs(sequences=parent_result)

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Source code in src/unitorch/cli/models/qwen/modeling.py

@config_defaults_method("core/model/generation/qwen3")
@torch.no_grad()
@autocast(
    device_type=("cuda" if torch.cuda.is_available() else "cpu"),
    dtype=(torch.bfloat16 if is_bfloat16_available() else torch.float32),
)
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """Generate sequences without gradients by delegating to the parent.

    Every decoding option is passed through to the parent ``generate``
    unchanged.

    Args:
        input_ids: Prompt token ids, passed positionally to the parent.
        num_beams, decoder_start_token_id, decoder_end_token_id,
        decoder_pad_token_id, num_return_sequences, min_gen_seq_length,
        max_gen_seq_length, repetition_penalty, no_repeat_ngram_size,
        early_stopping, length_penalty, num_beam_groups, diversity_penalty,
        do_sample, temperature, top_k, top_p: Decoding options forwarded by
            keyword with the defaults shown in the signature.

    Returns:
        ``GenerationOutputs`` with the parent's ``sequences`` and
        ``sequences_scores``.
    """
    decoding_options = {
        "num_beams": num_beams,
        "decoder_start_token_id": decoder_start_token_id,
        "decoder_end_token_id": decoder_end_token_id,
        "decoder_pad_token_id": decoder_pad_token_id,
        "num_return_sequences": num_return_sequences,
        "min_gen_seq_length": min_gen_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
        "repetition_penalty": repetition_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "early_stopping": early_stopping,
        "length_penalty": length_penalty,
        "num_beam_groups": num_beam_groups,
        "diversity_penalty": diversity_penalty,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
    }
    generated = super().generate(input_ids, **decoding_options)

    return GenerationOutputs(
        sequences=generated.sequences,
        sequences_scores=generated.sequences_scores,
    )

QWen3VLForGeneration¤

Tip

core/model/generation/qwen3_vl is the section for configuration of QWen3VLForGeneration.

Bases: QWen3VLForGeneration

Qwen3-VL model for vision-language text generation.

Source code in src/unitorch/cli/models/qwen/modeling_vl.py

def __init__(
    self,
    config_path: str,
    gradient_checkpointing: Optional[bool] = False,
):
    """Initialise the model by forwarding both arguments to the parent.

    Args:
        config_path: Model configuration file handed to the parent.
        gradient_checkpointing: Flag forwarded to the parent initializer
            (defaults to False).
    """
    parent_kwargs = {
        "config_path": config_path,
        "gradient_checkpointing": gradient_checkpointing,
    }
    super().__init__(**parent_kwargs)

from_config `classmethod` ¤

from_config(config, **kwargs)

Source code in src/unitorch/cli/models/qwen/modeling_vl.py

@classmethod
@config_defaults_init("core/model/generation/qwen3_vl")
def from_config(cls, config, **kwargs):
    """Build the model from the CLI config, optionally loading weights.

    Options are read from the ``core/model/generation/qwen3_vl`` section.
    The model is constructed from the resolved config file; base weights
    and LoRA weights are loaded only when a location resolves to non-None.

    Args:
        config: CLI configuration object exposing ``set_default_section``
            and ``getoption``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The constructed model instance.
    """
    config.set_default_section("core/model/generation/qwen3_vl")
    model_name = config.getoption("pretrained_name", "qwen3-vl-8b-instruct")
    lora_name = config.getoption("pretrained_lora_name", None)

    config_path = cached_path(
        pop_value(
            config.getoption("config_path", None),
            nested_dict_value(pretrained_qwen_infos, model_name, "config"),
        )
    )
    use_checkpointing = config.getoption("gradient_checkpointing", False)
    model = cls(config_path, use_checkpointing)

    # Base weights: explicit option combined with the registry entry.
    base_weights = pop_value(
        config.getoption("pretrained_weight_path", None),
        nested_dict_value(pretrained_qwen_infos, model_name, "weight"),
        check_none=False,
    )
    if base_weights is not None:
        model.from_pretrained(base_weights)

    # LoRA weights: explicit option combined with the extensions registry.
    lora_weights = pop_value(
        config.getoption("pretrained_lora_weight_path", None),
        nested_dict_value(pretrained_qwen_extensions_infos, lora_name),
        check_none=False,
    )
    lora_scale = config.getoption("pretrained_lora_weight", 1.0)
    lora_alpha = config.getoption("pretrained_lora_alpha", 32.0)
    if lora_weights is None:
        return model

    model.load_lora_weights(
        lora_weights,
        lora_weights=lora_scale,
        lora_alphas=lora_alpha,
        save_base_state=False,
    )
    return model

forward ¤

forward(
    input_ids: Tensor,
    pixel_values: Optional[Tensor] = None,
    image_grid_thw: Optional[Tensor] = None,
    attention_mask: Optional[Tensor] = None,
)

Source code in src/unitorch/cli/models/qwen/modeling_vl.py

@autocast(
    device_type=("cuda" if torch.cuda.is_available() else "cpu"),
    dtype=(torch.bfloat16 if is_bfloat16_available() else torch.float32),
)
def forward(
    self,
    input_ids: torch.Tensor,
    pixel_values: Optional[torch.Tensor] = None,
    image_grid_thw: Optional[torch.Tensor] = None,
    attention_mask: Optional[torch.Tensor] = None,
):
    """Run the parent's forward pass under autocast and wrap its result.

    Args:
        input_ids: Token id tensor forwarded to the parent.
        pixel_values: Optional image tensor forwarded to the parent.
        image_grid_thw: Optional image grid tensor forwarded to the parent.
        attention_mask: Optional attention mask forwarded to the parent.

    Returns:
        ``GenerationOutputs`` whose ``sequences`` field holds whatever the
        parent ``forward`` returned.
    """
    forward_kwargs = {
        "input_ids": input_ids,
        "pixel_values": pixel_values,
        "image_grid_thw": image_grid_thw,
        "attention_mask": attention_mask,
    }
    parent_result = super().forward(**forward_kwargs)
    return GenerationOutputs(sequences=parent_result)

generate ¤

generate(
    input_ids: Tensor,
    pixel_values: Tensor,
    image_grid_thw: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)

Source code in src/unitorch/cli/models/qwen/modeling_vl.py

@config_defaults_method("core/model/generation/qwen3_vl")
@torch.no_grad()
@autocast(
    device_type=("cuda" if torch.cuda.is_available() else "cpu"),
    dtype=(torch.bfloat16 if is_bfloat16_available() else torch.float32),
)
def generate(
    self,
    input_ids: torch.Tensor,
    pixel_values: torch.Tensor,
    image_grid_thw: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 151643,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 151645,
    decoder_pad_token_id: Optional[int] = 151643,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 512,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """Generate sequences without gradients by delegating to the parent.

    The image tensors and every decoding option are passed through to the
    parent ``generate`` unchanged.

    Args:
        input_ids: Prompt token ids, passed positionally to the parent.
        pixel_values: Image tensor forwarded by keyword.
        image_grid_thw: Image grid tensor forwarded by keyword.
        num_beams, decoder_start_token_id, decoder_end_token_id,
        decoder_pad_token_id, num_return_sequences, min_gen_seq_length,
        max_gen_seq_length, repetition_penalty, no_repeat_ngram_size,
        early_stopping, length_penalty, num_beam_groups, diversity_penalty,
        do_sample, temperature, top_k, top_p: Decoding options forwarded by
            keyword with the defaults shown in the signature.

    Returns:
        ``GenerationOutputs`` with the parent's ``sequences`` and
        ``sequences_scores``.
    """
    vision_inputs = {
        "pixel_values": pixel_values,
        "image_grid_thw": image_grid_thw,
    }
    decoding_options = {
        "num_beams": num_beams,
        "decoder_start_token_id": decoder_start_token_id,
        "decoder_end_token_id": decoder_end_token_id,
        "decoder_pad_token_id": decoder_pad_token_id,
        "num_return_sequences": num_return_sequences,
        "min_gen_seq_length": min_gen_seq_length,
        "max_gen_seq_length": max_gen_seq_length,
        "repetition_penalty": repetition_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "early_stopping": early_stopping,
        "length_penalty": length_penalty,
        "num_beam_groups": num_beam_groups,
        "diversity_penalty": diversity_penalty,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
    }
    generated = super().generate(input_ids, **vision_inputs, **decoding_options)

    return GenerationOutputs(
        sequences=generated.sequences,
        sequences_scores=generated.sequences_scores,
    )

unitorch.cli.models.qwen¤

QWenProcessor¤

from_config classmethod ¤

_chat_template ¤

_generation_inputs ¤

_generation_labels ¤

_generation ¤

_dpo_generation ¤

_messages_generation ¤

_messages_dpo_generation ¤

_messages_grpo_generation ¤

_detokenize ¤

QWenVLProcessor¤

from_config classmethod ¤

_chat_template ¤

_generation_inputs ¤

_generation_labels ¤

_generation ¤

_dpo_generation ¤

_messages_generation ¤

_messages_dpo_generation ¤

_detokenize ¤

QWen3ForGeneration¤

from_config classmethod ¤

forward ¤

generate ¤

QWen3VLForGeneration¤

from_config classmethod ¤

forward ¤

generate ¤

from_config `classmethod` ¤

from_config `classmethod` ¤

from_config `classmethod` ¤

from_config `classmethod` ¤