Skip to content

unitorch.cli.models.bart¤

BartProcessor¤

Tip

core/process/bart is the configuration section for BartProcessor.

Bases: BartProcessor

Class for processing data with the BART model.

Source code in src/unitorch/cli/models/bart/processing.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def __init__(
    self,
    vocab_path: str,
    merge_path: str,
    special_input_ids: Optional[Dict] = None,
    max_seq_length: Optional[int] = 128,
    max_gen_seq_length: Optional[int] = 48,
):
    """Initialize the processor by forwarding every setting to the base class.

    Args:
        vocab_path: Vocabulary file path, forwarded unchanged to the base class.
        merge_path: Merge file path, forwarded unchanged to the base class.
        special_input_ids: Optional mapping forwarded as ``special_input_ids``.
            When omitted or ``None``, a new empty dict is created for this call.
        max_seq_length: Forwarded as ``max_seq_length``; defaults to 128.
        max_gen_seq_length: Forwarded as ``max_gen_seq_length``; defaults to 48.
    """
    # A ``dict()`` default in the signature is evaluated once at definition
    # time and would be shared by every instance that relies on it, so any
    # mutation would leak between processors. Build a fresh dict per call.
    if special_input_ids is None:
        special_input_ids = dict()

    super().__init__(
        vocab_path=vocab_path,
        merge_path=merge_path,
        special_input_ids=special_input_ids,
        max_seq_length=max_seq_length,
        max_gen_seq_length=max_gen_seq_length,
    )

from_config classmethod ¤

from_config(config, **kwargs)
Source code in src/unitorch/cli/models/bart/processing.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@classmethod
@config_defaults_init("core/process/bart")
def from_config(cls, config, **kwargs):
    """Resolve the vocab and merge file paths from ``config``.

    Options are read from the ``core/process/bart`` section. For each file,
    the configured option and the matching ``pretrained_bart_infos`` entry
    (looked up under ``pretrained_name``, default ``"bart-base"``) are handed
    to ``pop_value``, and the result is passed through ``cached_path``.

    Returns:
        A dict with the keys ``"vocab_path"`` and ``"merge_path"``.
    """
    config.set_default_section("core/process/bart")
    name = config.getoption("pretrained_name", "bart-base")

    # Vocabulary file: explicit option first, pretrained table entry second.
    vocab_file = cached_path(
        pop_value(
            config.getoption("vocab_path", None),
            nested_dict_value(pretrained_bart_infos, name, "vocab"),
        )
    )

    # Merge file: resolved the same way as the vocabulary file.
    merge_file = cached_path(
        pop_value(
            config.getoption("merge_path", None),
            nested_dict_value(pretrained_bart_infos, name, "merge"),
        )
    )

    return dict(vocab_path=vocab_file, merge_path=merge_file)

_generation ¤

_generation(
    text: str,
    text_pair: Optional[str] = None,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
)
Source code in src/unitorch/cli/models/bart/processing.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@register_process("core/process/bart/generation")
def _generation(
    self,
    text: str,
    text_pair: Optional[str] = None,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
):
    """Run the base class's ``generation`` and repackage its result.

    Args:
        text: Forwarded as ``text``.
        text_pair: Forwarded as ``text_pair``.
        max_seq_length: Forwarded as ``max_seq_length``.
        max_gen_seq_length: Forwarded as ``max_gen_seq_length``.

    Returns:
        A ``(TensorInputs, GenerationTargets)`` tuple built from the fields of
        the base class result.
    """
    encoded = super().generation(
        text=text,
        text_pair=text_pair,
        max_seq_length=max_seq_length,
        max_gen_seq_length=max_gen_seq_length,
    )

    # The "pair" fields of the base result become the decoder-side inputs.
    model_inputs = TensorInputs(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        decoder_input_ids=encoded.input_ids_pair,
        decoder_attention_mask=encoded.attention_mask_pair,
    )
    # The "label" fields become the references and masks of the targets.
    targets = GenerationTargets(
        refs=encoded.input_ids_label,
        masks=encoded.attention_mask_label,
    )
    return model_inputs, targets

_generation_inputs ¤

_generation_inputs(
    text: str, max_seq_length: Optional[int] = None
)
Source code in src/unitorch/cli/models/bart/processing.py
88
89
90
91
92
93
94
95
96
97
98
@register_process("core/process/bart/generation/inputs")
def _generation_inputs(
    self,
    text: str,
    max_seq_length: Optional[int] = None,
):
    """Run the base class's ``generation_inputs`` and wrap its ``input_ids``.

    Args:
        text: Forwarded as ``text``.
        max_seq_length: Forwarded as ``max_seq_length``.

    Returns:
        A ``TensorInputs`` carrying only ``input_ids`` from the base result.
    """
    encoded = super().generation_inputs(text=text, max_seq_length=max_seq_length)
    # Only input_ids is kept; other fields of the base result are not forwarded.
    return TensorInputs(
        input_ids=encoded.input_ids,
    )

_generation_labels ¤

_generation_labels(
    text: str, max_gen_seq_length: Optional[int] = None
)
Source code in src/unitorch/cli/models/bart/processing.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
@register_process("core/process/bart/generation/labels")
def _generation_labels(
    self,
    text: str,
    max_gen_seq_length: Optional[int] = None,
):
    """Run the base class's ``generation_labels`` and wrap it as targets.

    Args:
        text: Forwarded as ``text``.
        max_gen_seq_length: Forwarded as ``max_gen_seq_length``.

    Returns:
        A ``GenerationTargets`` whose ``refs`` and ``masks`` are the base
        result's ``input_ids`` and ``attention_mask``.
    """
    encoded = super().generation_labels(
        text=text, max_gen_seq_length=max_gen_seq_length
    )
    label_ids = encoded.input_ids
    label_mask = encoded.attention_mask
    return GenerationTargets(refs=label_ids, masks=label_mask)

_detokenize ¤

_detokenize(outputs: GenerationOutputs)
Source code in src/unitorch/cli/models/bart/processing.py
115
116
117
118
119
120
121
122
123
124
125
@register_process("core/postprocess/bart/detokenize")
def _detokenize(
    self,
    outputs: GenerationOutputs,
):
    """Add a ``"decoded"`` column with the detokenized sequences.

    Args:
        outputs: Generation outputs; ``to_pandas()`` and ``sequences`` are read.

    Returns:
        A ``WriterOutputs`` wrapping the frame from ``outputs.to_pandas()``
        with the base class's ``detokenize`` result stored under ``"decoded"``.
    """
    frame = outputs.to_pandas()

    # The frame must be empty or have one row per generated sequence.
    row_count = frame.shape[0]
    assert row_count == 0 or row_count == outputs.sequences.shape[0]

    frame["decoded"] = super().detokenize(sequences=outputs.sequences)
    return WriterOutputs(frame)

BartForGeneration¤

Tip

core/model/generation/bart is the configuration section for BartForGeneration.

Bases: BartForGeneration

BART model for generation tasks.

Source code in src/unitorch/cli/models/bart/modeling.py
24
25
26
27
28
29
30
31
def __init__(
    self,
    config_path: str,
    gradient_checkpointing: Optional[bool] = False,
):
    """Initialize the model by forwarding both settings to the base class.

    Args:
        config_path: Forwarded as ``config_path``.
        gradient_checkpointing: Forwarded as ``gradient_checkpointing``;
            defaults to ``False``.
    """
    super().__init__(
        config_path=config_path,
        gradient_checkpointing=gradient_checkpointing,
    )

from_config classmethod ¤

from_config(config, **kwargs)
Source code in src/unitorch/cli/models/bart/modeling.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@classmethod
@config_defaults_init("core/model/generation/bart")
def from_config(cls, config, **kwargs):
    """Build a model instance from the ``core/model/generation/bart`` section.

    The config file path comes from ``pop_value`` over the ``config_path``
    option and the ``pretrained_bart_infos`` entry for ``pretrained_name``,
    passed through ``cached_path``. A weight path is resolved the same way
    (with ``check_none=False``) and loaded through ``from_pretrained`` only
    when it is not ``None``.

    Returns:
        The constructed instance.
    """
    config.set_default_section("core/model/generation/bart")
    # NOTE(review): the processor's from_config defaults pretrained_name to
    # "bart-base" while this one uses "default-bart" -- confirm both keys
    # exist in pretrained_bart_infos.
    name = config.getoption("pretrained_name", "default-bart")

    config_file = cached_path(
        pop_value(
            config.getoption("config_path", None),
            nested_dict_value(pretrained_bart_infos, name, "config"),
        )
    )
    use_checkpointing = config.getoption("gradient_checkpointing", False)

    model = cls(config_file, use_checkpointing)

    # check_none=False is passed so that a missing weight path can come back
    # as None and be skipped below.
    weight_file = pop_value(
        config.getoption("pretrained_weight_path", None),
        nested_dict_value(pretrained_bart_infos, name, "weight"),
        check_none=False,
    )
    if weight_file is None:
        return model

    model.from_pretrained(weight_file)
    return model

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Tensor,
    decoder_input_ids: Tensor,
    decoder_attention_mask: Tensor,
)
Source code in src/unitorch/cli/models/bart/modeling.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,
    decoder_input_ids: torch.Tensor,
    decoder_attention_mask: torch.Tensor,
):
    """Run the base class's ``forward`` under autocast and wrap the result.

    All four tensors are forwarded by keyword under their own names.

    Returns:
        A ``GenerationOutputs`` whose ``sequences`` field holds the value
        returned by the base class's ``forward``.
    """
    forward_kwargs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        decoder_input_ids=decoder_input_ids,
        decoder_attention_mask=decoder_attention_mask,
    )
    return GenerationOutputs(sequences=super().forward(**forward_kwargs))

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 2,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)
Source code in src/unitorch/cli/models/bart/modeling.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
@config_defaults_method("core/model/generation/bart")
@torch.no_grad()
@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 2,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """Call the base class's ``generate`` with gradients off and autocast on.

    Every argument is forwarded by keyword, unchanged and under its own name.

    Returns:
        A ``GenerationOutputs`` carrying ``sequences`` and
        ``sequences_scores`` from the base class result.
    """
    # Gather the options first so the delegating call stays on one line.
    generation_kwargs = dict(
        input_ids=input_ids,
        num_beams=num_beams,
        decoder_start_token_id=decoder_start_token_id,
        decoder_end_token_id=decoder_end_token_id,
        num_return_sequences=num_return_sequences,
        min_gen_seq_length=min_gen_seq_length,
        max_gen_seq_length=max_gen_seq_length,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    generated = super().generate(**generation_kwargs)

    return GenerationOutputs(
        sequences=generated.sequences,
        sequences_scores=generated.sequences_scores,
    )