Skip to content

unitorch.models.mbart¤

MBartProcessor¤

Bases: HfTextGenerationProcessor

Source code in src/unitorch/models/mbart/processing.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def __init__(
    self,
    vocab_path: str,
    max_seq_length: Optional[int] = 128,
    max_gen_seq_length: Optional[int] = 48,
    special_input_ids: Optional[Dict] = None,
):
    """Build the MBart tokenizer and initialise the generation processor.

    Args:
        vocab_path: Path handed to ``get_mbart_tokenizer`` to load the
            vocabulary.
        max_seq_length: Forwarded to the base processor as
            ``max_seq_length``.
        max_gen_seq_length: Forwarded to the base processor as
            ``max_gen_seq_length``.
        special_input_ids: Mapping forwarded to ``get_mbart_tokenizer``.
            ``None`` (the default) is treated as an empty mapping.
    """
    # Use a fresh dict per call instead of a shared mutable default, so
    # that nothing downstream can leak state between processor instances.
    if special_input_ids is None:
        special_input_ids = {}

    tokenizer = get_mbart_tokenizer(
        vocab_path,
        special_input_ids=special_input_ids,
    )
    super().__init__(
        tokenizer=tokenizer,
        max_seq_length=max_seq_length,
        max_gen_seq_length=max_gen_seq_length,
    )

MBartForGeneration¤

Bases: GenericModel

Source code in src/unitorch/models/mbart/modeling.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def __init__(
    self,
    config_path: str,
    freeze_input_embedding: Optional[bool] = True,
    gradient_checkpointing: Optional[bool] = False,
):
    """Create an MBart conditional-generation model from a JSON config.

    Args:
        config_path: Path of the JSON file read by
            ``MBartConfig.from_json_file``.
        freeze_input_embedding: When truthy, every parameter of the
            model's input embedding has ``requires_grad`` set to False.
        gradient_checkpointing: Value stored on the config's
            ``gradient_checkpointing`` attribute before the model is built.
    """
    super().__init__()

    mbart_config = MBartConfig.from_json_file(config_path)
    mbart_config.gradient_checkpointing = gradient_checkpointing
    self.config = mbart_config
    self.model = MBartForConditionalGeneration(mbart_config)

    if freeze_input_embedding:
        input_embedding = self.model.get_input_embeddings()
        for weight in input_embedding.parameters():
            weight.requires_grad = False

    # Weight initialisation runs last, after the freeze flags are set.
    self.init_weights()

prefix_keys_in_state_dict class-attribute instance-attribute ¤

# Table of regular expressions over state-dict key names, each mapped to the
# string "model.".
# NOTE(review): the code that consumes this table is not visible here —
# presumably the base class uses it to remap checkpoint keys when loading
# weights; confirm how the mapped value is applied.
prefix_keys_in_state_dict = {
    # Matches keys that start with "model." but not with "model.model." or
    # "model.lm_head." (negative lookahead).
    "^(?!model\\.model\\.|model\\.lm_head\\.)model\\.": "model.",
    # NOTE(review): the "." here is unescaped, so it matches any character
    # after "lm_head", unlike the escaped dots in the pattern above —
    # confirm whether that is intended.
    "^lm_head.": "model.",
}

config instance-attribute ¤

config = from_json_file(config_path)

model instance-attribute ¤

model = MBartForConditionalGeneration(config)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Tensor,
    decoder_input_ids: Tensor,
    decoder_attention_mask: Tensor,
)
Source code in src/unitorch/models/mbart/modeling.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,
    decoder_input_ids: torch.Tensor,
    decoder_attention_mask: torch.Tensor,
):
    """Run the wrapped model and return its logits.

    Args:
        input_ids: Passed to the model as ``input_ids``.
        attention_mask: Passed to the model as ``attention_mask``.
        decoder_input_ids: Passed to the model as ``decoder_input_ids``.
        decoder_attention_mask: Passed to the model as
            ``decoder_attention_mask``.

    Returns:
        The ``logits`` attribute of the model output.
    """
    model_inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "decoder_input_ids": decoder_input_ids,
        "decoder_attention_mask": decoder_attention_mask,
    }
    # return_dict=True so the result exposes ``.logits`` as an attribute.
    return self.model(**model_inputs, return_dict=True).logits

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 2,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
)
Source code in src/unitorch/models/mbart/modeling.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 2,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
):
    """Generate sequences with the wrapped model and pad them to a fixed length.

    All decoding options are forwarded to ``self.model.generate``; gradients
    are disabled for the whole call.

    Args:
        input_ids: Source token ids passed positionally to the model's
            ``generate``.
        num_beams: Forwarded as ``num_beams``.
        decoder_start_token_id: Forwarded as ``bos_token_id``; also used as
            the fill value for positions past the generated length.
        decoder_end_token_id: Forwarded as ``eos_token_id``.
        num_return_sequences: Forwarded as ``num_return_sequences``; also
            determines the shape of the returned sequences.
        min_gen_seq_length: Forwarded as ``min_length``.
        max_gen_seq_length: Forwarded as ``max_length``; also the size of
            the last dimension of the returned sequences.
        repetition_penalty: Forwarded as ``repetition_penalty``.
        no_repeat_ngram_size: Forwarded as ``no_repeat_ngram_size``.
        early_stopping: Forwarded as ``early_stopping``.
        length_penalty: Forwarded as ``length_penalty``.
        num_beam_groups: Forwarded as ``num_beam_groups``.
        diversity_penalty: Forwarded as ``diversity_penalty``.
        do_sample: Forwarded as ``do_sample``.
        temperature: Forwarded as ``temperature``.
        top_k: Forwarded as ``top_k``.
        top_p: Forwarded as ``top_p``.

    Returns:
        ``GenericOutputs`` with ``sequences`` of shape
        ``(-1, max_gen_seq_length)`` when ``num_return_sequences == 1`` and
        ``(-1, num_return_sequences, max_gen_seq_length)`` otherwise, in the
        same dtype as the generated token ids, plus ``sequences_scores``
        (``None`` when the generation output does not provide it).
    """
    outputs = self.model.generate(
        input_ids,
        max_length=max_gen_seq_length,
        min_length=min_gen_seq_length,
        num_beams=num_beams,
        do_sample=do_sample,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Group the returned candidates per input example.
    generated = outputs.sequences.reshape(
        -1, num_return_sequences, outputs.sequences.size(-1)
    )

    # Fixed-length buffer pre-filled with the start token. It is created in
    # the dtype/device of the generated ids so token ids stay integral
    # (torch.zeros would silently turn them into floats).
    padded = torch.full(
        (generated.size(0), num_return_sequences, max_gen_seq_length),
        decoder_start_token_id,
        dtype=generated.dtype,
        device=generated.device,
    )
    padded[:, :, : generated.size(-1)].copy_(generated)

    if num_return_sequences == 1:
        padded = padded.reshape(-1, max_gen_seq_length)

    # NOTE(review): sequences_scores is assumed to be absent for non-beam
    # decoding (e.g. num_beams=1) — fall back to None instead of raising;
    # confirm GenericOutputs consumers tolerate a None score.
    return GenericOutputs(
        sequences=padded,
        sequences_scores=getattr(outputs, "sequences_scores", None),
    )