Skip to content

unitorch.models.roberta¤

RobertaProcessor¤

Bases: HfTextClassificationProcessor

Processor for Roberta text classification models.

Initializes the RobertaProcessor.

Parameters:

Name Type Description Default
vocab_path str

Path to the vocabulary file.

required
merge_path str

Path to the merge file.

required
max_seq_length int

Maximum sequence length. Defaults to 128.

128
source_type_id int

Source type ID. Defaults to 0.

0
target_type_id int

Target type ID. Defaults to 0.

0
Source code in src/unitorch/models/roberta/processing.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def __init__(
    self,
    vocab_path: str,
    merge_path: str,
    max_seq_length: Optional[int] = 128,
    source_type_id: Optional[int] = 0,
    target_type_id: Optional[int] = 0,
):
    """
    Build the Roberta tokenizer and configure the base processor with it.

    Args:
        vocab_path (str): Location of the tokenizer vocabulary file.
        merge_path (str): Location of the tokenizer merge file.
        max_seq_length (int, optional): Maximum sequence length passed to the base processor. Defaults to 128.
        source_type_id (int, optional): Source type ID passed to the base processor. Defaults to 0.
        target_type_id (int, optional): Target type ID passed to the base processor. Defaults to 0.
    """
    roberta_tokenizer = get_roberta_tokenizer(vocab_path, merge_path)
    # Position IDs start one past the tokenizer's padding token ID.
    first_position_id = roberta_tokenizer.pad_token_id + 1
    base_kwargs = dict(
        tokenizer=roberta_tokenizer,
        max_seq_length=max_seq_length,
        source_type_id=source_type_id,
        target_type_id=target_type_id,
        position_start_id=first_position_id,
    )
    super().__init__(**base_kwargs)

RobertaForClassification¤

Bases: GenericModel

Roberta encoder followed by dropout and a linear classification head applied to the pooled encoder output.

Parameters:

Name Type Description Default
config_path str

Path to the Roberta configuration file.

required
num_classes int

Number of output classes. Defaults to 1.

1
gradient_checkpointing bool

Whether to use gradient checkpointing. Defaults to False.

False
Source code in src/unitorch/models/roberta/modeling.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def __init__(
    self,
    config_path: str,
    num_classes: Optional[int] = 1,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Set up the Roberta encoder, dropout layer and linear classification head.

    Args:
        config_path (str): Location of the Roberta JSON configuration file.
        num_classes (int, optional): Output size of the classification layer. Defaults to 1.
        gradient_checkpointing (bool, optional): Value stored on the config's
            ``gradient_checkpointing`` attribute. Defaults to False.
    """
    super().__init__()

    # Load the configuration once, record the checkpointing flag on it,
    # and keep it on the instance before building any submodule.
    config = RobertaConfig.from_json_file(config_path)
    config.gradient_checkpointing = gradient_checkpointing
    self.config = config

    # Submodules are created in the same order as before: encoder, dropout, head.
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_classes)

    self.init_weights()

config instance-attribute ¤

config = from_json_file(config_path)

roberta instance-attribute ¤

roberta = RobertaModel(config)

dropout instance-attribute ¤

dropout = Dropout(hidden_dropout_prob)

classifier instance-attribute ¤

classifier = Linear(hidden_size, num_classes)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    token_type_ids: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the RobertaForClassification model.

Parameters:

Name Type Description Default
input_ids Tensor

Input token IDs.

required
attention_mask Tensor

Attention mask. Defaults to None.

None
token_type_ids Tensor

Token type IDs. Defaults to None.

None
position_ids Tensor

Position IDs. Defaults to None.

None

Returns:

Type Description
Tensor

Classification logits.

Source code in src/unitorch/models/roberta/modeling.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    token_type_ids: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the encoder and classify its pooled output.

    Args:
        input_ids (torch.Tensor): Token IDs fed to the encoder.
        attention_mask (torch.Tensor, optional): Attention mask forwarded to the encoder. Defaults to None.
        token_type_ids (torch.Tensor, optional): Token type IDs forwarded to the encoder. Defaults to None.
        position_ids (torch.Tensor, optional): Position IDs forwarded to the encoder. Defaults to None.

    Returns:
        torch.Tensor: Output of the classification layer (logits).
    """
    encoder_kwargs = {
        "attention_mask": attention_mask,
        "token_type_ids": token_type_ids,
        "position_ids": position_ids,
    }
    encoder_outputs = self.roberta(input_ids, **encoder_kwargs)
    # Element 1 of the encoder output is treated as the pooled representation.
    pooled = encoder_outputs[1]
    logits = self.classifier(self.dropout(pooled))
    return logits

RobertaForMaskLM¤

Bases: GenericModel

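Roberta encoder (without pooling layer) followed by a masked-language-modeling head whose decoder weight is shared with the input word embeddings.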

Parameters:

Name Type Description Default
config_path str

Path to the Roberta configuration file.

required
gradient_checkpointing bool

Whether to use gradient checkpointing. Defaults to False.

False
Source code in src/unitorch/models/roberta/modeling.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def __init__(
    self,
    config_path: str,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Set up the Roberta encoder and the masked-LM head with tied embeddings.

    Args:
        config_path (str): Location of the Roberta JSON configuration file.
        gradient_checkpointing (bool, optional): Value stored on the config's
            ``gradient_checkpointing`` attribute. Defaults to False.
    """
    super().__init__()

    # Load the configuration once, record the checkpointing flag on it,
    # and keep it on the instance before building any submodule.
    config = RobertaConfig.from_json_file(config_path)
    config.gradient_checkpointing = gradient_checkpointing
    self.config = config

    # The encoder is built without its pooling layer; only hidden states are used.
    self.roberta = RobertaModel(config, add_pooling_layer=False)
    self.lm_head = RobertaLMHead(config)

    # Share the decoder projection weight with the input word-embedding matrix.
    word_embeddings = self.roberta.embeddings.word_embeddings
    self.lm_head.decoder.weight = word_embeddings.weight

    self.init_weights()

config instance-attribute ¤

config = from_json_file(config_path)

roberta instance-attribute ¤

roberta = RobertaModel(config, add_pooling_layer=False)

lm_head instance-attribute ¤

lm_head = RobertaLMHead(config)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
    token_type_ids: Optional[Tensor] = None,
    position_ids: Optional[Tensor] = None,
)

Forward pass of the RobertaForMaskLM model.

Parameters:

Name Type Description Default
input_ids Tensor

Input token IDs.

required
attention_mask Tensor

Attention mask. Defaults to None.

None
token_type_ids Tensor

Token type IDs. Defaults to None.

None
position_ids Tensor

Position IDs. Defaults to None.

None

Returns:

Type Description
Tensor

MLM logits of shape [batch_size, seq_len, vocab_size].

Source code in src/unitorch/models/roberta/modeling.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    token_type_ids: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.Tensor] = None,
):
    """
    Run the encoder and apply the masked-LM head to its first output.

    Args:
        input_ids (torch.Tensor): Token IDs fed to the encoder.
        attention_mask (torch.Tensor, optional): Attention mask forwarded to the encoder. Defaults to None.
        token_type_ids (torch.Tensor, optional): Token type IDs forwarded to the encoder. Defaults to None.
        position_ids (torch.Tensor, optional): Position IDs forwarded to the encoder. Defaults to None.

    Returns:
        torch.Tensor: MLM logits of shape [batch_size, seq_len, vocab_size].
    """
    encoder_outputs = self.roberta(
        input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
    )
    # Element 0 of the encoder output is what the LM head consumes.
    sequence_output = encoder_outputs[0]
    mlm_logits = self.lm_head(sequence_output)
    return mlm_logits