Skip to content

unitorch.cli.models.visualbert¤

VisualBertProcessor¤

Tip

core/process/visualbert is the section for configuration of VisualBertProcessor.

Bases: VisualBertProcessor

VisualBERT Processor for text and image inputs.

Initialize the VisualBertProcessor.

Parameters:

Name Type Description Default
vocab_path str

The path to the vocabulary file.

required
max_seq_length Optional[int]

The maximum sequence length. Defaults to 128.

128
special_input_ids Optional[Dict]

A dictionary containing special input IDs. Defaults to an empty dictionary.

dict()
do_lower_case Optional[bool]

Whether to convert the text to lowercase. Defaults to True.

True
do_basic_tokenize Optional[bool]

Whether to perform basic tokenization. Defaults to True.

True
do_whole_word_mask Optional[bool]

Whether to use whole-word masking. Defaults to True.

True
masked_lm_prob Optional[float]

The probability of masked language model masking. Defaults to 0.15.

0.15
max_predictions_per_seq Optional[int]

The maximum number of masked language model predictions per sequence. Defaults to 20.

20
Source code in src/unitorch/cli/models/visualbert/processing.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def __init__(
    self,
    vocab_path,
    max_seq_length: Optional[int] = 128,
    special_input_ids: Optional[Dict] = dict(),
    do_lower_case: Optional[bool] = True,
    do_basic_tokenize: Optional[bool] = True,
    do_whole_word_mask: Optional[bool] = True,
    masked_lm_prob: Optional[float] = 0.15,
    max_predictions_per_seq: Optional[int] = 20,
):
    """
    Set up the VisualBertProcessor by handing every option to the parent initializer.

    Args:
        vocab_path (str): The path to the vocabulary file.
        max_seq_length (Optional[int]): The maximum sequence length. Defaults to 128.
        special_input_ids (Optional[Dict]): A dictionary containing special input IDs.
            Defaults to an empty dictionary.
        do_lower_case (Optional[bool]): Whether to convert the text to lowercase.
            Defaults to True.
        do_basic_tokenize (Optional[bool]): Whether to perform basic tokenization.
            Defaults to True.
        do_whole_word_mask (Optional[bool]): Whether to use whole-word masking.
            Defaults to True.
        masked_lm_prob (Optional[float]): The probability of masked language model
            masking. Defaults to 0.15.
        max_predictions_per_seq (Optional[int]): The maximum number of masked language
            model predictions per sequence. Defaults to 20.
    """
    # NOTE(review): the `dict()` default of `special_input_ids` is evaluated once,
    # when the function is defined, so every call that omits the argument receives
    # the same dict object — confirm the parent never mutates it.
    parent_options = {
        "vocab_path": vocab_path,
        "max_seq_length": max_seq_length,
        "special_input_ids": special_input_ids,
        "do_lower_case": do_lower_case,
        "do_basic_tokenize": do_basic_tokenize,
        "do_whole_word_mask": do_whole_word_mask,
        "masked_lm_prob": masked_lm_prob,
        "max_predictions_per_seq": max_predictions_per_seq,
    }
    # Everything is passed by keyword, exactly as received; nothing is stored here.
    super().__init__(**parent_options)

from_core_configure classmethod ¤

from_core_configure(config, **kwargs)

Create an instance of VisualBertProcessor from a core configuration.

Parameters:

Name Type Description Default
config

The core configuration.

required
**kwargs

Additional keyword arguments.

{}

Returns:

Name Type Description
dict

A dictionary containing the processor's configuration.

Source code in src/unitorch/cli/models/visualbert/processing.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
@classmethod
@add_default_section_for_init("core/process/visualbert")
def from_core_configure(cls, config, **kwargs):
    """
    Resolve the vocabulary path for VisualBertProcessor from a core configuration.

    Options are read from the "core/process/visualbert" section. The `vocab_path`
    option and the "vocab" entry registered for `pretrained_name` in
    `pretrained_visualbert_infos` are combined through `pop_value`, and the result
    is passed through `cached_path`.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments (not read by this method body).

    Returns:
        dict: A dictionary with a single "vocab_path" entry.
    """
    # NOTE(review): presumably the decorator turns the returned dict into the
    # constructor arguments of `cls` — confirm against its implementation.
    config.set_default_section("core/process/visualbert")
    pretrained_name = config.getoption("pretrained_name", "visualbert-vqa-coco-pre")
    configured_vocab = config.getoption("vocab_path", None)

    # Vocabulary registered for the selected pretrained checkpoint.
    registered_vocab = nested_dict_value(
        pretrained_visualbert_infos, pretrained_name, "vocab"
    )
    # presumably pop_value prefers the explicit option over the registered one — verify
    chosen_vocab = pop_value(configured_vocab, registered_vocab)

    return {"vocab_path": cached_path(chosen_vocab)}

VisualBertForClassification¤

Tip

core/model/classification/visualbert is the section for configuration of VisualBertForClassification.

Bases: VisualBertForClassification

VisualBERT for Classification model.

Initialize VisualBertForClassification.

Parameters:

Name Type Description Default
config_path str

The path to the model's configuration file.

required
num_classes Optional[int]

The number of classes for classification. Defaults to 1.

1
gradient_checkpointing Optional[bool]

Whether to use gradient checkpointing to save memory during training. Defaults to False.

False
Source code in src/unitorch/cli/models/visualbert/modeling.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    config_path: str,
    num_classes: Optional[int] = 1,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Set up VisualBertForClassification by delegating to the parent initializer.

    Args:
        config_path (str): The path to the model's configuration file.
        num_classes (Optional[int]): The number of classes for classification.
            Defaults to 1.
        gradient_checkpointing (Optional[bool]): Whether to use gradient checkpointing
            to save memory during training. Defaults to False.
    """
    # All three options go to the parent by keyword, unchanged.
    parent_options = {
        "config_path": config_path,
        "num_classes": num_classes,
        "gradient_checkpointing": gradient_checkpointing,
    }
    super().__init__(**parent_options)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Tensor,
    token_type_ids: Tensor,
    position_ids: Tensor,
    visual_embeds: Tensor,
    visual_attention_mask: Tensor,
    visual_token_type_ids: Tensor,
)

Forward pass of the VisualBertForClassification model.

Parameters:

Name Type Description Default
input_ids Tensor

The input token IDs.

required
attention_mask Tensor

The attention mask.

required
token_type_ids Tensor

The token type IDs.

required
position_ids Tensor

The position IDs.

required
visual_embeds Tensor

The visual embeddings.

required
visual_attention_mask Tensor

The visual attention mask.

required
visual_token_type_ids Tensor

The visual token type IDs.

required

Returns:

Name Type Description
ClassificationOutputs

The output logits of the model.

Source code in src/unitorch/cli/models/visualbert/modeling.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,
    token_type_ids: torch.Tensor,
    position_ids: torch.Tensor,
    visual_embeds: torch.Tensor,
    visual_attention_mask: torch.Tensor,
    visual_token_type_ids: torch.Tensor,
):
    """
    Run the parent model's forward pass and wrap its result for the CLI.

    The decorator picks the autocast device type once, when the method is defined:
    "cuda" if CUDA is available at that moment, otherwise "cpu".

    Args:
        input_ids (torch.Tensor): The input token IDs.
        attention_mask (torch.Tensor): The attention mask.
        token_type_ids (torch.Tensor): The token type IDs.
        position_ids (torch.Tensor): The position IDs.
        visual_embeds (torch.Tensor): The visual embeddings.
        visual_attention_mask (torch.Tensor): The visual attention mask.
        visual_token_type_ids (torch.Tensor): The visual token type IDs.

    Returns:
        ClassificationOutputs: The parent's forward result, stored under `outputs`.
    """
    text_inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "token_type_ids": token_type_ids,
        "position_ids": position_ids,
    }
    visual_inputs = {
        "visual_embeds": visual_embeds,
        "visual_attention_mask": visual_attention_mask,
        "visual_token_type_ids": visual_token_type_ids,
    }
    # The parent receives every tensor by keyword, exactly as passed in.
    parent_result = super().forward(**text_inputs, **visual_inputs)
    return ClassificationOutputs(outputs=parent_result)

from_core_configure classmethod ¤

from_core_configure(config, **kwargs)

Create an instance of VisualBertForClassification from a core configuration.

Parameters:

Name Type Description Default
config

The core configuration.

required
**kwargs

Additional keyword arguments.

{}

Returns:

Name Type Description
VisualBertForClassification

The initialized VisualBertForClassification instance.

Source code in src/unitorch/cli/models/visualbert/modeling.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
@classmethod
@add_default_section_for_init("core/model/classification/visualbert")
def from_core_configure(cls, config, **kwargs):
    """
    Build a VisualBertForClassification from a core configuration.

    Options are read from the "core/model/classification/visualbert" section:
    `pretrained_name`, `config_path`, `gradient_checkpointing`, `num_classes`
    and `pretrained_weight_path`. The config and weight locations registered for
    `pretrained_name` in `pretrained_visualbert_infos` are combined with the
    explicit options through `pop_value`.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments (not read by this method body).

    Returns:
        VisualBertForClassification: The initialized VisualBertForClassification instance.
    """
    config.set_default_section("core/model/classification/visualbert")
    pretrained_name = config.getoption("pretrained_name", "visualbert-vqa-coco-pre")

    # Model configuration file: explicit option combined with the registered one.
    registered_config = nested_dict_value(
        pretrained_visualbert_infos, pretrained_name, "config"
    )
    config_path = cached_path(
        pop_value(config.getoption("config_path", None), registered_config)
    )

    gradient_checkpointing = config.getoption("gradient_checkpointing", False)
    num_classes = config.getoption("num_classes", 1)
    model = cls(config_path, num_classes, gradient_checkpointing)

    # Weights are optional; check_none=False presumably lets pop_value return
    # None instead of failing when neither source supplies a path — verify.
    registered_weight = nested_dict_value(
        pretrained_visualbert_infos, pretrained_name, "weight"
    )
    weight_path = pop_value(
        config.getoption("pretrained_weight_path", None),
        registered_weight,
        check_none=False,
    )
    if weight_path is None:
        return model

    model.from_pretrained(weight_path)
    return model

VisualBertForPretrain¤

Tip

core/model/pretrain/visualbert is the section for configuration of VisualBertForPretrain.

Bases: VisualBertForPretrain

VisualBERT for Pretraining model.

Initialize VisualBertForPretrain.

Parameters:

Name Type Description Default
config_path str

The path to the model's configuration file.

required
gradient_checkpointing Optional[bool]

Whether to use gradient checkpointing to save memory during training. Defaults to False.

False
Source code in src/unitorch/cli/models/visualbert/modeling.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def __init__(
    self,
    config_path: str,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Set up VisualBertForPretrain by delegating to the parent initializer.

    Args:
        config_path (str): The path to the model's configuration file.
        gradient_checkpointing (Optional[bool]): Whether to use gradient checkpointing
            to save memory during training. Defaults to False.
    """
    # Both options go to the parent by keyword, unchanged.
    parent_options = {
        "config_path": config_path,
        "gradient_checkpointing": gradient_checkpointing,
    }
    super().__init__(**parent_options)

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Tensor,
    token_type_ids: Tensor,
    position_ids: Tensor,
    visual_embeds: Tensor,
    visual_attention_mask: Tensor,
    visual_token_type_ids: Tensor,
    nsp_label: Tensor,
    mlm_label: Tensor,
    mlm_label_mask: Tensor,
)

Forward pass of the VisualBertForPretrain model.

Parameters:

Name Type Description Default
input_ids Tensor

The input token IDs.

required
attention_mask Tensor

The attention mask.

required
token_type_ids Tensor

The token type IDs.

required
position_ids Tensor

The position IDs.

required
visual_embeds Tensor

The visual embeddings.

required
visual_attention_mask Tensor

The visual attention mask.

required
visual_token_type_ids Tensor

The visual token type IDs.

required
nsp_label Tensor

The next sentence prediction label.

required
mlm_label Tensor

The masked language model label.

required
mlm_label_mask Tensor

The masked language model label mask.

required

Returns:

Name Type Description
LossOutputs

The output loss of the model.

Source code in src/unitorch/cli/models/visualbert/modeling.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: torch.Tensor,
    token_type_ids: torch.Tensor,
    position_ids: torch.Tensor,
    visual_embeds: torch.Tensor,
    visual_attention_mask: torch.Tensor,
    visual_token_type_ids: torch.Tensor,
    nsp_label: torch.Tensor,
    mlm_label: torch.Tensor,
    mlm_label_mask: torch.Tensor,
):
    """
    Run the parent pretraining forward pass and wrap its result as a loss.

    The decorator picks the autocast device type once, when the method is defined:
    "cuda" if CUDA is available at that moment, otherwise "cpu".

    Args:
        input_ids (torch.Tensor): The input token IDs.
        attention_mask (torch.Tensor): The attention mask.
        token_type_ids (torch.Tensor): The token type IDs.
        position_ids (torch.Tensor): The position IDs.
        visual_embeds (torch.Tensor): The visual embeddings.
        visual_attention_mask (torch.Tensor): The visual attention mask.
        visual_token_type_ids (torch.Tensor): The visual token type IDs.
        nsp_label (torch.Tensor): The next sentence prediction label.
        mlm_label (torch.Tensor): The masked language model label.
        mlm_label_mask (torch.Tensor): The masked language model label mask.

    Returns:
        LossOutputs: The parent's forward result, stored under `loss`.
    """
    model_inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "token_type_ids": token_type_ids,
        "position_ids": position_ids,
        "visual_embeds": visual_embeds,
        "visual_attention_mask": visual_attention_mask,
        "visual_token_type_ids": visual_token_type_ids,
    }
    training_targets = {
        "nsp_label": nsp_label,
        "mlm_label": mlm_label,
        "mlm_label_mask": mlm_label_mask,
    }
    # Inputs and labels are grouped only for readability; the parent still
    # receives each one by its own keyword.
    parent_result = super().forward(**model_inputs, **training_targets)
    return LossOutputs(loss=parent_result)

from_core_configure classmethod ¤

from_core_configure(config, **kwargs)

Create an instance of VisualBertForPretrain from a core configuration.

Parameters:

Name Type Description Default
config

The core configuration.

required
**kwargs

Additional keyword arguments.

{}

Returns:

Name Type Description
VisualBertForPretrain

The initialized VisualBertForPretrain instance.

Source code in src/unitorch/cli/models/visualbert/modeling.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
@classmethod
@add_default_section_for_init("core/model/pretrain/visualbert")
def from_core_configure(cls, config, **kwargs):
    """
    Build a VisualBertForPretrain from a core configuration.

    Options are read from the "core/model/pretrain/visualbert" section:
    `pretrained_name`, `config_path`, `gradient_checkpointing` and
    `pretrained_weight_path`. The config and weight locations registered for
    `pretrained_name` in `pretrained_visualbert_infos` are combined with the
    explicit options through `pop_value`.

    Args:
        config: The core configuration.
        **kwargs: Additional keyword arguments (not read by this method body).

    Returns:
        VisualBertForPretrain: The initialized VisualBertForPretrain instance.
    """
    config.set_default_section("core/model/pretrain/visualbert")
    pretrained_name = config.getoption("pretrained_name", "visualbert-vqa-coco-pre")

    # Model configuration file: explicit option combined with the registered one.
    registered_config = nested_dict_value(
        pretrained_visualbert_infos, pretrained_name, "config"
    )
    config_path = cached_path(
        pop_value(config.getoption("config_path", None), registered_config)
    )

    gradient_checkpointing = config.getoption("gradient_checkpointing", False)
    model = cls(config_path, gradient_checkpointing)

    # Weights are optional; check_none=False presumably lets pop_value return
    # None instead of failing when neither source supplies a path — verify.
    registered_weight = nested_dict_value(
        pretrained_visualbert_infos, pretrained_name, "weight"
    )
    weight_path = pop_value(
        config.getoption("pretrained_weight_path", None),
        registered_weight,
        check_none=False,
    )
    if weight_path is None:
        return model

    model.from_pretrained(weight_path)
    return model