unitorch.models.bloom¤

BloomProcessor¤

Bases: HfTextClassificationProcessor, HfTextGenerationProcessor

Processor for the Bloom model that combines text classification and text generation functionality.

Initializes a new instance of the BloomProcessor.

Parameters:

Name	Type	Description	Default
`tokenizer_file`	`str`	The path to the tokenizer file.	required
`max_seq_length`	`Optional[int]`	The maximum sequence length for classification. Defaults to 128.	`128`
`max_gen_seq_length`	`Optional[int]`	The maximum sequence length for generation. Defaults to 48.	`48`

Source code in src/unitorch/models/bloom/processing.py

def __init__(
    self,
    tokenizer_file: str,
    max_seq_length: Optional[int] = 128,
    max_gen_seq_length: Optional[int] = 48,
):
    """
    Build the Bloom tokenizer and initialize both parent processors with it.

    Args:
        tokenizer_file (str): Path of the tokenizer file handed to ``BloomTokenizerFast``.
        max_seq_length (Optional[int]): Sequence length limit forwarded to both parent processors. Defaults to 128.
        max_gen_seq_length (Optional[int]): Generation length limit forwarded to the generation processor. Defaults to 48.
    """
    tokenizer = BloomTokenizerFast(tokenizer_file=tokenizer_file)

    # Mirror the BOS/EOS tokens (and their ids) under the CLS/SEP names.
    alias_pairs = (
        ("cls_token", "bos_token"),
        ("sep_token", "eos_token"),
        ("cls_token_id", "bos_token_id"),
        ("sep_token_id", "eos_token_id"),
    )
    for alias, source in alias_pairs:
        setattr(tokenizer, alias, getattr(tokenizer, source))

    # Both parents receive the same tokenizer instance and length limit.
    shared_kwargs = dict(tokenizer=tokenizer, max_seq_length=max_seq_length)
    HfTextClassificationProcessor.__init__(self, **shared_kwargs)
    HfTextGenerationProcessor.__init__(
        self,
        max_gen_seq_length=max_gen_seq_length,
        **shared_kwargs,
    )

classification ¤

classification(
    text: str,
    text_pair: Optional[str] = None,
    max_seq_length: Optional[int] = None,
) -> GenericOutputs

Preprocesses text for classification.

Parameters:

Name	Type	Description	Default
`text`	`str`	The input text to classify.	required
`text_pair`	`Optional[str]`	The second input text for sequence classification. Defaults to None.	`None`
`max_seq_length`	`Optional[int]`	The maximum sequence length. Defaults to None.	`None`

Returns:

Name	Type	Description
`GenericOutputs`	`GenericOutputs`	The processed input IDs and attention mask tensors.

Source code in src/unitorch/models/bloom/processing.py

def classification(
    self,
    text: str,
    text_pair: Optional[str] = None,
    max_seq_length: Optional[int] = None,
) -> GenericOutputs:
    """
    Tokenize one text (or a text pair) into fixed-length, left-padded classification inputs.

    Args:
        text (str): The input text to classify.
        text_pair (Optional[str]): Optional second text whose tokens are appended after ``text``. Defaults to None.
        max_seq_length (Optional[int]): Output length; resolved against ``self.max_seq_length`` via ``pop_value``. Defaults to None.

    Returns:
        GenericOutputs: ``input_ids`` and ``attention_mask`` long tensors, each of length ``max_seq_length``.
    """
    limit = pop_value(
        max_seq_length,
        self.max_seq_length,
    )

    first = self.tokenizer.tokenize(str(text))
    if text_pair is not None:
        second = self.tokenizer.tokenize(str(text_pair))
        # NOTE(review): presumably trims both lists in place to fit `limit` -- confirm in the helper.
        truncate_sequence_pair(first, second, limit)
        merged = first + second
    else:
        merged = first[:limit]
    input_ids = self.tokenizer.convert_tokens_to_ids(merged)

    # Padding goes on the left; the mask is 0 over padding and 1 over real tokens.
    pad_count = limit - len(input_ids)
    attention_mask = [0] * pad_count + [1] * len(input_ids)
    input_ids = [self.pad_token_id] * pad_count + input_ids

    assert len(input_ids) == limit
    assert len(attention_mask) == limit
    return GenericOutputs(
        input_ids=torch.tensor(input_ids, dtype=torch.long),
        attention_mask=torch.tensor(attention_mask, dtype=torch.long),
    )

generation ¤

generation(
    text: str,
    text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
) -> GenericOutputs

Preprocesses text for generation.

Parameters:

Name	Type	Description	Default
`text`	`str`	The input text for generation.	required
`text_pair`	`str`	The second input text for generation.	required
`max_seq_length`	`Optional[int]`	The maximum sequence length for the input text. Defaults to None.	`None`
`max_gen_seq_length`	`Optional[int]`	The maximum generation sequence length. Defaults to None.	`None`

Returns:

Name	Type	Description
`GenericOutputs`	`GenericOutputs`	The processed input IDs and attention mask tensors.

Source code in src/unitorch/models/bloom/processing.py

def generation(
    self,
    text: str,
    text_pair: str,
    max_seq_length: Optional[int] = None,
    max_gen_seq_length: Optional[int] = None,
) -> GenericOutputs:
    """
    Build training inputs and labels from a source text and a target text.

    The source tokens are left-padded to ``max_seq_length``; the target tokens
    (terminated by the EOS token) are right-padded to ``max_gen_seq_length``.
    The label sequence holds the target preceded by ``max_seq_length - 1``
    zeros, so it has the same total length as the inputs.

    Args:
        text (str): The source text; only its last ``max_seq_length`` tokens are kept.
        text_pair (str): The target text; at most ``max_gen_seq_length - 1`` tokens are kept before EOS is appended.
        max_seq_length (Optional[int]): Length of the source segment; resolved against ``self.max_seq_length``. Defaults to None.
        max_gen_seq_length (Optional[int]): Length of the target segment; resolved against ``self.max_gen_seq_length``. Defaults to None.

    Returns:
        GenericOutputs: ``input_ids``, ``attention_mask``, ``input_ids_label`` and ``attention_mask_label`` long tensors.
    """
    src_limit = pop_value(
        max_seq_length,
        self.max_seq_length,
    )
    gen_limit = pop_value(
        max_gen_seq_length,
        self.max_gen_seq_length,
    )

    source = self.tokenizer.tokenize(str(text))[-src_limit:]
    target = self.tokenizer.tokenize(str(text_pair))[: gen_limit - 1]
    target = target + [self.eos_token]

    left_pad = src_limit - len(source)
    right_pad = gen_limit - len(target)

    # Model inputs: [pad ... source | target ... pad]
    padded_tokens = (
        [self.pad_token] * left_pad + source + target + [self.pad_token] * right_pad
    )
    attention_mask = (
        [0] * left_pad + [1] * (len(source) + len(target)) + [0] * right_pad
    )
    input_ids = self.tokenizer.convert_tokens_to_ids(padded_tokens)

    # Labels: the target sits one position earlier than in the inputs, hence
    # `src_limit - 1` leading zeros and one extra trailing pad.
    label_offset = src_limit - 1
    label_tail = right_pad + 1
    label_ids = self.tokenizer.convert_tokens_to_ids(
        target + [self.pad_token] * label_tail
    )
    input_ids_label = [0] * label_offset + label_ids
    attention_mask_label = (
        [0] * label_offset + [1] * len(target) + [0] * label_tail
    )

    return GenericOutputs(
        input_ids=torch.tensor(input_ids, dtype=torch.long),
        attention_mask=torch.tensor(attention_mask, dtype=torch.long),
        input_ids_label=torch.tensor(input_ids_label, dtype=torch.long),
        attention_mask_label=torch.tensor(attention_mask_label, dtype=torch.long),
    )

generation_inputs ¤

generation_inputs(
    text: str, max_seq_length: Optional[int] = None
) -> GenericOutputs

Preprocesses text as generation inputs.

Parameters:

Name	Type	Description	Default
`text`	`str`	The input text for generation.	required
`max_seq_length`	`Optional[int]`	The maximum sequence length. Defaults to None.	`None`

Returns:

Name	Type	Description
`GenericOutputs`	`GenericOutputs`	The processed input IDs tensor.

Source code in src/unitorch/models/bloom/processing.py

def generation_inputs(
    self,
    text: str,
    max_seq_length: Optional[int] = None,
) -> GenericOutputs:
    """
    Tokenize a prompt into a fixed-length, left-padded id sequence.

    Args:
        text (str): The prompt text; only its last ``max_seq_length`` tokens are kept.
        max_seq_length (Optional[int]): Output length; resolved against ``self.max_seq_length`` via ``pop_value``. Defaults to None.

    Returns:
        GenericOutputs: ``input_ids`` long tensor of length ``max_seq_length``.
    """
    limit = pop_value(
        max_seq_length,
        self.max_seq_length,
    )
    # Keep the tail of the text so the most recent tokens survive truncation.
    kept = self.tokenizer.tokenize(str(text))[-limit:]
    left_padded = [self.pad_token] * (limit - len(kept)) + kept
    input_ids = self.tokenizer.convert_tokens_to_ids(left_padded)

    assert len(input_ids) == limit
    return GenericOutputs(
        input_ids=torch.tensor(input_ids, dtype=torch.long),
    )

generation_labels ¤

generation_labels(
    text: str, max_gen_seq_length: Optional[int] = None
) -> GenericOutputs

Preprocesses text as generation labels.

Parameters:

Name	Type	Description	Default
`text`	`str`	The input text for generation labels.	required
`max_gen_seq_length`	`Optional[int]`	The maximum generation sequence length. Defaults to None.	`None`

Returns:

Name	Type	Description
`GenericOutputs`	`GenericOutputs`	The processed input IDs and attention mask tensors.

Source code in src/unitorch/models/bloom/processing.py

def generation_labels(
    self,
    text: str,
    max_gen_seq_length: Optional[int] = None,
) -> GenericOutputs:
    """
    Preprocesses text as generation labels.

    The text is truncated to ``max_gen_seq_length - 1`` tokens, the EOS token
    is appended, and the result is right-padded to ``max_gen_seq_length``.

    Args:
        text (str): The input text for generation labels.
        max_gen_seq_length (Optional[int]): The maximum generation sequence length;
            resolved against ``self.max_gen_seq_length`` via ``pop_value``. Defaults to None.

    Returns:
        GenericOutputs: The processed input IDs and attention mask tensors,
            each of length ``max_gen_seq_length``.
    """
    max_gen_seq_length = pop_value(
        max_gen_seq_length,
        self.max_gen_seq_length,
    )
    # Reserve one slot for the EOS token that terminates the label sequence.
    tokens = self.tokenizer.tokenize(str(text))[: max_gen_seq_length - 1] + [
        self.eos_token
    ]
    input_ids = self.tokenizer.convert_tokens_to_ids(tokens)
    attention_mask = [1] * len(input_ids)

    # Right-pad: pad ids extend input_ids, zeros extend the mask.
    padding = [0] * (max_gen_seq_length - len(input_ids))
    input_ids += [self.pad_token_id] * len(padding)
    attention_mask += padding

    assert len(input_ids) == max_gen_seq_length
    assert len(attention_mask) == max_gen_seq_length
    return GenericOutputs(
        input_ids=torch.tensor(input_ids, dtype=torch.long),
        attention_mask=torch.tensor(attention_mask, dtype=torch.long),
    )

instruction_generation_inputs ¤

instruction_generation_inputs(
    instruction: str,
    input: str,
    max_seq_length: Optional[int] = None,
) -> GenericOutputs

Preprocesses text as generation inputs.

Parameters:

Name	Type	Description	Default
`instruction`	`str`	The instruction text for generation.	required
`input`	`str`	The input text that accompanies the instruction.	required
`max_seq_length`	`Optional[int]`	The maximum sequence length. Defaults to None.	`None`

Returns:

Name	Type	Description
`GenericOutputs`	`GenericOutputs`	The processed input IDs tensor.

Source code in src/unitorch/models/bloom/processing.py

def instruction_generation_inputs(
    self,
    instruction: str,
    input: str,
    max_seq_length: Optional[int] = None,
) -> GenericOutputs:
    """
    Preprocesses an instruction and its input text as generation inputs.

    Args:
        instruction (str): The instruction text for generation.
        input (str): The input text that accompanies the instruction.
        max_seq_length (Optional[int]): The maximum sequence length; resolved
            against ``self.max_seq_length`` via ``pop_value``. Defaults to None.

    Returns:
        GenericOutputs: The processed input IDs tensor of length ``max_seq_length``.
    """
    max_seq_length = pop_value(
        max_seq_length,
        self.max_seq_length,
    )
    # NOTE(review): the helper is defined outside this excerpt; it is assumed to
    # return at most `max_seq_length` tokens -- the assertion below relies on it.
    tokens = self._instrution_tokenize(instruction, input, max_seq_length)
    # Left-pad so the real tokens end the sequence.
    padding = [self.pad_token] * (max_seq_length - len(tokens))
    tokens = padding + tokens
    input_ids = self.tokenizer.convert_tokens_to_ids(tokens)

    assert len(input_ids) == max_seq_length
    return GenericOutputs(
        input_ids=torch.tensor(input_ids, dtype=torch.long),
    )

BloomForClassification¤

Bases: GenericModel, PeftWeightLoaderMixin

A classification model based on the Bloom architecture.

Initializes a new instance of the BloomForClassification model.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	The path to the configuration file for the Bloom model.	required
`num_classes`	`Optional[int]`	The number of output classes for classification. Defaults to 1.	`1`
`hidden_dropout_prob`	`Optional[float]`	The dropout probability for the hidden layers. Defaults to 0.1.	`0.1`
`gradient_checkpointing`	`Optional[bool]`	Whether to use gradient checkpointing. Defaults to False.	`False`

Source code in src/unitorch/models/bloom/modeling.py

def __init__(
    self,
    config_path: str,
    num_classes: Optional[int] = 1,
    hidden_dropout_prob: Optional[float] = 0.1,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Construct the Bloom backbone, dropout layer and linear classification head.

    Args:
        config_path (str): Path of the JSON file read by ``BloomConfig.from_json_file``.
        num_classes (Optional[int]): Output width of the classification head. Defaults to 1.
        hidden_dropout_prob (Optional[float]): Dropout probability applied before the head. Defaults to 0.1.
        gradient_checkpointing (Optional[bool]): Value stored on the config's ``gradient_checkpointing`` field. Defaults to False.
    """
    super().__init__()

    bloom_config = BloomConfig.from_json_file(config_path)
    bloom_config.gradient_checkpointing = gradient_checkpointing
    self.config = bloom_config

    # Backbone first, then the head, so parameter creation order is unchanged.
    self.transformer = BloomModel(bloom_config)
    self.dropout = nn.Dropout(hidden_dropout_prob)
    self.classifier = nn.Linear(bloom_config.hidden_size, num_classes)

    self.init_weights()

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
) -> Tensor

Forward pass of the BloomForClassification model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	The input token IDs.	required
`attention_mask`	`Optional[Tensor]`	The attention mask tensor. Defaults to None.	`None`

Returns:

Type	Description
`Tensor`	The output logits for classification.

Source code in src/unitorch/models/bloom/modeling.py

def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Compute classification logits from the backbone's last sequence position.

    Args:
        input_ids (torch.Tensor): The input token IDs.
        attention_mask (Optional[torch.Tensor]): The attention mask forwarded to the backbone. Defaults to None.

    Returns:
        (torch.Tensor): The classifier output for each sequence in the batch.
    """
    hidden_states = self.transformer(
        input_ids,
        attention_mask=attention_mask,
    )[0]
    # Index -1 along the second dimension selects the final position of each sequence.
    last_position = hidden_states[:, -1]
    return self.classifier(self.dropout(last_position))

BloomForGeneration¤

Bases: GenericModel, PeftWeightLoaderMixin

A generation model based on the Bloom architecture.

Initializes a new instance of the BloomForGeneration model.

Parameters:

Name	Type	Description	Default
`config_path`	`str`	The path to the configuration file for the Bloom model.	required
`gradient_checkpointing`	`Optional[bool]`	Whether to use gradient checkpointing. Defaults to False.	`False`

Source code in src/unitorch/models/bloom/modeling.py

def __init__(
    self,
    config_path: str,
    gradient_checkpointing: Optional[bool] = False,
):
    """
    Construct the Bloom causal language model from a JSON configuration file.

    Args:
        config_path (str): Path of the JSON file read by ``BloomConfig.from_json_file``.
        gradient_checkpointing (Optional[bool]): Value stored on the config's ``gradient_checkpointing`` field. Defaults to False.
    """
    super().__init__()

    bloom_config = BloomConfig.from_json_file(config_path)
    bloom_config.gradient_checkpointing = gradient_checkpointing
    self.config = bloom_config

    self.model = BloomForCausalLM(bloom_config)
    self.init_weights()

forward ¤

forward(
    input_ids: Tensor,
    attention_mask: Optional[Tensor] = None,
) -> Tensor

Forward pass of the BloomForGeneration model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	The input token IDs.	required
`attention_mask`	`Optional[Tensor]`	The attention mask tensor. Defaults to None.	`None`

Returns:

Type	Description
`Tensor`	The output logits.

Source code in src/unitorch/models/bloom/modeling.py

def forward(
    self,
    input_ids: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Run the causal language model and return its logits.

    Args:
        input_ids (torch.Tensor): The input token IDs.
        attention_mask (Optional[torch.Tensor]): The attention mask forwarded to the model. Defaults to None.

    Returns:
        (torch.Tensor): The ``logits`` field of the model output.
    """
    # return_dict=True so the logits can be read by attribute name.
    return self.model(
        input_ids,
        attention_mask=attention_mask,
        return_dict=True,
    ).logits

generate ¤

generate(
    input_ids: Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[
        Union[int, List[int]]
    ] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
) -> GenericOutputs

Generate sequences using the BloomForGeneration model.

Parameters:

Name	Type	Description	Default
`input_ids`	`Tensor`	The input token IDs.	required
`num_beams`	`Optional[int]`	The number of beams for beam search. Defaults to 5.	`5`
`decoder_start_token_id`	`Optional[int]`	The ID of the start token for decoding. Defaults to 1.	`1`
`decoder_end_token_id`	`Optional[Union[int, List[int]]]`	The ID (or list of IDs) of the end token for decoding. Defaults to 2.	`2`
`num_return_sequences`	`Optional[int]`	The number of generated sequences to return. Defaults to 1.	`1`
`min_gen_seq_length`	`Optional[int]`	The minimum length of the generated sequences. Defaults to 0.	`0`
`max_gen_seq_length`	`Optional[int]`	The maximum length of the generated sequences. Defaults to 48.	`48`
`repetition_penalty`	`Optional[float]`	The penalty applied to repeated tokens. Defaults to 1.0.	`1.0`
`no_repeat_ngram_size`	`Optional[int]`	The size of n-grams to prevent repetition. Defaults to 0.	`0`
`early_stopping`	`Optional[bool]`	Whether to stop generation early based on specified conditions. Defaults to True.	`True`
`length_penalty`	`Optional[float]`	The penalty for longer sequences. Defaults to 1.0.	`1.0`
`num_beam_groups`	`Optional[int]`	The number of beam groups for diverse beam search. Defaults to 1.	`1`
`diversity_penalty`	`Optional[float]`	The penalty for diverse beam search. Defaults to 0.0.	`0.0`
`do_sample`	`Optional[bool]`	Whether to use sampling for generation. Defaults to False.	`False`
`temperature`	`Optional[float]`	The temperature for sampling. Defaults to 1.0.	`1.0`
`top_k`	`Optional[int]`	The number of top-k tokens to consider for sampling. Defaults to 50.	`50`
`top_p`	`Optional[float]`	The cumulative probability for top-p sampling. Defaults to 1.0.	`1.0`

Returns:

Name	Type	Description
`GenericOutputs`	`GenericOutputs`	The generated sequences and their scores.

Source code in src/unitorch/models/bloom/modeling.py

@torch.no_grad()
def generate(
    self,
    input_ids: torch.Tensor,
    num_beams: Optional[int] = 5,
    decoder_start_token_id: Optional[int] = 1,
    decoder_end_token_id: Optional[Union[int, List[int]]] = 2,
    num_return_sequences: Optional[int] = 1,
    min_gen_seq_length: Optional[int] = 0,
    max_gen_seq_length: Optional[int] = 48,
    repetition_penalty: Optional[float] = 1.0,
    no_repeat_ngram_size: Optional[int] = 0,
    early_stopping: Optional[bool] = True,
    length_penalty: Optional[float] = 1.0,
    num_beam_groups: Optional[int] = 1,
    diversity_penalty: Optional[float] = 0.0,
    do_sample: Optional[bool] = False,
    temperature: Optional[float] = 1.0,
    top_k: Optional[int] = 50,
    top_p: Optional[float] = 1.0,
) -> GenericOutputs:
    """
    Generate continuations for ``input_ids`` and return only the new tokens.

    The prompt positions are removed from the generated sequences and the
    remainder is written into a zero-filled buffer of width
    ``max_gen_seq_length``.

    Args:
        input_ids (torch.Tensor): The prompt token IDs; its second dimension is taken as the prompt length.
        num_beams (Optional[int]): The number of beams for beam search. Defaults to 5.
        decoder_start_token_id (Optional[int]): Passed to the model as both ``decoder_start_token_id`` and ``bos_token_id``. Defaults to 1.
        decoder_end_token_id (Optional[Union[int, List[int]]]): Passed to the model as ``eos_token_id``. Defaults to 2.
        num_return_sequences (Optional[int]): The number of generated sequences to return per prompt. Defaults to 1.
        min_gen_seq_length (Optional[int]): Minimum number of new tokens; the prompt length is added before calling the model. Defaults to 0.
        max_gen_seq_length (Optional[int]): Maximum number of new tokens; the prompt length is added before calling the model. Defaults to 48.
        repetition_penalty (Optional[float]): Forwarded as ``repetition_penalty``. Defaults to 1.0.
        no_repeat_ngram_size (Optional[int]): Forwarded as ``no_repeat_ngram_size``. Defaults to 0.
        early_stopping (Optional[bool]): Forwarded as ``early_stopping``. Defaults to True.
        length_penalty (Optional[float]): Forwarded as ``length_penalty``. Defaults to 1.0.
        num_beam_groups (Optional[int]): Forwarded as ``num_beam_groups``. Defaults to 1.
        diversity_penalty (Optional[float]): Forwarded as ``diversity_penalty``. Defaults to 0.0.
        do_sample (Optional[bool]): Forwarded as ``do_sample``. Defaults to False.
        temperature (Optional[float]): Forwarded as ``temperature``. Defaults to 1.0.
        top_k (Optional[int]): Forwarded as ``top_k``. Defaults to 50.
        top_p (Optional[float]): Forwarded as ``top_p``. Defaults to 1.0.

    Returns:
        GenericOutputs: ``sequences`` (long tensor of new tokens, with the
            per-prompt dimension dropped when ``num_return_sequences == 1``)
            and ``sequences_scores`` taken from the model's generate output.
    """
    prompt_len = input_ids.size(1)

    # The underlying length limits count the prompt, so shift both by its length.
    generation_kwargs = dict(
        max_length=max_gen_seq_length + prompt_len,
        min_length=min_gen_seq_length + prompt_len,
        num_beams=num_beams,
        do_sample=do_sample,
        decoder_start_token_id=decoder_start_token_id,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=early_stopping,
        length_penalty=length_penalty,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        bos_token_id=decoder_start_token_id,
        eos_token_id=decoder_end_token_id,
        num_beam_groups=num_beam_groups,
        diversity_penalty=diversity_penalty,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_dict_in_generate=True,
        output_scores=True,
    )
    outputs = self.model.generate(input_ids, **generation_kwargs)

    # Group the flat results as (prompt, returned sequence, position).
    raw = outputs.sequences
    grouped = raw.reshape(-1, num_return_sequences, raw.size(-1))

    # Copy only the newly generated tail into a zero-filled fixed-width buffer.
    new_len = grouped.size(-1) - prompt_len
    padded = torch.zeros(
        grouped.size(0), num_return_sequences, max_gen_seq_length
    ).to(device=grouped.device)
    padded[:, :, :new_len].copy_(grouped[:, :, prompt_len:])

    if num_return_sequences == 1:
        padded = padded.reshape(-1, max_gen_seq_length)

    return GenericOutputs(
        sequences=padded.long(),
        sequences_scores=outputs.sequences_scores,
    )