Skip to content

unitorch.cli.models.grounding_dino¤

GroundingDinoProcessor¤

Tip

core/process/grounding_dino is the section for configuration of GroundingDinoProcessor.

Bases: GroundingDinoProcessor

Source code in src/unitorch/cli/models/grounding_dino/processing.py
26
27
28
29
30
31
32
33
34
def __init__(
    self,
    vocab_path: str,
    vision_config_path: str,
):
    """Initialize the processor by delegating to the parent initializer.

    Args:
        vocab_path: Forwarded unchanged as the parent's ``vocab_path``.
        vision_config_path: Forwarded unchanged as the parent's
            ``vision_config_path``.
    """
    super().__init__(vocab_path=vocab_path, vision_config_path=vision_config_path)

from_config classmethod ¤

from_config(config, **kwargs)
Source code in src/unitorch/cli/models/grounding_dino/processing.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
@classmethod
@config_defaults_init("core/process/grounding_dino")
def from_config(cls, config, **kwargs):
    """Resolve the processor's path arguments from ``config``.

    Options are read from the ``core/process/grounding_dino`` section.
    ``pretrained_name`` defaults to ``"grounding-dino-tiny"`` and selects the
    entry of ``pretrained_grounding_dino_infos`` that is offered to
    ``pop_value`` alongside each explicitly configured path.

    Args:
        config: Configuration object exposing ``set_default_section`` and
            ``getoption``.
        **kwargs: Accepted but not used in this method body.

    Returns:
        A dict with the keys ``vocab_path`` and ``vision_config_path``, each
        holding the result of ``cached_path`` on the resolved value.
    """
    config.set_default_section("core/process/grounding_dino")
    pretrained_name = config.getoption("pretrained_name", "grounding-dino-tiny")

    def _resolve(option, info_key):
        # The configured value is handed to pop_value first and the registry
        # entry second -- presumably the first usable one wins; confirm
        # against pop_value's definition.
        configured = config.getoption(option, None)
        registered = nested_dict_value(
            pretrained_grounding_dino_infos, pretrained_name, info_key
        )
        return cached_path(pop_value(configured, registered))

    return {
        "vocab_path": _resolve("vocab_path", "vocab"),
        "vision_config_path": _resolve("vision_config_path", "vision_config"),
    }

_detection_inputs ¤

_detection_inputs(text: str, image: Union[Image, str])
Source code in src/unitorch/cli/models/grounding_dino/processing.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
@register_process("core/process/grounding_dino/detection/inputs")
def _detection_inputs(
    self,
    text: str,
    image: Union[Image.Image, str],
):
    """Prepare model inputs for detection from a text prompt and an image.

    Delegates to the parent's ``detection_inputs`` and repackages four of its
    attributes into a ``TensorInputs``.

    Args:
        text: Forwarded unchanged to the parent's ``detection_inputs``.
        image: Forwarded unchanged to the parent's ``detection_inputs``.

    Returns:
        ``TensorInputs`` carrying ``pixel_values``, ``input_ids``,
        ``attention_mask`` and ``token_type_ids``.
    """
    encoded = super().detection_inputs(text=text, image=image)
    fields = ("pixel_values", "input_ids", "attention_mask", "token_type_ids")
    return TensorInputs(**{name: getattr(encoded, name) for name in fields})

_detection ¤

_detection(
    text: str,
    image: Union[Image, str],
    bboxes: List[List[float]],
    classes: List[str],
    do_eval: Optional[bool] = False,
)
Source code in src/unitorch/cli/models/grounding_dino/processing.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@register_process("core/process/grounding_dino/detection")
def _detection(
    self,
    text: str,
    image: Union[Image.Image, str],
    bboxes: List[List[float]],
    classes: List[str],
    do_eval: Optional[bool] = False,
):
    """Prepare model inputs and detection targets for one example.

    Delegates to the parent's ``detection`` and returns a pair: the model
    inputs, and the targets in a container that depends on ``do_eval``.

    Args:
        text: Forwarded unchanged to the parent's ``detection``.
        image: Forwarded unchanged to the parent's ``detection``.
        bboxes: Forwarded unchanged to the parent's ``detection``.
        classes: Forwarded unchanged to the parent's ``detection``.
        do_eval: When truthy, the processed boxes are rescaled by the
            processed image size and returned as ``DetectionTargets``;
            otherwise they are returned untouched as ``TensorSeqInputs``.

    Returns:
        A ``(TensorInputs, targets)`` tuple.
    """
    processed = super().detection(
        text=text, image=image, bboxes=bboxes, classes=classes
    )
    inputs = TensorInputs(
        pixel_values=processed.pixel_values,
        input_ids=processed.input_ids,
        attention_mask=processed.attention_mask,
        token_type_ids=processed.token_type_ids,
    )

    if not do_eval:
        return inputs, TensorSeqInputs(
            bboxes=processed.bboxes,
            classes=processed.classes,
        )

    # Everything after the first dimension of pixel_values must unpack into
    # exactly two values -- assumes a (channels, height, width) layout; TODO
    # confirm against the parent processor.
    height, width = processed.pixel_values.size()[1:]

    # Columns 0 and 2 are multiplied by the width, columns 1 and 3 by the
    # height (presumably normalized x/y coordinates -- confirm). The writes go
    # into the tensor returned by the parent, i.e. it is modified in place.
    scaled = processed.bboxes
    for column, factor in enumerate((width, height, width, height)):
        scaled[:, column] = scaled[:, column] * factor

    return inputs, DetectionTargets(
        bboxes=scaled,
        classes=processed.classes,
    )

_post_dectection ¤

_post_dectection(outputs: DetectionOutputs)
Source code in src/unitorch/cli/models/grounding_dino/processing.py
125
126
127
128
129
130
131
@register_process("core/postprocess/grounding_dino/detection")
def _post_dectection(self, outputs: DetectionOutputs):
    """Convert detection outputs into a writer-ready table.

    Starts from ``outputs.to_pandas()`` and sets the ``bboxes``, ``scores``
    and ``classes`` columns to the ``tolist()`` form of each element of the
    matching attribute on ``outputs``.

    Args:
        outputs: Detection outputs exposing ``to_pandas`` plus iterable
            ``bboxes``, ``scores`` and ``classes`` attributes.

    Returns:
        ``WriterOutputs`` wrapping the resulting table.
    """
    frame = outputs.to_pandas()
    for column in ("bboxes", "scores", "classes"):
        frame[column] = [item.tolist() for item in getattr(outputs, column)]
    return WriterOutputs(frame)

GroundingDinoForDetection¤

Tip

core/model/detection/grounding_dino is the section for configuration of GroundingDinoForDetection.

Bases: GroundingDinoForDetection

Source code in src/unitorch/cli/models/grounding_dino/modeling.py
27
28
29
30
31
32
33
def __init__(
    self,
    config_path: str,
):
    """Initialize the model by delegating to the parent initializer.

    Args:
        config_path: Forwarded unchanged as the parent's ``config_path``.
    """
    super().__init__(config_path=config_path)

from_config classmethod ¤

from_config(config, **kwargs)
Source code in src/unitorch/cli/models/grounding_dino/modeling.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
@classmethod
@config_defaults_init("core/model/detection/grounding_dino")
def from_config(cls, config, **kwargs):
    """Build a model instance from ``config`` and optionally load weights.

    Options are read from the ``core/model/detection/grounding_dino``
    section. ``pretrained_name`` defaults to ``"grounding-dino-tiny"`` and
    selects the entry of ``pretrained_grounding_dino_infos`` that is offered
    to ``pop_value`` alongside the explicitly configured values.

    Args:
        config: Configuration object exposing ``set_default_section`` and
            ``getoption``.
        **kwargs: Accepted but not used in this method body.

    Returns:
        The constructed instance; ``from_pretrained`` has been called on it
        when a weight path was resolved.
    """
    config.set_default_section("core/model/detection/grounding_dino")
    pretrained_name = config.getoption("pretrained_name", "grounding-dino-tiny")

    def _registered(info_key):
        # Look up one field of the registry entry for the selected name.
        return nested_dict_value(
            pretrained_grounding_dino_infos, pretrained_name, info_key
        )

    config_path = cached_path(
        pop_value(config.getoption("config_path", None), _registered("config"))
    )
    model = cls(config_path=config_path)

    # check_none=False is passed only for the weight lookup, and the result
    # is tested for None below -- so a missing weight is tolerated here.
    weight_path = pop_value(
        config.getoption("pretrained_weight_path", None),
        _registered("weight"),
        check_none=False,
    )
    if weight_path is not None:
        model.from_pretrained(weight_path)

    return model

forward ¤

forward(
    pixel_values,
    input_ids,
    attention_mask,
    token_type_ids,
    bboxes,
    classes,
)
Source code in src/unitorch/cli/models/grounding_dino/modeling.py
66
67
68
69
70
71
72
73
74
75
76
77
78
@autocast(device_type=("cuda" if torch.cuda.is_available() else "cpu"))
def forward(
    self, pixel_values, input_ids, attention_mask, token_type_ids, bboxes, classes
):
    """Run the parent forward pass and wrap its result as a loss output.

    The autocast device type is chosen once, when the decorator expression is
    evaluated at definition time: ``"cuda"`` if ``torch.cuda.is_available()``
    is true, otherwise ``"cpu"``.

    Args:
        pixel_values: Forwarded unchanged to the parent's ``forward``.
        input_ids: Forwarded unchanged to the parent's ``forward``.
        attention_mask: Forwarded unchanged to the parent's ``forward``.
        token_type_ids: Forwarded unchanged to the parent's ``forward``.
        bboxes: Forwarded unchanged to the parent's ``forward``.
        classes: Forwarded unchanged to the parent's ``forward``.

    Returns:
        ``LossOutputs`` whose ``loss`` is whatever the parent's ``forward``
        returned.
    """
    batch = {
        "pixel_values": pixel_values,
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "token_type_ids": token_type_ids,
        "bboxes": bboxes,
        "classes": classes,
    }
    loss = super().forward(**batch)
    return LossOutputs(loss=loss)

detect ¤

detect(
    pixel_values,
    input_ids,
    attention_mask,
    token_type_ids,
    norm_bboxes: bool = False,
    text_threshold: Optional[float] = 0.25,
    box_threshold: Optional[float] = 0.25,
)
Source code in src/unitorch/cli/models/grounding_dino/modeling.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
@config_defaults_method("core/model/detection/grounding_dino")
def detect(
    self,
    pixel_values,
    input_ids,
    attention_mask,
    token_type_ids,
    norm_bboxes: bool = False,
    text_threshold: Optional[float] = 0.25,
    box_threshold: Optional[float] = 0.25,
):
    """Run the parent detection and reduce class scores to class indices.

    Args:
        pixel_values: Forwarded unchanged to the parent's ``detect``.
        input_ids: Forwarded unchanged to the parent's ``detect``.
        attention_mask: Forwarded unchanged to the parent's ``detect``.
        token_type_ids: Forwarded unchanged to the parent's ``detect``.
        norm_bboxes: Forwarded unchanged to the parent's ``detect``.
        text_threshold: Forwarded unchanged to the parent's ``detect``.
        box_threshold: Forwarded unchanged to the parent's ``detect``.

    Returns:
        ``DetectionOutputs`` with the parent's ``bboxes`` and ``scores``, and
        ``classes`` replaced by the index of the maximum along the last
        dimension of each per-item class tensor.
    """
    detections = super().detect(
        pixel_values=pixel_values,
        input_ids=input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        norm_bboxes=norm_bboxes,
        text_threshold=text_threshold,
        box_threshold=box_threshold,
    )
    # max(dim=-1) yields (values, indices); only the indices are kept.
    label_ids = [
        class_scores.max(dim=-1).indices for class_scores in detections.classes
    ]
    return DetectionOutputs(
        bboxes=detections.bboxes,
        scores=detections.scores,
        classes=label_ids,
    )