Skip to content

unitorch.models.detr¤

DetrProcessor¤

Image processor for DETR detection and segmentation models.

Source code in src/unitorch/models/detr/processing.py
17
18
19
20
21
22
23
24
25
def __init__(
    self,
    vision_config_path: str,
    min_size_test: int = 800,
    max_size_test: int = 1333,
) -> None:
    """Build the processor from a JSON image-processor configuration.

    Args:
        vision_config_path: Path to the JSON file handed to
            ``DetrImageProcessorPil.from_json_file``.
        min_size_test: Stored on the instance as ``min_size_test``; it is
            not read by ``__init__`` itself.
        max_size_test: Stored on the instance as ``max_size_test``; it is
            not read by ``__init__`` itself.
    """
    # Load the underlying image processor first so a bad path fails early.
    processor = DetrImageProcessorPil.from_json_file(vision_config_path)
    self.vision_processor = processor
    self.min_size_test, self.max_size_test = min_size_test, max_size_test

vision_processor instance-attribute ¤

vision_processor = from_json_file(vision_config_path)

min_size_test instance-attribute ¤

min_size_test = min_size_test

max_size_test instance-attribute ¤

max_size_test = max_size_test

image ¤

image(image: Image) -> GenericOutputs

Preprocess a single image and return pixel values with original size.

Source code in src/unitorch/models/detr/processing.py
27
28
29
30
31
32
33
def image(self, image: Image.Image) -> GenericOutputs:
    """Run one image through the vision processor.

    Args:
        image: Input image; its ``size`` attribute is read as
            ``(width, height)``.

    Returns:
        ``GenericOutputs`` with ``image`` set to the processed pixel values
        (leading dimension squeezed away when it has size 1) and ``sizes``
        set to a tensor holding ``[height, width]`` of the input image.
    """
    # Record the size before preprocessing; note the (height, width) order
    # used in the returned tensor.
    width, height = image.size
    original_size = [height, width]

    encoded = self.vision_processor.preprocess(images=image, return_tensors="pt")
    pixel_values = encoded.pixel_values.squeeze(0)

    return GenericOutputs(image=pixel_values, sizes=torch.tensor(original_size))

detection ¤

detection(
    image: Image,
    bboxes: List[List[float]],
    classes: List[int],
) -> GenericOutputs

Preprocess an image and normalise bounding boxes for detection training.

Source code in src/unitorch/models/detr/processing.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def detection(
    self,
    image: Image.Image,
    bboxes: List[List[float]],
    classes: List[int],
) -> GenericOutputs:
    """Preprocess an image and normalise bounding boxes for detection training.

    Args:
        image: Input image, forwarded to ``self.image``.
        bboxes: Boxes with four values each. Values 0 and 2 are divided by
            the image width and values 1 and 3 by the image height. A single
            flat box ``[a, b, c, d]`` is accepted and treated as one row.
            An empty list is accepted and yields a ``(0, 4)`` tensor.
        classes: One class id per box.

    Returns:
        ``GenericOutputs`` with the processed ``image``, the size-normalised
        ``bboxes`` as a float tensor of shape ``(N, 4)`` and ``classes`` as a
        1-D tensor of length ``N``.

    Raises:
        AssertionError: If the boxes do not have four values per row, if
            ``classes`` is not 1-D, or if the number of classes differs from
            the number of boxes.
    """
    outputs = self.image(image)
    org_h, org_w = outputs.sizes

    bboxes = torch.tensor(bboxes, dtype=torch.float)
    if bboxes.numel() == 0:
        # An image without objects: keep a well-formed (0, 4) tensor instead
        # of the (0,) / (1, 0) shapes that cannot be divided by the scale.
        bboxes = bboxes.reshape(0, 4)
    elif bboxes.dim() == 1:
        bboxes = bboxes.unsqueeze(0)

    classes = torch.tensor(classes)
    if classes.numel() == 0:
        # torch.tensor([]) defaults to float; class ids should stay integral.
        classes = classes.long()

    # Validate BEFORE scaling: the division below broadcasts, so checking
    # afterwards would let an (N, 1) box tensor pass as (N, 4) and would turn
    # other bad widths into an opaque broadcasting error.
    assert bboxes.dim() == 2 and bboxes.size(-1) == 4, (
        f"bboxes must have shape (N, 4), got {tuple(bboxes.shape)}"
    )
    assert classes.dim() == 1 and len(classes) == len(bboxes), (
        f"classes must be 1-D with one entry per box, got shape "
        f"{tuple(classes.shape)} for {len(bboxes)} boxes"
    )

    scale = torch.tensor([org_w, org_h, org_w, org_h], dtype=torch.float)
    bboxes = bboxes / scale
    return GenericOutputs(image=outputs.image, bboxes=bboxes, classes=classes)

segmentation ¤

segmentation(
    image: Image,
    gt_image: Image,
    num_classes: Optional[int] = None,
) -> GenericOutputs

Preprocess an image and its ground-truth segmentation mask.

Source code in src/unitorch/models/detr/processing.py
55
56
57
58
59
60
61
62
63
64
65
66
def segmentation(
    self,
    image: Image.Image,
    gt_image: Image.Image,
    num_classes: Optional[int] = None,
) -> GenericOutputs:
    """Prepare an image together with its ground-truth mask.

    Args:
        image: Input image, processed through ``self.image``.
        gt_image: Ground-truth mask image, converted to a NumPy array and
            then to a tensor without resizing.
        num_classes: When given, every mask value larger than this number is
            capped at it (element-wise minimum). ``None`` leaves the mask
            values untouched.

    Returns:
        ``GenericOutputs`` with the processed ``image`` and the mask tensor
        under ``gt_image``.
    """
    processed_image = self.image(image).image

    mask = np.array(gt_image)
    mask = mask if num_classes is None else np.minimum(mask, num_classes)

    return GenericOutputs(image=processed_image, gt_image=torch.tensor(mask))

DetrForDetection¤

Bases: GenericModel

DETR model for object detection.

Source code in src/unitorch/models/detr/modeling.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def __init__(
    self,
    config_path: str,
    num_classes: Optional[int] = None,
) -> None:
    """Assemble the DETR backbone, prediction heads and training loss.

    Args:
        config_path: Path to the JSON file loaded with
            ``DetrConfig.from_json_file``.
        num_classes: When given, overrides ``config.num_labels`` before any
            layer is built.
    """
    super().__init__()
    self.config = DetrConfig.from_json_file(config_path)
    cfg = self.config
    if num_classes is not None:
        cfg.num_labels = num_classes
    hidden_size = cfg.d_model

    # Backbone plus the two heads. The classifier has one extra output
    # beyond ``num_labels`` (``detect`` treats that index as "drop").
    self.model = DetrModel(cfg)
    self.class_labels_classifier = nn.Linear(hidden_size, cfg.num_labels + 1)
    self.bbox_predictor = DetrMLPPredictionHead(
        input_dim=hidden_size,
        hidden_dim=hidden_size,
        output_dim=4,
        num_layers=3,
    )
    self.init_weights()

    # Training criterion: Hungarian matching followed by the DETR losses.
    self.enable_auxiliary_loss = cfg.auxiliary_loss
    matcher = DetrHungarianMatcher(
        class_cost=cfg.class_cost,
        bbox_cost=cfg.bbox_cost,
        giou_cost=cfg.giou_cost,
    )
    self.criterion = DetrLoss(
        matcher=matcher,
        num_classes=cfg.num_labels,
        eos_coef=cfg.eos_coefficient,
        losses=["labels", "boxes", "cardinality"],
    )

    # Per-term weights used by ``forward`` to combine the loss dictionary.
    base_weights = {
        "loss_ce": 1,
        "loss_bbox": cfg.bbox_loss_coefficient,
        "loss_giou": cfg.giou_loss_coefficient,
    }
    self.weight_dict = dict(base_weights)
    if self.enable_auxiliary_loss:
        # Replicate every base weight once per auxiliary decoder layer,
        # suffixing the key with the layer index.
        for layer_idx in range(cfg.decoder_layers - 1):
            for loss_name, loss_weight in base_weights.items():
                self.weight_dict[f"{loss_name}_{layer_idx}"] = loss_weight

replace_keys_in_state_dict class-attribute instance-attribute ¤

# Mapping of key patterns to replacement strings for state-dict keys.
# The patterns use regex syntax (escaped dots, negative lookbehind).
# NOTE(review): presumably applied by the GenericModel base class when
# loading a checkpoint — confirm where this mapping is consumed.
replace_keys_in_state_dict = {
    # Remove the "conv_encoder." segment from a key.
    "conv_encoder\\.": "",
    # Rename "out_proj" to "o_proj".
    "out_proj": "o_proj",
    # Prefix fc1/fc2 with "mlp." unless "mlp." already precedes them, so the
    # rewrite is not applied twice to an already-converted key.
    "(?<!mlp\\.)fc1": "mlp.fc1",
    "(?<!mlp\\.)fc2": "mlp.fc2",
}

config instance-attribute ¤

config = from_json_file(config_path)

model instance-attribute ¤

model = DetrModel(config)

class_labels_classifier instance-attribute ¤

class_labels_classifier = Linear(d_model, num_labels + 1)

bbox_predictor instance-attribute ¤

bbox_predictor = DetrMLPPredictionHead(
    input_dim=d_model,
    hidden_dim=d_model,
    output_dim=4,
    num_layers=3,
)

enable_auxiliary_loss instance-attribute ¤

enable_auxiliary_loss = auxiliary_loss

criterion instance-attribute ¤

criterion = ImageLoss(
    matcher=matcher,
    num_classes=num_labels,
    eos_coef=eos_coefficient,
    losses=["labels", "boxes", "cardinality"],
)

weight_dict instance-attribute ¤

weight_dict = {
    "loss_ce": 1,
    "loss_bbox": bbox_loss_coefficient,
    "loss_giou": giou_loss_coefficient,
}

_set_aux_loss ¤

_set_aux_loss(
    outputs_class: Tensor, outputs_coord: Tensor
) -> list
Source code in src/unitorch/models/detr/modeling.py
102
103
104
105
106
107
108
def _set_aux_loss(
    self, outputs_class: torch.Tensor, outputs_coord: torch.Tensor
) -> list:
    """Pair class and box predictions entry by entry, leaving out the last one.

    Args:
        outputs_class: Sequence of class predictions, iterated along its
            first axis.
        outputs_coord: Sequence of box predictions, iterated along its
            first axis.

    Returns:
        A list of ``{"logits": ..., "pred_boxes": ...}`` dicts, one per
        entry of the inputs except the final entry of each.
    """
    aux_outputs = []
    # The final entry of each input is dropped by the ``[:-1]`` slices.
    for entry_logits, entry_boxes in zip(outputs_class[:-1], outputs_coord[:-1]):
        aux_outputs.append({"logits": entry_logits, "pred_boxes": entry_boxes})
    return aux_outputs

forward ¤

forward(
    images: Union[List[Tensor], Tensor],
    bboxes: Union[List[Tensor], Tensor],
    classes: Union[List[Tensor], Tensor],
) -> Tensor
Source code in src/unitorch/models/detr/modeling.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def forward(
    self,
    images: Union[List[torch.Tensor], torch.Tensor],
    bboxes: Union[List[torch.Tensor], torch.Tensor],
    classes: Union[List[torch.Tensor], torch.Tensor],
) -> torch.Tensor:
    """Compute the weighted detection training loss for a batch.

    Args:
        images: Either a 4-D tensor or a list of image tensors, which is
            batched with ``image_list_to_tensor``.
        bboxes: Per-image target boxes; each entry is passed through
            ``_xyxy_to_xywh`` before reaching the criterion.
        classes: Per-image target class ids, paired with ``bboxes``.

    Returns:
        The sum of every loss term whose name appears in
        ``self.weight_dict``, each multiplied by its weight.
    """
    if not isinstance(images, torch.Tensor):
        images = image_list_to_tensor(images)
    assert images.dim() == 4

    model_outputs = self.model(images.to(self.dtype))
    hidden_states = model_outputs[0]

    # Main predictions; box outputs are squashed into (0, 1) by the sigmoid.
    outputs_loss = {
        "logits": self.class_labels_classifier(hidden_states),
        "pred_boxes": self.bbox_predictor(hidden_states).sigmoid(),
    }

    if self.enable_auxiliary_loss:
        # NOTE(review): index 4 is presumably the stacked intermediate
        # decoder states — confirm against DetrModel's output layout.
        aux_hidden_states = model_outputs[4]
        aux_logits = self.class_labels_classifier(aux_hidden_states)
        aux_boxes = self.bbox_predictor(aux_hidden_states).sigmoid()
        outputs_loss["auxiliary_outputs"] = self._set_aux_loss(aux_logits, aux_boxes)

    labels = []
    for target_boxes, target_classes in zip(bboxes, classes):
        labels.append(
            {"class_labels": target_classes, "boxes": _xyxy_to_xywh(target_boxes)}
        )

    loss_dict = self.criterion(outputs_loss, labels)

    # Terms without a configured weight are left out of the total.
    total_loss = 0
    for loss_name in loss_dict:
        if loss_name in self.weight_dict:
            total_loss = total_loss + loss_dict[loss_name] * self.weight_dict[loss_name]
    return total_loss

detect ¤

detect(
    images: Union[List[Tensor], Tensor],
    norm_bboxes: bool = False,
    threshold: float = 0.5,
) -> GenericOutputs
Source code in src/unitorch/models/detr/modeling.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
@torch.no_grad()
def detect(
    self,
    images: Union[List[torch.Tensor], torch.Tensor],
    norm_bboxes: bool = False,
    threshold: float = 0.5,
) -> GenericOutputs:
    """Run inference and return the filtered detections for each image.

    Args:
        images: Either a 4-D tensor or a list of image tensors, which is
            batched with ``image_list_to_tensor``.
        norm_bboxes: When true, boxes are returned as produced by
            ``_xywh_to_xyxy`` on the sigmoid outputs; otherwise they are
            multiplied by ``[width, height, width, height]`` of the
            corresponding input tensor.
        threshold: A prediction is kept only when its best softmax score is
            strictly greater than this value.

    Returns:
        ``GenericOutputs`` whose ``bboxes``, ``scores`` and ``classes`` are
        lists with one tensor per image, restricted to kept predictions.
    """
    # Sizes come from the last two dims of each input tensor, read before
    # the list is merged into a single batch tensor.
    image_sizes = [(img.size(-2), img.size(-1)) for img in images]
    if not isinstance(images, torch.Tensor):
        images = image_list_to_tensor(images)
    assert images.dim() == 4

    hidden_states = self.model(images.to(self.dtype))[0]
    probabilities = self.class_labels_classifier(hidden_states).softmax(dim=-1)
    raw_boxes = self.bbox_predictor(hidden_states).sigmoid()
    boxes_per_image = [_xywh_to_xyxy(image_boxes) for image_boxes in raw_boxes]

    # Best score and its class index for every prediction of every image.
    best_per_image = [image_probs.max(-1) for image_probs in probabilities]
    scores, classes = zip(*best_per_image)

    if not norm_bboxes:
        boxes_per_image = [
            image_boxes * torch.tensor([width, height, width, height]).to(image_boxes)
            for image_boxes, (height, width) in zip(boxes_per_image, image_sizes)
        ]

    kept_boxes, kept_scores, kept_classes = [], [], []
    for image_boxes, image_scores, image_classes in zip(
        boxes_per_image, scores, classes
    ):
        # Drop predictions whose class index equals ``num_labels`` (the extra
        # classifier output) and those at or below the score threshold.
        keep = (image_classes != self.config.num_labels) & (image_scores > threshold)
        kept_boxes.append(image_boxes[keep])
        kept_scores.append(image_scores[keep])
        kept_classes.append(image_classes[keep])

    return GenericOutputs(
        bboxes=kept_boxes, scores=kept_scores, classes=kept_classes
    )