
Metrics

ToyTestClassificationMetric

Bases: Metric

Dummy classification Metric object that always returns 0 (for testing).

Source code in V3_1/src/super_gradients/training/metrics/classification_metrics.py
class ToyTestClassificationMetric(Metric):
    """
    Dummy classification Metric object that always returns 0 (for testing).
    """

    def __init__(self, dist_sync_on_step=False):
        super().__init__(dist_sync_on_step=dist_sync_on_step)

    def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
        pass

    def compute(self):
        return 0

accuracy(output, target, topk=(1,))

Computes the precision@k for the specified values of k

Parameters:

- output (Tensor / Numpy / List, required): The prediction
- target (Tensor / Numpy / List, required): The corresponding labels
- topk (tuple, default (1,)): The type of accuracy to calculate, e.g. topk=(1, 5) returns accuracy for top-1 and top-5

Source code in V3_1/src/super_gradients/training/metrics/classification_metrics.py
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k
    :param output: Tensor / Numpy / List
        The prediction
    :param target: Tensor / Numpy / List
        The corresponding labels
    :param topk: tuple
        The type of accuracy to calculate, e.g. topk=(1,5) returns accuracy for top-1 and top-5"""
    # Convert to tensor
    output = convert_to_tensor(output)
    target = convert_to_tensor(target)

    # Get the maximal value of the accuracy measurement and the batch size
    maxk = max(topk)
    batch_size = target.size(0)

    # Get the top k predictions
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    # Count the number of correct predictions only for the highest k
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # Count the number of correct prediction for the different K (the top predictions) values
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size).item())
    return res
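
A minimal usage sketch (assuming super_gradients is installed; the logits and labels below are random placeholders):

import torch

from super_gradients.training.metrics.classification_metrics import accuracy

# Random logits for a batch of 8 samples over 10 classes, plus random integer labels.
logits = torch.randn(8, 10)
labels = torch.randint(0, 10, (8,))

# Returns one accuracy value (in percent) per requested k, here top-1 and top-5.
top1, top5 = accuracy(logits, labels, topk=(1, 5))
print(f"top-1: {top1:.2f}%, top-5: {top5:.2f}%")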

DetectionMetrics

Bases: Metric

DetectionMetrics

Metric class for computing F1, Precision, Recall and Mean Average Precision.

Parameters:

- num_cls (int, required): Number of classes.
- post_prediction_callback (DetectionPostPredictionCallback, required): DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS).
- normalize_targets (bool, default False): Whether to normalize bbox coordinates by image size.
- iou_thres (Union[IouThreshold, float], default IouThreshold.MAP_05_TO_095): IoU threshold to compute the mAP.
- recall_thres (torch.Tensor, default None): Recall threshold to compute the mAP.
- score_thres (float, default 0.1): Score threshold to compute Recall, Precision and F1.
- top_k_predictions (int, default 100): Number of predictions per class used to compute metrics, ordered by confidence score.
- dist_sync_on_step (bool, default False): Synchronize metric state across processes at each forward() before returning the value at the step.
- accumulate_on_cpu (bool, default True): Run on CPU regardless of device used in other parts. This is to avoid "CUDA out of memory" that might happen on GPU.

Source code in V3_1/src/super_gradients/training/metrics/detection_metrics.py
@register_metric(Metrics.DETECTION_METRICS)
class DetectionMetrics(Metric):
    """
    DetectionMetrics

    Metric class for computing F1, Precision, Recall and Mean Average Precision.

    :param num_cls:                         Number of classes.
    :param post_prediction_callback:        DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS).
    :param normalize_targets:               Whether to normalize bbox coordinates by image size.
    :param iou_thres:                       IoU threshold to compute the mAP.
    :param recall_thres:                    Recall threshold to compute the mAP.
    :param score_thres:                     Score threshold to compute Recall, Precision and F1.
    :param top_k_predictions:               Number of predictions per class used to compute metrics, ordered by confidence score
    :param dist_sync_on_step:               Synchronize metric state across processes at each ``forward()`` before returning the value at the step.
    :param accumulate_on_cpu:               Run on CPU regardless of device used in other parts.
                                            This is to avoid "CUDA out of memory" that might happen on GPU.
    """

    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback,
        normalize_targets: bool = False,
        iou_thres: Union[IouThreshold, float] = IouThreshold.MAP_05_TO_095,
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
    ):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_cls = num_cls
        self.iou_thres = iou_thres

        if isinstance(iou_thres, IouThreshold):
            self.iou_thresholds = iou_thres.to_tensor()
        else:
            self.iou_thresholds = torch.tensor([iou_thres])

        self.map_str = "mAP" + self._get_range_str()
        self.greater_component_is_better = {
            f"Precision{self._get_range_str()}": True,
            f"Recall{self._get_range_str()}": True,
            f"mAP{self._get_range_str()}": True,
            f"F1{self._get_range_str()}": True,
        }
        self.component_names = list(self.greater_component_is_better.keys())
        self.components = len(self.component_names)

        self.post_prediction_callback = post_prediction_callback
        self.is_distributed = super_gradients.is_distributed()
        self.denormalize_targets = not normalize_targets
        self.world_size = None
        self.rank = None
        self.add_state(f"matching_info{self._get_range_str()}", default=[], dist_reduce_fx=None)

        self.recall_thresholds = torch.linspace(0, 1, 101) if recall_thres is None else recall_thres
        self.score_threshold = score_thres
        self.top_k_predictions = top_k_predictions

        self.accumulate_on_cpu = accumulate_on_cpu

    def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
        """
        Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

        :param preds:           Raw output of the model, the format might change from one model to another,
                                but has to fit the input format of the post_prediction_callback (cx,cy,wh)
        :param target:          Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format:  (index, label, cx, cy, w, h)
        :param device:          Device to run on
        :param inputs:          Input image tensor of shape (batch_size, n_img, height, width)
        :param crowd_targets:   Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH
        """
        self.iou_thresholds = self.iou_thresholds.to(device)
        _, _, height, width = inputs.shape

        targets = target.clone()
        crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

        preds = self.post_prediction_callback(preds, device=device)

        new_matching_info = compute_detection_matching(
            preds,
            targets,
            height,
            width,
            self.iou_thresholds,
            crowd_targets=crowd_targets,
            top_k=self.top_k_predictions,
            denormalize_targets=self.denormalize_targets,
            device=self.device,
            return_on_cpu=self.accumulate_on_cpu,
        )

        accumulated_matching_info = getattr(self, f"matching_info{self._get_range_str()}")
        setattr(self, f"matching_info{self._get_range_str()}", accumulated_matching_info + new_matching_info)

    def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
        """Compute the metrics for all the accumulated results.
        :return: Metrics of interest
        """
        mean_ap, mean_precision, mean_recall, mean_f1 = -1.0, -1.0, -1.0, -1.0
        accumulated_matching_info = getattr(self, f"matching_info{self._get_range_str()}")

        if len(accumulated_matching_info):
            matching_info_tensors = [torch.cat(x, 0) for x in list(zip(*accumulated_matching_info))]

            # shape (n_class, nb_iou_thresh)
            ap, precision, recall, f1, unique_classes = compute_detection_metrics(
                *matching_info_tensors,
                recall_thresholds=self.recall_thresholds,
                score_threshold=self.score_threshold,
                device="cpu" if self.accumulate_on_cpu else self.device,
            )

            # Precision, recall and f1 are computed for IoU threshold range, averaged over classes
            # results before version 3.0.4 (Dec 11 2022) were computed only for smallest value (i.e IoU 0.5 if metric is @0.5:0.95)
            mean_precision, mean_recall, mean_f1 = precision.mean(), recall.mean(), f1.mean()

            # MaP is averaged over IoU thresholds and over classes
            mean_ap = ap.mean()

        return {
            f"Precision{self._get_range_str()}": mean_precision,
            f"Recall{self._get_range_str()}": mean_recall,
            f"mAP{self._get_range_str()}": mean_ap,
            f"F1{self._get_range_str()}": mean_f1,
        }

    def _sync_dist(self, dist_sync_fn=None, process_group=None):
        """
        When in distributed mode, stats are aggregated after each forward pass to the metric state. Since these have all
        different sizes we override the synchronization function since it works only for tensors (and use
        all_gather_object)
        :param dist_sync_fn:
        :return:
        """
        if self.world_size is None:
            self.world_size = torch.distributed.get_world_size() if self.is_distributed else -1
        if self.rank is None:
            self.rank = torch.distributed.get_rank() if self.is_distributed else -1

        if self.is_distributed:
            local_state_dict = {attr: getattr(self, attr) for attr in self._reductions.keys()}
            gathered_state_dicts = [None] * self.world_size
            torch.distributed.barrier()
            torch.distributed.all_gather_object(gathered_state_dicts, local_state_dict)
            matching_info = []
            for state_dict in gathered_state_dicts:
                matching_info += state_dict[f"matching_info{self._get_range_str()}"]
            matching_info = tensor_container_to_device(matching_info, device="cpu" if self.accumulate_on_cpu else self.device)

            setattr(self, f"matching_info{self._get_range_str()}", matching_info)

    def _get_range_str(self):
        return "@%.2f" % self.iou_thresholds[0] if not len(self.iou_thresholds) > 1 else "@%.2f:%.2f" % (self.iou_thresholds[0], self.iou_thresholds[-1])
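
A hedged sketch of how DetectionMetrics might be driven in a validation loop. The model, dataloader and post-prediction callback below are placeholders; the callback must be the DetectionPostPredictionCallback (NMS) matching the detection model you use, and the dataloader is assumed to yield LABEL_CXCYWH targets.

import torch

from super_gradients.training.metrics import DetectionMetrics

metric = DetectionMetrics(
    num_cls=80,
    post_prediction_callback=post_prediction_callback,  # placeholder NMS callback for your model
    normalize_targets=True,
    iou_thres=0.5,
)

device = "cuda" if torch.cuda.is_available() else "cpu"
for images, targets in val_loader:  # placeholder dataloader
    with torch.no_grad():
        preds = model(images.to(device))  # placeholder model
    metric.update(preds, targets.to(device), device=device, inputs=images)

print(metric.compute())  # e.g. {"Precision@0.50": ..., "Recall@0.50": ..., "mAP@0.50": ..., "F1@0.50": ...}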

compute()

Compute the metrics for all the accumulated results.

Returns:

- Dict[str, Union[float, torch.Tensor]]: Metrics of interest

Source code in V3_1/src/super_gradients/training/metrics/detection_metrics.py
def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
    """Compute the metrics for all the accumulated results.
    :return: Metrics of interest
    """
    mean_ap, mean_precision, mean_recall, mean_f1 = -1.0, -1.0, -1.0, -1.0
    accumulated_matching_info = getattr(self, f"matching_info{self._get_range_str()}")

    if len(accumulated_matching_info):
        matching_info_tensors = [torch.cat(x, 0) for x in list(zip(*accumulated_matching_info))]

        # shape (n_class, nb_iou_thresh)
        ap, precision, recall, f1, unique_classes = compute_detection_metrics(
            *matching_info_tensors,
            recall_thresholds=self.recall_thresholds,
            score_threshold=self.score_threshold,
            device="cpu" if self.accumulate_on_cpu else self.device,
        )

        # Precision, recall and f1 are computed for IoU threshold range, averaged over classes
        # results before version 3.0.4 (Dec 11 2022) were computed only for smallest value (i.e IoU 0.5 if metric is @0.5:0.95)
        mean_precision, mean_recall, mean_f1 = precision.mean(), recall.mean(), f1.mean()

        # MaP is averaged over IoU thresholds and over classes
        mean_ap = ap.mean()

    return {
        f"Precision{self._get_range_str()}": mean_precision,
        f"Recall{self._get_range_str()}": mean_recall,
        f"mAP{self._get_range_str()}": mean_ap,
        f"F1{self._get_range_str()}": mean_f1,
    }

update(preds, target, device, inputs, crowd_targets=None)

Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

Parameters:

- preds (required): Raw output of the model; the format might change from one model to another, but has to fit the input format of the post_prediction_callback (cx, cy, wh).
- target (torch.Tensor, required): Targets for all images, of shape (total_num_targets, 6) in LABEL_CXCYWH format: (index, label, cx, cy, w, h).
- device (str, required): Device to run on.
- inputs (torch.tensor, required): Input image tensor of shape (batch_size, n_img, height, width).
- crowd_targets (Optional[torch.Tensor], default None): Crowd targets for all images, of shape (total_num_targets, 6), LABEL_CXCYWH.

Source code in V3_1/src/super_gradients/training/metrics/detection_metrics.py
def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
    """
    Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

    :param preds:           Raw output of the model, the format might change from one model to another,
                            but has to fit the input format of the post_prediction_callback (cx,cy,wh)
    :param target:          Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format:  (index, label, cx, cy, w, h)
    :param device:          Device to run on
    :param inputs:          Input image tensor of shape (batch_size, n_img, height, width)
    :param crowd_targets:   Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH
    """
    self.iou_thresholds = self.iou_thresholds.to(device)
    _, _, height, width = inputs.shape

    targets = target.clone()
    crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

    preds = self.post_prediction_callback(preds, device=device)

    new_matching_info = compute_detection_matching(
        preds,
        targets,
        height,
        width,
        self.iou_thresholds,
        crowd_targets=crowd_targets,
        top_k=self.top_k_predictions,
        denormalize_targets=self.denormalize_targets,
        device=self.device,
        return_on_cpu=self.accumulate_on_cpu,
    )

    accumulated_matching_info = getattr(self, f"matching_info{self._get_range_str()}")
    setattr(self, f"matching_info{self._get_range_str()}", accumulated_matching_info + new_matching_info)
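
For reference, the expected target layout is one row per box across the whole batch, in LABEL_CXCYWH order (index, label, cx, cy, w, h); whether coordinates are normalized or in pixels depends on your dataset and the normalize_targets flag. A hand-built example with two boxes in image 0 and one in image 1:

import torch

# Each row: (image_index_in_batch, class_label, cx, cy, w, h)
target = torch.tensor(
    [
        [0, 3, 0.50, 0.50, 0.20, 0.30],
        [0, 7, 0.25, 0.40, 0.10, 0.15],
        [1, 3, 0.60, 0.55, 0.35, 0.40],
    ]
)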

flatten_metrics_dict(metrics_dict)

Parameters:

- metrics_dict (dict, required): Dictionary of metric values; values can themselves be dictionaries containing sub-values (in the case of compound metrics).

Returns:

- Flattened dict of metric values, i.e. {metric1_name: metric1_value, ...}

Source code in V3_1/src/super_gradients/training/metrics/metric_utils.py
def flatten_metrics_dict(metrics_dict: dict):
    """
    :param metrics_dict: dictionary of metric values where values can also be dictionaries containing subvalues
    (in the case of compound metrics)

    :return: flattened dict of metric values i.e {metric1_name: metric1_value...}
    """
    flattened = {}
    for metric_name, metric_val in metrics_dict.items():
        if metric_name == "additional_items":
            continue
        # COLLECT ALL OF THE COMPONENTS IN THE CASE OF COMPOUND METRICS
        elif isinstance(metric_val, dict):
            for sub_metric_name, sub_metric_val in metric_val.items():
                flattened[sub_metric_name] = sub_metric_val
        else:
            flattened[metric_name] = metric_val

    return flattened
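
A small illustration with a hand-built dictionary; the nested entry mimics a compound metric such as DetectionMetrics, whose compute() returns several components:

from super_gradients.training.metrics.metric_utils import flatten_metrics_dict

metrics_dict = {
    "Accuracy": 0.91,
    "DetectionMetrics": {"Precision@0.50": 0.71, "Recall@0.50": 0.63, "mAP@0.50": 0.58, "F1@0.50": 0.67},
    "additional_items": {"lr": 0.001},  # skipped by design
}

print(flatten_metrics_dict(metrics_dict))
# {'Accuracy': 0.91, 'Precision@0.50': 0.71, 'Recall@0.50': 0.63, 'mAP@0.50': 0.58, 'F1@0.50': 0.67}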

get_logging_values(loss_loggings, metrics, criterion=None)

Parameters:

- loss_loggings (AverageMeter, required): AverageMeter running average for the loss items.
- metrics (MetricCollection, required): MetricCollection object for running user-specified metrics.
- criterion (default None): The loss object that the loss_loggings average meter is monitoring; when set to None, only the metrics values are computed and returned.

Returns:

- Tuple of the computed values.

Source code in V3_1/src/super_gradients/training/metrics/metric_utils.py
def get_logging_values(loss_loggings: AverageMeter, metrics: MetricCollection, criterion=None):
    """
    :param loss_loggings: AverageMeter running average for the loss items
    :param metrics: MetricCollection object for running user specified metrics
    :param criterion: the object loss_loggings average meter is monitoring; when set to None, only the metrics values are
    computed and returned.

    :return: tuple of the computed values
    """
    if criterion is not None:
        loss_loggingg_avg = loss_loggings.average
        if not isinstance(loss_loggingg_avg, tuple):
            loss_loggingg_avg = tuple([loss_loggingg_avg])
        logging_vals = loss_loggingg_avg + get_metrics_results_tuple(metrics)
    else:
        logging_vals = get_metrics_results_tuple(metrics)

    return logging_vals

get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names)

Returns a dictionary with the epoch results as values and their names as keys.

Parameters:

- metrics_tuple (required): The result tuple.
- metrics_collection (required): MetricCollection object.
- loss_logging_item_names (required): Names of the loss components.

Returns:

- dict

Source code in V3_1/src/super_gradients/training/metrics/metric_utils.py
def get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names):
    """
    Returns a dictionary with the epoch results as values and their names as keys.
    :param metrics_tuple: the result tuple
    :param metrics_collection: MetricsCollection
    :param loss_logging_item_names: loss component's names.
    :return: dict
    """
    keys = loss_logging_item_names + get_metrics_titles(metrics_collection)
    metrics_dict = dict(zip(keys, list(metrics_tuple)))
    return metrics_dict
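
A hedged illustration (Accuracy and Top5 are SuperGradients' classification metrics; the loss name and values are made up, and the snippet assumes the torchmetrics version pinned by super_gradients):

from torchmetrics import MetricCollection

from super_gradients.training.metrics import Accuracy, Top5
from super_gradients.training.metrics.metric_utils import get_metrics_dict

collection = MetricCollection([Accuracy(), Top5()])

# Values as they would appear in the result tuple: loss first, then the metric values.
metrics_tuple = (0.35, 0.91, 0.99)

print(get_metrics_dict(metrics_tuple, collection, loss_logging_item_names=["CrossEntropyLoss"]))
# {'CrossEntropyLoss': 0.35, 'Accuracy': 0.91, 'Top5': 0.99}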

get_metrics_results_tuple(metrics_collection)

Parameters:

- metrics_collection (MetricCollection, required): Metrics collection of the user-specified metrics.

Returns:

- Tuple of metrics values.

Source code in V3_1/src/super_gradients/training/metrics/metric_utils.py
def get_metrics_results_tuple(metrics_collection: MetricCollection):
    """

    :param metrics_collection: metrics collection of the user specified metrics
    :return: tuple of metrics values
    """
    if metrics_collection is None:
        results_tuple = ()
    else:
        results_tuple = tuple(flatten_metrics_dict(metrics_collection.compute()).values())
    return results_tuple

get_metrics_titles(metrics_collection)

Parameters:

- metrics_collection (MetricCollection, required): MetricCollection object for running user specified metrics.

Returns:

- List of all the names of the computed values, as list(str).

Source code in V3_1/src/super_gradients/training/metrics/metric_utils.py
def get_metrics_titles(metrics_collection: MetricCollection):
    """

    :param metrics_collection: MetricCollection object for running user specified metrics
    :return: list of all the names of the computed values list(str)
    """
    titles = []
    for metric_name, metric in metrics_collection.items():
        if metric_name == "additional_items":
            continue
        elif hasattr(metric, "component_names"):
            titles += metric.component_names
        else:
            titles.append(metric_name)

    return titles
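
For plain metrics the title is simply the collection key; compound metrics that expose component_names (such as DetectionMetrics above) are expanded into one title per component. A hedged sketch (the post-prediction callback is a placeholder):

from torchmetrics import MetricCollection

from super_gradients.training.metrics import DetectionMetrics
from super_gradients.training.metrics.metric_utils import get_metrics_titles

detection_metric = DetectionMetrics(num_cls=80, post_prediction_callback=nms_callback, iou_thres=0.5)  # placeholder callback
print(get_metrics_titles(MetricCollection([detection_metric])))
# ['Precision@0.50', 'Recall@0.50', 'mAP@0.50', 'F1@0.50']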

get_train_loop_description_dict(metrics_tuple, metrics_collection, loss_logging_item_names, **log_items)

Returns a dictionary with the epoch's logging items as values and their names as keys, with the purpose of passing it as a description to tqdm's progress bar.

Parameters:

- metrics_tuple (required): The result tuple.
- metrics_collection (required): MetricCollection object.
- loss_logging_item_names (required): Names of the loss components.
- log_items: Additional logging items to be rendered (passed as keyword arguments).

Returns:

- dict

Source code in V3_1/src/super_gradients/training/metrics/metric_utils.py
def get_train_loop_description_dict(metrics_tuple, metrics_collection, loss_logging_item_names, **log_items):
    """
    Returns a dictionary with the epoch's logging items as values and their names as keys, with the purpose of
     passing it as a description to tqdm's progress bar.

    :param metrics_tuple: the result tuple
    :param metrics_collection: MetricsCollection
    :param loss_logging_item_names: loss component's names.
    :param log_items: additional logging items to be rendered.
    :return: dict
    """
    log_items.update(get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names))
    for key, value in log_items.items():
        if isinstance(value, torch.Tensor):
            log_items[key] = value.detach().item()

    return log_items
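
A hedged sketch of how the description dict might feed a tqdm progress bar inside a training loop (the dataloader, result tuple, names and extra items are placeholders):

from tqdm import tqdm

from super_gradients.training.metrics.metric_utils import get_train_loop_description_dict

progress_bar = tqdm(train_loader)  # placeholder dataloader
for inputs, targets in progress_bar:
    ...  # forward/backward pass, metric updates, building metrics_tuple
    description = get_train_loop_description_dict(
        metrics_tuple, metrics_collection, loss_logging_item_names, gpu_mem=gpu_memory_gb  # placeholders
    )
    progress_bar.set_postfix(description)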

PoseEstimationMetrics

Bases: Metric

Implementation of COCO Keypoint evaluation metric. When instantiated with default parameters, it will default to COCO params. By default, only AR and AP metrics are computed:

>>> from super_gradients.training.metrics import PoseEstimationMetrics
>>> metric = PoseEstimationMetrics(...)
>>> metric.update(...)
>>> metrics = metric.compute()  # {"AP": 0.123, "AR": 0.456}

If you wish to get AP/AR at specific thresholds, you can specify them using the iou_thresholds_to_report argument:

>>> from super_gradients.training.metrics import PoseEstimationMetrics
>>> metric = PoseEstimationMetrics(iou_thresholds_to_report=[0.5, 0.75], ...)
>>> metric.update(...)
>>> metrics = metric.compute()  # {"AP": 0.123, "AP_0.5": 0.222, "AP_0.75": 0.111, "AR": 0.456, "AR_0.5": 0.212, "AR_0.75": 0.443}

Source code in V3_1/src/super_gradients/training/metrics/pose_estimation_metrics.py
@register_metric(Metrics.POSE_ESTIMATION_METRICS)
class PoseEstimationMetrics(Metric):
    """
    Implementation of COCO Keypoint evaluation metric.
    When instantiated with default parameters, it will default to COCO params.
    By default, only AR and AP metrics are computed:

    >>> from super_gradients.training.metrics import PoseEstimationMetrics
    >>> metric = PoseEstimationMetrics(...)
    >>> metric.update(...)
    >>> metrics = metric.compute() # {"AP": 0.123, "AR": 0.456 }

    If you wish to get AP/AR at specific thresholds, you can specify them using the `iou_thresholds_to_report` argument:

    >>> from super_gradients.training.metrics import PoseEstimationMetrics
    >>> metric = PoseEstimationMetrics(iou_thresholds_to_report=[0.5, 0.75], ...)
    >>> metric.update(...)
    >>> metrics = metric.compute() # {"AP": 0.123, "AP_0.5": 0.222, "AP_0.75": 0.111, "AR": 0.456, "AR_0.5": 0.212, "AR_0.75": 0.443}

    """

    def __init__(
        self,
        post_prediction_callback: Callable[[Any], Tuple[Tensor, Tensor]],
        num_joints: int,
        max_objects_per_image: int = 20,
        oks_sigmas: Optional[Iterable] = None,
        iou_thresholds: Optional[Iterable] = None,
        recall_thresholds: Optional[Iterable] = None,
        iou_thresholds_to_report: Optional[Iterable] = None,
    ):
        """
        Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values.
        If you need to get additional metrics (AP at specific threshold), pass these thresholds via `iou_thresholds_to_report` argument.

        :param post_prediction_callback:  A callback to decode model predictions to poses. This should be callable that takes input (model predictions)
                                          and returns a tuple of (poses, scores)

        :param num_joints:                Number of joints per pose

        :param max_objects_per_image:     Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

        :param oks_sigmas:                OKS sigma factor for custom keypoint detection dataset.
                                          If None, then metric will use default OKS from COCO and expect num_joints to be equal to 17

        :param recall_thresholds:         List of recall thresholds to compute AP.
                                          If None, then will use default 101 recall thresholds from COCO in range [0..1]

        :param iou_thresholds:            List of IoU thresholds to use. If None, then COCO version of IoU will be used (0.5 ... 0.95)

        :param iou_thresholds_to_report:  List of IoU thresholds to return in metric. By default, only AP/AR metrics are returned, but one
                                          may also request to return AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting `iou_thresholds_to_report=[0.5, 0.75]`

        """
        super().__init__(dist_sync_on_step=False)
        self.num_joints = num_joints
        self.max_objects_per_image = max_objects_per_image
        self.stats_names = ["AP", "AR"]

        if recall_thresholds is None:
            recall_thresholds = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True, dtype=np.float32)
        self.recall_thresholds = torch.tensor(recall_thresholds, dtype=torch.float32)

        if iou_thresholds is None:
            iou_thresholds = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True, dtype=np.float32)
        self.iou_thresholds = torch.tensor(iou_thresholds, dtype=torch.float32)

        if iou_thresholds_to_report is not None:
            self.iou_thresholds_to_report = np.array([float(t) for t in iou_thresholds_to_report], dtype=np.float32)

            if not np.isin(self.iou_thresholds_to_report, self.iou_thresholds).all():
                missing = ~np.isin(self.iou_thresholds_to_report, self.iou_thresholds)
                raise RuntimeError(
                    f"One or many IoU thresholds to report are not present in IoU thresholds. Missing thresholds: {self.iou_thresholds_to_report[missing]}"
                )

            self.stats_names += [f"AP_{t:.2f}" for t in self.iou_thresholds_to_report]
            self.stats_names += [f"AR_{t:.2f}" for t in self.iou_thresholds_to_report]
        else:
            self.iou_thresholds_to_report = None

        self.greater_component_is_better = dict((k, True) for k in self.stats_names)

        if oks_sigmas is None:
            oks_sigmas = np.array([0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89]) / 10.0

        if len(oks_sigmas) != num_joints:
            raise ValueError(f"Length of oks_sigmas ({len(oks_sigmas)}) should be equal to num_joints {num_joints}")

        self.oks_sigmas = torch.tensor(oks_sigmas).float()

        self.component_names = list(self.greater_component_is_better.keys())
        self.components = len(self.component_names)

        self.post_prediction_callback = post_prediction_callback
        self.is_distributed = is_distributed()
        self.world_size = None
        self.rank = None
        self.add_state("predictions", default=[], dist_reduce_fx=None)

    def reset(self) -> None:
        self.predictions.clear()

    @torch.no_grad()
    def update(
        self,
        preds,
        target,
        gt_joints: List[np.ndarray],
        gt_iscrowd: List[np.ndarray] = None,
        gt_bboxes: List[np.ndarray] = None,
        gt_areas: List[np.ndarray] = None,
    ):
        """
        Decode the predictions and update the metric

        :param preds :           Raw output of the model

        :param target:           Targets for the model training (rarely used for evaluation)

        :param gt_joints:        List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
                                 Note that augmentation/preprocessing transformations (Affine transforms specifically) must also be applied to gt_joints.
                                 This is to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation,
                                 where predictions are rescaled back to the original size of the image.
                                 However, that would make the code much more (and unnecessarily) complicated, so we do it differently and evaluate
                                 joints in the coordinate system of the predicted image.

        :param gt_iscrowd:       Optional argument indicating which instance is annotated with `iscrowd` flag and is not used for evaluation;
                                 If not provided, all instances are considered as non-crowd targets.
                                 For instance, in CrowdPose all instances are considered as "non-crowd".

        :param gt_bboxes:        Bounding boxes of the groundtruth instances (XYWH).
                                 This is COCO-specific and is used in OKS computation for instances w/o visible keypoints.
                                 If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

        :param gt_areas:         Area of the groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box,
                                 so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) metric computation.
                                 If not provided, the area is computed as the product of the width and height of the bounding box.
                                 (For instance this is used in CrowdPose dataset)

        """
        predicted_poses, predicted_scores = self.post_prediction_callback(preds)  # Decode raw predictions into poses

        if gt_bboxes is None:
            gt_bboxes = [compute_visible_bbox_xywh(torch.tensor(joints[:, :, 0:2]), torch.tensor(joints[:, :, 2])) for joints in gt_joints]

        if gt_areas is None:
            gt_areas = [bboxes[:, 2] * bboxes[:, 3] for bboxes in gt_bboxes]

        if gt_iscrowd is None:
            gt_iscrowd = [[False] * len(x) for x in gt_joints]

        for i in range(len(predicted_poses)):
            self.update_single_image(
                predicted_poses[i], predicted_scores[i], gt_joints[i], gt_areas=gt_areas[i], gt_bboxes=gt_bboxes[i], gt_iscrowd=gt_iscrowd[i]
            )

    def update_single_image(
        self,
        predicted_poses: Union[Tensor, np.ndarray],
        predicted_scores: Union[Tensor, np.ndarray],
        groundtruths: Union[Tensor, np.ndarray],
        gt_bboxes: Union[Tensor, np.ndarray],
        gt_areas: Union[Tensor, np.ndarray],
        gt_iscrowd: Union[Tensor, np.ndarray, List[bool]],
    ):
        if len(predicted_poses) == 0 and len(groundtruths) == 0:
            return
        if len(predicted_poses) != len(predicted_scores):
            raise ValueError("Length of predicted poses and scores should be equal. Got {} and {}".format(len(predicted_poses), len(predicted_scores)))
        if len(groundtruths) != len(gt_areas) != len(gt_bboxes) != len(gt_iscrowd):
            raise ValueError(
                "Length of groundtruths, areas, bboxes and iscrowd should be equal. Got {} and {} and {} and {}".format(
                    len(groundtruths), len(gt_areas), len(gt_bboxes), len(gt_iscrowd)
                )
            )

        predicted_poses = convert_to_tensor(predicted_poses, dtype=torch.float32, device=self.device)
        predicted_scores = convert_to_tensor(predicted_scores, dtype=torch.float32, device=self.device)

        gt_keypoints = convert_to_tensor(groundtruths, dtype=torch.float32, device=self.device)
        gt_areas = convert_to_tensor(gt_areas, dtype=torch.float32, device=self.device)
        gt_bboxes = convert_to_tensor(gt_bboxes, dtype=torch.float32, device=self.device)
        gt_iscrowd = convert_to_tensor(gt_iscrowd, dtype=torch.bool, device=self.device)

        gt_keypoints_xy = gt_keypoints[:, :, 0:2]
        gt_keypoints_visibility = gt_keypoints[:, :, 2]
        gt_all_kpts_invisible = gt_keypoints_visibility.eq(0).all(dim=1)
        gt_is_ignore = gt_all_kpts_invisible | gt_iscrowd

        targets = gt_keypoints_xy[~gt_is_ignore] if len(groundtruths) else []
        targets_visibilities = gt_keypoints_visibility[~gt_is_ignore] if len(groundtruths) else []
        targets_areas = gt_areas[~gt_is_ignore] if len(groundtruths) else []
        targets_bboxes = gt_bboxes[~gt_is_ignore]
        targets_ignored = gt_is_ignore[~gt_is_ignore]

        crowd_targets = gt_keypoints_xy[gt_is_ignore] if len(groundtruths) else []
        crowd_visibilities = gt_keypoints_visibility[gt_is_ignore] if len(groundtruths) else []
        crowd_targets_areas = gt_areas[gt_is_ignore]
        crowd_targets_bboxes = gt_bboxes[gt_is_ignore]

        mr = compute_img_keypoint_matching(
            predicted_poses,
            predicted_scores,
            #
            targets=targets,
            targets_visibilities=targets_visibilities,
            targets_areas=targets_areas,
            targets_bboxes=targets_bboxes,
            targets_ignored=targets_ignored,
            #
            crowd_targets=crowd_targets,
            crowd_visibilities=crowd_visibilities,
            crowd_targets_areas=crowd_targets_areas,
            crowd_targets_bboxes=crowd_targets_bboxes,
            #
            iou_thresholds=self.iou_thresholds.to(self.device),
            sigmas=self.oks_sigmas.to(self.device),
            top_k=self.max_objects_per_image,
        )

        self.predictions.append((mr.preds_matched.cpu(), mr.preds_to_ignore.cpu(), mr.preds_scores.cpu(), int(mr.num_targets)))

    def _sync_dist(self, dist_sync_fn=None, process_group=None):
        """
        When in distributed mode, stats are aggregated after each forward pass to the metric state. Since these have all
        different sizes we override the synchronization function since it works only for tensors (and use
        all_gather_object)
        :param dist_sync_fn:
        :return:
        """
        if self.world_size is None:
            self.world_size = torch.distributed.get_world_size() if self.is_distributed else -1
        if self.rank is None:
            self.rank = torch.distributed.get_rank() if self.is_distributed else -1

        if self.is_distributed:
            local_state_dict = self.predictions
            gathered_state_dicts = [None] * self.world_size
            torch.distributed.all_gather_object(gathered_state_dicts, local_state_dict)
            self.predictions = list(itertools.chain(*gathered_state_dicts))

    def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
        """Compute the metrics for all the accumulated results.
        :return: Metrics of interest
        """
        T = len(self.iou_thresholds)
        K = 1  # num categories

        precision = -np.ones((T, K))
        recall = -np.ones((T, K))

        predictions = self.predictions  # All gathered by this time
        if len(predictions) > 0:
            preds_matched = torch.cat([x[0].cpu() for x in predictions], dim=0)
            preds_to_ignore = torch.cat([x[1].cpu() for x in predictions], dim=0)
            preds_scores = torch.cat([x[2].cpu() for x in predictions], dim=0)
            n_targets = sum([x[3] for x in predictions])

            cls_precision, _, cls_recall = compute_detection_metrics_per_cls(
                preds_matched=preds_matched,
                preds_to_ignore=preds_to_ignore,
                preds_scores=preds_scores,
                n_targets=n_targets,
                recall_thresholds=self.recall_thresholds.cpu(),
                score_threshold=0,
                device="cpu",
            )

            precision[:, 0] = cls_precision.cpu().numpy()
            recall[:, 0] = cls_recall.cpu().numpy()

        def summarize(s):
            if len(s[s > -1]) == 0:
                mean_s = -1
            else:
                mean_s = np.mean(s[s > -1])

            return mean_s

        metrics = {"AP": summarize(precision), "AR": summarize(recall)}

        if self.iou_thresholds_to_report is not None and len(self.iou_thresholds_to_report):
            for t in self.iou_thresholds_to_report:
                mask = np.where(t == self.iou_thresholds)[0]
                metrics[f"AP_{t:.2f}"] = summarize(precision[mask])
                metrics[f"AR_{t:.2f}"] = summarize(recall[mask])

        return metrics

__init__(post_prediction_callback, num_joints, max_objects_per_image=20, oks_sigmas=None, iou_thresholds=None, recall_thresholds=None, iou_thresholds_to_report=None)

Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values. If you need to get additional metrics (AP at specific threshold), pass these thresholds via iou_thresholds_to_report argument.

Parameters:

- post_prediction_callback (Callable[[Any], Tuple[Tensor, Tensor]], required): A callback to decode model predictions to poses. This should be a callable that takes the model predictions as input and returns a tuple of (poses, scores).
- num_joints (int, required): Number of joints per pose.
- max_objects_per_image (int, default 20): Maximum number of predicted poses to include in evaluation (top-K poses will be used).
- oks_sigmas (Optional[Iterable], default None): OKS sigma factor for a custom keypoint detection dataset. If None, the metric will use the default OKS sigmas from COCO and expect num_joints to be equal to 17.
- recall_thresholds (Optional[Iterable], default None): List of recall thresholds to compute AP. If None, the default 101 recall thresholds from COCO in the range [0..1] are used.
- iou_thresholds (Optional[Iterable], default None): List of IoU thresholds to use. If None, the COCO range of IoU thresholds is used (0.5 ... 0.95).
- iou_thresholds_to_report (Optional[Iterable], default None): List of IoU thresholds to report in the metric. By default, only AP/AR are returned, but one may also request AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting iou_thresholds_to_report=[0.5, 0.75].

Source code in V3_1/src/super_gradients/training/metrics/pose_estimation_metrics.py
def __init__(
    self,
    post_prediction_callback: Callable[[Any], Tuple[Tensor, Tensor]],
    num_joints: int,
    max_objects_per_image: int = 20,
    oks_sigmas: Optional[Iterable] = None,
    iou_thresholds: Optional[Iterable] = None,
    recall_thresholds: Optional[Iterable] = None,
    iou_thresholds_to_report: Optional[Iterable] = None,
):
    """
    Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values.
    If you need to get additional metrics (AP at specific threshold), pass these thresholds via `iou_thresholds_to_report` argument.

    :param post_prediction_callback:  A callback to decode model predictions to poses. This should be callable that takes input (model predictions)
                                      and returns a tuple of (poses, scores)

    :param num_joints:                Number of joints per pose

    :param max_objects_per_image:     Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

    :param oks_sigmas:                OKS sigma factor for custom keypoint detection dataset.
                                      If None, then metric will use default OKS from COCO and expect num_joints to be equal to 17

    :param recall_thresholds:         List of recall thresholds to compute AP.
                                      If None, then will use default 101 recall thresholds from COCO in range [0..1]

    :param iou_thresholds:            List of IoU thresholds to use. If None, then COCO version of IoU will be used (0.5 ... 0.95)

    :param iou_thresholds_to_report:  List of IoU thresholds to return in metric. By default, only AP/AR metrics are returned, but one
                                      may also request to return AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting `iou_thresholds_to_report=[0.5, 0.75]`

    """
    super().__init__(dist_sync_on_step=False)
    self.num_joints = num_joints
    self.max_objects_per_image = max_objects_per_image
    self.stats_names = ["AP", "AR"]

    if recall_thresholds is None:
        recall_thresholds = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True, dtype=np.float32)
    self.recall_thresholds = torch.tensor(recall_thresholds, dtype=torch.float32)

    if iou_thresholds is None:
        iou_thresholds = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True, dtype=np.float32)
    self.iou_thresholds = torch.tensor(iou_thresholds, dtype=torch.float32)

    if iou_thresholds_to_report is not None:
        self.iou_thresholds_to_report = np.array([float(t) for t in iou_thresholds_to_report], dtype=np.float32)

        if not np.isin(self.iou_thresholds_to_report, self.iou_thresholds).all():
            missing = ~np.isin(self.iou_thresholds_to_report, self.iou_thresholds)
            raise RuntimeError(
                f"One or many IoU thresholds to report are not present in IoU thresholds. Missing thresholds: {self.iou_thresholds_to_report[missing]}"
            )

        self.stats_names += [f"AP_{t:.2f}" for t in self.iou_thresholds_to_report]
        self.stats_names += [f"AR_{t:.2f}" for t in self.iou_thresholds_to_report]
    else:
        self.iou_thresholds_to_report = None

    self.greater_component_is_better = dict((k, True) for k in self.stats_names)

    if oks_sigmas is None:
        oks_sigmas = np.array([0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89]) / 10.0

    if len(oks_sigmas) != num_joints:
        raise ValueError(f"Length of oks_sigmas ({len(oks_sigmas)}) should be equal to num_joints {num_joints}")

    self.oks_sigmas = torch.tensor(oks_sigmas).float()

    self.component_names = list(self.greater_component_is_better.keys())
    self.components = len(self.component_names)

    self.post_prediction_callback = post_prediction_callback
    self.is_distributed = is_distributed()
    self.world_size = None
    self.rank = None
    self.add_state("predictions", default=[], dist_reduce_fx=None)
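
A hedged construction sketch for a COCO-style setup (the decode callback is a placeholder that must return a (poses, scores) tuple for the batch):

from super_gradients.training.metrics import PoseEstimationMetrics

metric = PoseEstimationMetrics(
    post_prediction_callback=decode_poses_callback,  # placeholder: model output -> (poses, scores)
    num_joints=17,                                   # COCO keypoints; matches the default OKS sigmas
    max_objects_per_image=20,
    iou_thresholds_to_report=[0.5, 0.75],
)
# After metric.update(...) calls, metric.compute() returns
# {"AP": ..., "AR": ..., "AP_0.50": ..., "AR_0.50": ..., "AP_0.75": ..., "AR_0.75": ...}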

compute()

Compute the metrics for all the accumulated results.

Returns:

- Dict[str, Union[float, torch.Tensor]]: Metrics of interest

Source code in V3_1/src/super_gradients/training/metrics/pose_estimation_metrics.py
def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
    """Compute the metrics for all the accumulated results.
    :return: Metrics of interest
    """
    T = len(self.iou_thresholds)
    K = 1  # num categories

    precision = -np.ones((T, K))
    recall = -np.ones((T, K))

    predictions = self.predictions  # All gathered by this time
    if len(predictions) > 0:
        preds_matched = torch.cat([x[0].cpu() for x in predictions], dim=0)
        preds_to_ignore = torch.cat([x[1].cpu() for x in predictions], dim=0)
        preds_scores = torch.cat([x[2].cpu() for x in predictions], dim=0)
        n_targets = sum([x[3] for x in predictions])

        cls_precision, _, cls_recall = compute_detection_metrics_per_cls(
            preds_matched=preds_matched,
            preds_to_ignore=preds_to_ignore,
            preds_scores=preds_scores,
            n_targets=n_targets,
            recall_thresholds=self.recall_thresholds.cpu(),
            score_threshold=0,
            device="cpu",
        )

        precision[:, 0] = cls_precision.cpu().numpy()
        recall[:, 0] = cls_recall.cpu().numpy()

    def summarize(s):
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])

        return mean_s

    metrics = {"AP": summarize(precision), "AR": summarize(recall)}

    if self.iou_thresholds_to_report is not None and len(self.iou_thresholds_to_report):
        for t in self.iou_thresholds_to_report:
            mask = np.where(t == self.iou_thresholds)[0]
            metrics[f"AP_{t:.2f}"] = summarize(precision[mask])
            metrics[f"AR_{t:.2f}"] = summarize(recall[mask])

    return metrics

update(preds, target, gt_joints, gt_iscrowd=None, gt_bboxes=None, gt_areas=None)

Decode the predictions and update the metric

Parameters:

- preds (required): Raw output of the model.
- target (required): Targets for the model training (rarely used for evaluation).
- gt_joints (List[np.ndarray], required): List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3). Note that augmentation/preprocessing transformations (Affine transforms specifically) must also be applied to gt_joints, so that joint coordinates are transformed identically to the image. This differs from COCO evaluation, where predictions are rescaled back to the original size of the image; doing so would make the code much more (and unnecessarily) complicated, so we evaluate joints in the coordinate system of the predicted image instead.
- gt_iscrowd (List[np.ndarray], default None): Optional argument indicating which instances are annotated with the iscrowd flag and are not used for evaluation. If not provided, all instances are considered non-crowd targets. For instance, in CrowdPose all instances are considered "non-crowd".
- gt_bboxes (List[np.ndarray], default None): Bounding boxes of the groundtruth instances (XYWH). This is COCO-specific and is used in OKS computation for instances without visible keypoints. If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.
- gt_areas (List[np.ndarray], default None): Area of the groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box, so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) computation. If not provided, the area is computed as the product of the width and height of the bounding box (this is used in the CrowdPose dataset, for instance).

Source code in V3_1/src/super_gradients/training/metrics/pose_estimation_metrics.py
@torch.no_grad()
def update(
    self,
    preds,
    target,
    gt_joints: List[np.ndarray],
    gt_iscrowd: List[np.ndarray] = None,
    gt_bboxes: List[np.ndarray] = None,
    gt_areas: List[np.ndarray] = None,
):
    """
    Decode the predictions and update the metric

    :param preds:            Raw output of the model

    :param target:           Targets for the model training (rarely used for evaluation)

    :param gt_joints:        List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
                             Note that augmentation/preprocessing transformations (Affine transforms specifically) must also be applied to gt_joints.
                             This is to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation,
                             where predictions are rescaled back to the original size of the image.
                             However, that would make the code much more (and unnecessarily) complicated, so we do it differently and evaluate
                             joints in the coordinate system of the predicted image.

    :param gt_iscrowd:       Optional argument indicating which instance is annotated with `iscrowd` flag and is not used for evaluation;
                             If not provided, all instances are considered as non-crowd targets.
                             For instance, in CrowdPose all instances are considered as "non-crowd".

    :param gt_bboxes:        Bounding boxes of the groundtruth instances (XYWH).
                             This is COCO-specific and is used in OKS computation for instances w/o visible keypoints.
                             If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

    :param gt_areas:         Area of the groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box,
                             so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) metric computation.
                             If not provided, the area is computed as the product of the width and height of the bounding box.
                             (For instance this is used in CrowdPose dataset)

    """
    predicted_poses, predicted_scores = self.post_prediction_callback(preds)  # Decode raw predictions into poses

    if gt_bboxes is None:
        gt_bboxes = [compute_visible_bbox_xywh(torch.tensor(joints[:, :, 0:2]), torch.tensor(joints[:, :, 2])) for joints in gt_joints]

    if gt_areas is None:
        gt_areas = [bboxes[:, 2] * bboxes[:, 3] for bboxes in gt_bboxes]

    if gt_iscrowd is None:
        gt_iscrowd = [[False] * len(x) for x in gt_joints]

    for i in range(len(predicted_poses)):
        self.update_single_image(
            predicted_poses[i], predicted_scores[i], gt_joints[i], gt_areas=gt_areas[i], gt_bboxes=gt_bboxes[i], gt_iscrowd=gt_iscrowd[i]
        )
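
For reference, a hedged sketch of the ground-truth structures update() expects for a batch of two images (shapes follow the docstring; the values are arbitrary):

import numpy as np

# One array per image, shape (num_instances, num_joints, 3): x, y, visibility (0/1/2).
gt_joints = [
    np.random.rand(3, 17, 3).astype(np.float32),  # image 0: 3 person instances
    np.random.rand(1, 17, 3).astype(np.float32),  # image 1: 1 person instance
]
# Optional per-image crowd flags; if omitted, all instances count as non-crowd.
gt_iscrowd = [np.array([False, False, True]), np.array([False])]

# metric.update(preds, target, gt_joints=gt_joints, gt_iscrowd=gt_iscrowd)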

compute_img_keypoint_matching(preds, pred_scores, targets, targets_visibilities, targets_areas, targets_bboxes, targets_ignored, crowd_targets, crowd_visibilities, crowd_targets_areas, crowd_targets_bboxes, iou_thresholds, sigmas, top_k)

Match predictions and the targets (ground truth) with respect to IoU and confidence score for a given image.

Parameters:

- preds (Tensor, required): Tensor of shape (K, NumJoints, 3) - Array of predicted skeletons. The last dimension encodes X, Y and the confidence score of each joint.
- pred_scores (Tensor, required): Tensor of shape (K) - Confidence scores for each pose.
- targets (Tensor, required): Target joints (M, NumJoints, 2) - Array of groundtruth skeletons.
- targets_visibilities (Tensor, required): Visibility status for each keypoint (M, NumJoints). Values are 0 - invisible, 1 - occluded, 2 - fully visible.
- targets_areas (Tensor, required): Tensor of shape (M) - Areas of target objects.
- targets_bboxes (Tensor, required): Tensor of shape (M, 4) - Bounding boxes (XYWH) of targets.
- targets_ignored (Tensor, required): Tensor of shape (M) - Targets marked as ignored (e.g. all keypoints are not visible or the target does not fit the desired area range).
- crowd_targets (Tensor, required): Crowd target joints (Mc, NumJoints, 3) - Array of groundtruth skeletons. The last dimension encodes X, Y and the visibility score of each joint: (0 - invisible, 1 - occluded, 2 - fully visible).
- crowd_visibilities (Tensor, required): Visibility status for each keypoint of crowd targets (Mc, NumJoints). Values are 0 - invisible, 1 - occluded, 2 - fully visible.
- crowd_targets_areas (Tensor, required): Tensor of shape (Mc) - Areas of crowd target objects.
- crowd_targets_bboxes (Tensor, required): Tensor of shape (Mc, 4) - Bounding boxes (XYWH) of crowd targets.
- iou_thresholds (torch.Tensor, required): IoU thresholds to compute the mAP.
- sigmas (Tensor, required): Tensor of shape (NumJoints) with sigmas for each joint. The sigma value represents how 'hard' it is to locate the exact groundtruth position of the joint.
- top_k (int, required): Number of predictions to keep, ordered by confidence score.

Returns (ImageKeypointMatchingResult):

- preds_matched: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds). True when prediction (i) is matched with a target with respect to the (j)th IoU threshold.
- preds_to_ignore: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds). True when prediction (i) is matched with a crowd target with respect to the (j)th IoU threshold.
- preds_scores: Tensor of shape (min(top_k, len(preds))) with scores of the top-k predictions.
- num_targets: Number of groundtruth targets (total number of targets minus the number of ignored ones).
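
A hedged smoke-test sketch with random tensors, assuming the import path shown below and that empty crowd tensors are accepted in this form (shapes follow the parameter list above):

import torch

from super_gradients.training.metrics.pose_estimation_utils import compute_img_keypoint_matching

num_joints, K, M = 17, 5, 2  # 5 predicted poses, 2 ground-truth poses, no crowd targets
result = compute_img_keypoint_matching(
    preds=torch.rand(K, num_joints, 3) * 100,
    pred_scores=torch.rand(K),
    targets=torch.rand(M, num_joints, 2) * 100,
    targets_visibilities=torch.full((M, num_joints), 2.0),
    targets_areas=torch.full((M,), 1000.0),
    targets_bboxes=torch.tensor([[0.0, 0.0, 50.0, 50.0], [20.0, 20.0, 60.0, 60.0]]),
    targets_ignored=torch.zeros(M, dtype=torch.bool),
    crowd_targets=torch.zeros(0, num_joints, 3),
    crowd_visibilities=torch.zeros(0, num_joints),
    crowd_targets_areas=torch.zeros(0),
    crowd_targets_bboxes=torch.zeros(0, 4),
    iou_thresholds=torch.linspace(0.5, 0.95, 10),
    sigmas=torch.full((num_joints,), 0.05),
    top_k=20,
)
print(result.preds_matched.shape, result.num_targets)  # per-IoU matching mask and number of counted targets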

Source code in V3_1/src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_img_keypoint_matching(
    preds: Tensor,
    pred_scores: Tensor,
    targets: Tensor,
    targets_visibilities: Tensor,
    targets_areas: Tensor,
    targets_bboxes: Tensor,
    targets_ignored: Tensor,
    crowd_targets: Tensor,
    crowd_visibilities: Tensor,
    crowd_targets_areas: Tensor,
    crowd_targets_bboxes: Tensor,
    iou_thresholds: torch.Tensor,
    sigmas: Tensor,
    top_k: int,
) -> ImageKeypointMatchingResult:
    """
    Match predictions and the targets (ground truth) with respect to IoU and confidence score for a given image.

    :param preds:            Tensor of shape (K, NumJoints, 3) - Array of predicted skeletons.
                             Last dimension encode X,Y and confidence score of each joint

    :param pred_scores:      Tensor of shape (K) - Confidence scores for each pose

    :param targets:          Targets joints (M, NumJoints, 2) - Array of groundtruth skeletons

    :param targets_visibilities: Visibility status for each keypoint (M, NumJoints).
                             Values are 0 - invisible, 1 - occluded, 2 - fully visible

    :param targets_areas:    Tensor of shape (M) - Areas of target objects

    :param targets_bboxes:   Tensor of shape (M,4) - Bounding boxes (XYWH) of targets

    :param targets_ignored:  Tensor of shape (M) - Array of targets marked as ignored
                             (e.g. all keypoints are not visible, or the target does not fit the desired area range)

    :param crowd_targets:    Targets joints (Mc, NumJoints, 3) - Array of groundtruth skeletons.
                             The last dimension encodes the X, Y and visibility score of each joint:
                             (0 - invisible, 1 - occluded, 2 - fully visible)

    :param crowd_visibilities: Visibility status for each keypoint of crowd targets (Mc, NumJoints).
                             Values are 0 - invisible, 1 - occluded, 2 - fully visible

    :param crowd_targets_areas: Tensor of shape (Mc) - Areas of target objects

    :param crowd_targets_bboxes: Tensor of shape (Mc, 4) - Bounding boxes (XYWH) of crowd targets

    :param iou_thresholds:  IoU thresholds used to compute the mAP

    :param sigmas:          Tensor of shape (NumJoints) with sigmas for each joint. The sigma value represents how 'hard'
                            it is to locate the exact groundtruth position of the joint.

    :param top_k:           Number of predictions to keep, ordered by confidence score

    :return:
        :preds_matched:     Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
                                True when prediction (i) is matched with a target with respect to the (j)th IoU threshold

        :preds_to_ignore:   Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
                                True when prediction (i) is matched with a crowd target with respect to the (j)th IoU threshold

        :preds_scores:      Tensor of shape (min(top_k, len(preds))) with scores of top-k predictions

        :num_targets:       Number of groundtruth targets (total num targets minus number of ignored)

    """
    num_iou_thresholds = len(iou_thresholds)

    device = preds.device if torch.is_tensor(preds) else (targets.device if torch.is_tensor(targets) else "cpu")
    num_targets = len(targets) - torch.count_nonzero(targets_ignored)

    preds_matched = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)
    targets_matched = torch.zeros(len(targets), num_iou_thresholds, dtype=torch.bool, device=device)
    preds_to_ignore = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)

    if preds is None or len(preds) == 0:
        return ImageKeypointMatchingResult(
            preds_matched=preds_matched,
            preds_to_ignore=preds_to_ignore,
            preds_scores=pred_scores,
            num_targets=num_targets.item(),
        )

    # Ignore all but the predictions that were top_k
    k = min(top_k, len(pred_scores))
    preds_idx_to_use = torch.topk(pred_scores, k=k, sorted=True, largest=True).indices
    preds_to_ignore[:, :] = True
    preds_to_ignore[preds_idx_to_use] = False

    if len(targets) > 0:
        iou = compute_oks(preds[preds_idx_to_use], targets, targets_visibilities, sigmas, gt_areas=targets_areas, gt_bboxes=targets_bboxes)

        # The matching priority is first detection confidence and then IoU value.
        # The detections are already sorted by confidence in NMS, so here, for each prediction, we order the targets by IoU.
        sorted_iou, target_sorted = iou.sort(descending=True, stable=True)

        # Only iterate over IoU values higher than min threshold to speed up the process
        for pred_selected_i, target_sorted_i in (sorted_iou > iou_thresholds[0]).nonzero(as_tuple=False):

            # pred_selected_i and target_sorted_i are relative to filters/sorting, so we extract their absolute indexes
            pred_i = preds_idx_to_use[pred_selected_i]
            target_i = target_sorted[pred_selected_i, target_sorted_i]

            # Vector[j], True when IoU(pred_i, target_i) is above the (j)th threshold
            is_iou_above_threshold = sorted_iou[pred_selected_i, target_sorted_i] > iou_thresholds

            # Vector[j], True when both pred_i and target_i are not matched yet for the (j)th threshold
            are_candidates_free = torch.logical_and(~preds_matched[pred_i, :], ~targets_matched[target_i, :])

            # Vector[j], True when (pred_i, target_i) can be matched for the (j)th threshold
            are_candidates_good = torch.logical_and(is_iou_above_threshold, are_candidates_free)

            is_matching_with_ignore = are_candidates_free & are_candidates_good & targets_ignored[target_i]

            if preds_matched[pred_i].any() and is_matching_with_ignore.any():
                continue

            # For every threshold (j) where target_i and pred_i can be matched together ( are_candidates_good[j]==True )
            # fill the matching placeholders with True
            targets_matched[target_i, are_candidates_good] = True
            preds_matched[pred_i, are_candidates_good] = True

            preds_to_ignore[pred_i] = torch.logical_or(preds_to_ignore[pred_i], is_matching_with_ignore)

            # When all the targets are matched with a prediction for every IoU Threshold, stop.
            if targets_matched.all():
                break

    # Crowd targets can be matched with many predictions.
    # Therefore, for every prediction we just need to check if it has IoA large enough with any crowd target.
    if len(crowd_targets) > 0:
        # shape = (n_preds_to_use x n_crowd_targets)
        ioa = compute_oks(
            preds[preds_idx_to_use],
            crowd_targets,
            crowd_visibilities,
            sigmas,
            gt_areas=crowd_targets_areas,
            gt_bboxes=crowd_targets_bboxes,
        )

        # For each prediction, we keep its highest score with any crowd target (of the same class)
        # shape = (n_preds_to_use)
        best_ioa, _ = ioa.max(1)

        # If a prediction has IoA higher than threshold (with any target of same class), then there is a match
        # shape = (n_preds_to_use x iou_thresholds)
        is_matching_with_crowd = best_ioa.view(-1, 1) > iou_thresholds.view(1, -1)

        preds_to_ignore[preds_idx_to_use] = torch.logical_or(preds_to_ignore[preds_idx_to_use], is_matching_with_crowd)

    return ImageKeypointMatchingResult(
        preds_matched=preds_matched[preds_idx_to_use],
        preds_to_ignore=preds_to_ignore[preds_idx_to_use],
        preds_scores=pred_scores[preds_idx_to_use],
        num_targets=num_targets.item(),
    )
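
A minimal usage sketch for compute_img_keypoint_matching follows, with random, illustrative shapes and values. It assumes the function is importable from super_gradients.training.metrics.pose_estimation_utils (as the source path above suggests) and that ImageKeypointMatchingResult exposes the fields listed in the return description.

import torch
from super_gradients.training.metrics.pose_estimation_utils import compute_img_keypoint_matching

num_joints = 17
sigmas = torch.full((num_joints,), 0.05)                     # per-joint OKS sigmas (illustrative values)
iou_thresholds = torch.linspace(0.5, 0.95, 10)               # COCO-style threshold grid

preds = torch.rand(3, num_joints, 3)                         # (K, NumJoints, 3) - X, Y, confidence
pred_scores = torch.rand(3)                                  # (K,) pose confidence
targets = torch.rand(2, num_joints, 2)                       # (M, NumJoints, 2) groundtruth joints
targets_visibilities = torch.randint(0, 3, (2, num_joints))  # (M, NumJoints) visibility flags
targets_areas = torch.rand(2)                                # (M,) instance areas
targets_bboxes = torch.rand(2, 4)                            # (M, 4) XYWH boxes
targets_ignored = torch.zeros(2, dtype=torch.bool)           # nothing ignored in this toy example

result = compute_img_keypoint_matching(
    preds, pred_scores,
    targets, targets_visibilities, targets_areas, targets_bboxes, targets_ignored,
    crowd_targets=torch.zeros(0, num_joints, 3),             # no crowd annotations in this image
    crowd_visibilities=torch.zeros(0, num_joints),
    crowd_targets_areas=torch.zeros(0),
    crowd_targets_bboxes=torch.zeros(0, 4),
    iou_thresholds=iou_thresholds,
    sigmas=sigmas,
    top_k=20,
)
print(result.preds_matched.shape)   # (min(top_k, K), len(iou_thresholds)) = (3, 10)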

compute_oks(pred_joints, gt_joints, gt_keypoint_visibility, sigmas, gt_areas=None, gt_bboxes=None)

Parameters:

Name Type Description Default
pred_joints Tensor

[K, NumJoints, 2] or [K, NumJoints, 3]

required
gt_joints Tensor

[M, NumJoints, 2]

required
gt_keypoint_visibility Tensor

[M, NumJoints]

required
gt_areas Tensor

[M] Area of each ground truth instance. COCOEval uses the area of the instance mask to scale OKS, so it must be provided separately. If None, the area of the bounding box of each instance computed from gt_joints is used.

None
gt_bboxes Tensor

[M, 4] Bounding box (X,Y,W,H) of each ground truth instance. If None, the bounding box of each instance computed from gt_joints is used.

None
sigmas Tensor

[NumJoints]

required

Returns:

Type Description
Tensor

IoU matrix [K, M]

Source code in V3_1/src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_oks(
    pred_joints: Tensor,
    gt_joints: Tensor,
    gt_keypoint_visibility: Tensor,
    sigmas: Tensor,
    gt_areas: Tensor = None,
    gt_bboxes: Tensor = None,
) -> Tensor:
    """
    Compute the Object Keypoint Similarity (OKS) between each prediction and each ground truth instance.

    :param pred_joints: [K, NumJoints, 2] or [K, NumJoints, 3]
    :param gt_joints:   [M, NumJoints, 2]
    :param gt_keypoint_visibility: [M, NumJoints]
    :param gt_areas: [M] Area of each ground truth instance. COCOEval uses the area of the instance mask to scale OKS, so it must be provided separately.
        If None, the area of the bounding box of each instance computed from gt_joints is used.

    :param gt_bboxes: [M, 4] Bounding box (X,Y,W,H) of each ground truth instance. If None, the bounding box of each instance computed from gt_joints is used.
    :param sigmas: [NumJoints]
    :return: IoU matrix [K, M]
    """

    ious = torch.zeros((len(pred_joints), len(gt_joints)), device=pred_joints.device)
    vars = (sigmas * 2) ** 2

    if gt_bboxes is None:
        gt_bboxes = compute_visible_bbox_xywh(gt_joints, gt_keypoint_visibility)

    if gt_areas is None:
        gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

    # compute oks between each detection and ground truth object
    for gt_index, (gt_keypoints, gt_keypoint_visibility, gt_bbox, gt_area) in enumerate(zip(gt_joints, gt_keypoint_visibility, gt_bboxes, gt_areas)):
        # create bounds for ignore regions(double the gt bbox)
        xg = gt_keypoints[:, 0]
        yg = gt_keypoints[:, 1]
        k1 = torch.count_nonzero(gt_keypoint_visibility > 0)

        x0 = gt_bbox[0] - gt_bbox[2]
        x1 = gt_bbox[0] + gt_bbox[2] * 2
        y0 = gt_bbox[1] - gt_bbox[3]
        y1 = gt_bbox[1] + gt_bbox[3] * 2

        for pred_index, pred_keypoints in enumerate(pred_joints):
            xd = pred_keypoints[:, 0]
            yd = pred_keypoints[:, 1]
            if k1 > 0:
                # measure the per-keypoint distance if keypoints visible
                dx = xd - xg
                dy = yd - yg
            else:
                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                dx = (x0 - xd).clamp_min(0) + (xd - x1).clamp_min(0)
                dy = (y0 - yd).clamp_min(0) + (yd - y1).clamp_min(0)

            e = (dx**2 + dy**2) / vars / (gt_area + torch.finfo(torch.float64).eps) / 2

            if k1 > 0:
                e = e[gt_keypoint_visibility > 0]
            ious[pred_index, gt_index] = torch.sum(torch.exp(-e)) / e.shape[0]

    return ious
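
As the code above shows, each visible keypoint contributes exp(-d^2 / (2 * area * (2 * sigma)^2)) and the per-instance OKS is the mean over the contributing keypoints. Below is a minimal call sketch with random, illustrative values; gt_areas and gt_bboxes are passed explicitly here, and the import path is assumed from the source location above.

import torch
from super_gradients.training.metrics.pose_estimation_utils import compute_oks

num_joints = 17
pred_joints = torch.rand(4, num_joints, 2) * 100            # (K, NumJoints, 2) predicted XY, pixel units
gt_joints = torch.rand(2, num_joints, 2) * 100              # (M, NumJoints, 2) groundtruth XY
gt_visibility = torch.randint(0, 3, (2, num_joints))        # 0 / 1 / 2 visibility flags
sigmas = torch.full((num_joints,), 0.05)                    # per-joint sigmas (illustrative)
gt_bboxes = torch.tensor([[0.0, 0.0, 100.0, 100.0]] * 2)    # (M, 4) XYWH
gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]                # (M,)

oks = compute_oks(pred_joints, gt_joints, gt_visibility, sigmas, gt_areas=gt_areas, gt_bboxes=gt_bboxes)
print(oks.shape)   # torch.Size([4, 2]) - one similarity value per (prediction, groundtruth) pair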

compute_visible_bbox_xywh(joints, visibility_mask)

Compute the bounding box (X,Y,W,H) of the visible joints for each instance.

Parameters:

Name Type Description Default
joints Tensor

[Num Instances, Num Joints, 2+] - the last channel must have a dimension of at least 2, which is assumed to contain the (X, Y) coordinates of the keypoint

required
visibility_mask Tensor

[Num Instances, Num Joints]

required

Returns:

Type Description
Tensor

A tensor [Num Instances, 4] where the last dimension contains the bbox in XYWH format

Source code in V3_1/src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_visible_bbox_xywh(joints: Tensor, visibility_mask: Tensor) -> Tensor:
    """
    Compute the bounding box (X,Y,W,H) of the visible joints for each instance.

    :param joints:  [Num Instances, Num Joints, 2+] - the last channel must have a dimension of at least 2,
                    which is assumed to contain the (X, Y) coordinates of the keypoint
    :param visibility_mask: [Num Instances, Num Joints]
    :return: A tensor [Num Instances, 4] where the last dimension contains the bbox in XYWH format
    """
    visibility_mask = visibility_mask > 0
    initial_value = 1_000_000

    # Mask out invisible joints before reducing, so they cannot affect the min/max
    x1 = joints[:, :, 0].masked_fill(~visibility_mask, initial_value).amin(dim=-1)
    y1 = joints[:, :, 1].masked_fill(~visibility_mask, initial_value).amin(dim=-1)

    # Instances without any visible joint collapse to an empty box at the origin
    x1[x1 == initial_value] = 0
    y1[y1 == initial_value] = 0

    x2 = joints[:, :, 0].masked_fill(~visibility_mask, 0).amax(dim=-1)
    y2 = joints[:, :, 1].masked_fill(~visibility_mask, 0).amax(dim=-1)

    w = x2 - x1
    h = y2 - y1

    return torch.stack([x1, y1, w, h], dim=-1)
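
A small, self-contained sketch of the documented behaviour (the expected output follows from the XYWH-over-visible-joints semantics described above; the import path is assumed from the source location):

import torch
from super_gradients.training.metrics.pose_estimation_utils import compute_visible_bbox_xywh

joints = torch.tensor([[[10.0, 20.0], [30.0, 60.0], [50.0, 40.0]]])   # 1 instance, 3 joints, (X, Y)
visibility = torch.tensor([[2, 1, 0]])                                # third joint is invisible

bboxes = compute_visible_bbox_xywh(joints, visibility)
print(bboxes)   # expected: tensor([[10., 20., 20., 40.]]) - box spanning the two visible joints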

AbstractMetricsArgsPrepFn

Bases: ABC

Abstract preprocess metrics arguments class.

Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
class AbstractMetricsArgsPrepFn(ABC):
    """
    Abstract preprocess metrics arguments class.
    """

    @abstractmethod
    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        All subclasses must implement this function and return a tuple of torch tensors (predictions, target).
        """
        raise NotImplementedError()

__call__(preds, target) abstractmethod

All subclasses must implement this function and return a tuple of torch tensors (predictions, target).

Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
@abstractmethod
def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    All subclasses must implement this function and return a tuple of torch tensors (predictions, target).
    """
    raise NotImplementedError()
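
As a hypothetical illustration of this contract (the class name and the thresholding behaviour are made up for this sketch, not part of the library), a concrete prep function only needs to return a (preds, target) tuple of tensors:

import torch
from typing import Tuple
from super_gradients.training.metrics.segmentation_metrics import AbstractMetricsArgsPrepFn


class BinarizePrepFn(AbstractMetricsArgsPrepFn):
    """Hypothetical prep fn: apply sigmoid and binarize single-channel predictions."""

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold

    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        # Binarize predictions and cast the target to integer labels
        preds = (torch.sigmoid(preds) > self.threshold).long()
        return preds, target.long()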

Dice

Bases: torchmetrics.JaccardIndex

Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
@register_metric(Metrics.DICE)
class Dice(torchmetrics.JaccardIndex):
    def __init__(
        self,
        num_classes: int,
        dist_sync_on_step: bool = False,
        ignore_index: Optional[int] = None,
        reduction: str = "elementwise_mean",
        threshold: float = 0.5,
        metrics_args_prep_fn: Optional[AbstractMetricsArgsPrepFn] = None,
    ):

        if num_classes <= 1:
            raise ValueError(f"Dice class only for multi-class usage! For binary usage, please call {BinaryDice.__name__}")

        super().__init__(num_classes=num_classes, dist_sync_on_step=dist_sync_on_step, ignore_index=ignore_index, reduction=reduction, threshold=threshold)
        self.metrics_args_prep_fn = metrics_args_prep_fn or PreprocessSegmentationMetricsArgs(apply_arg_max=True)
        self.greater_is_better = True

    def update(self, preds, target: torch.Tensor):
        preds, target = self.metrics_args_prep_fn(preds, target)
        super().update(preds=preds, target=target)

    def compute(self) -> torch.Tensor:
        """Computes Dice coefficient"""
        return _dice_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction)
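
A minimal usage sketch with random inputs; it assumes a torchmetrics version compatible with this JaccardIndex subclass (as pinned by the library):

import torch
from super_gradients.training.metrics.segmentation_metrics import Dice

metric = Dice(num_classes=3)                 # multi-class only; binary usage should go through BinaryDice
preds = torch.randn(2, 3, 8, 8)              # (batch, classes, H, W) raw logits
target = torch.randint(0, 3, (2, 8, 8))      # (batch, H, W) integer labels

metric.update(preds, target)                 # the default prep fn applies argmax over the class dimension
print(metric.compute())                      # Dice coefficient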

compute()

Computes Dice coefficient

Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
def compute(self) -> torch.Tensor:
    """Computes Dice coefficient"""
    return _dice_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction)

PreprocessSegmentationMetricsArgs

Bases: AbstractMetricsArgsPrepFn

Default preprocessing function for segmentation inputs, applied before updating segmentation metrics; it handles multiple inputs and applies normalizations.

Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
class PreprocessSegmentationMetricsArgs(AbstractMetricsArgsPrepFn):
    """
    Default preprocessing function for segmentation inputs, applied before updating segmentation metrics; it handles
    multiple inputs and applies normalizations.
    """

    def __init__(self, apply_arg_max: bool = False, apply_sigmoid: bool = False):
        """
        :param apply_arg_max: Whether to apply argmax on predictions tensor.
        :param apply_sigmoid:  Whether to apply sigmoid on predictions tensor.
        """
        self.apply_arg_max = apply_arg_max
        self.apply_sigmoid = apply_sigmoid

    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        # WHEN DEALING WITH MULTIPLE OUTPUTS- OUTPUTS[0] IS THE MAIN SEGMENTATION MAP
        if isinstance(preds, (tuple, list)):
            preds = preds[0]
        if self.apply_arg_max:
            _, preds = torch.max(preds, 1)
        elif self.apply_sigmoid:
            preds = torch.sigmoid(preds)

        target = target.long()
        return preds, target
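
A short sketch of how this prep function normalizes model outputs before a metric update (shapes are illustrative; the tuple input mimics a model with an auxiliary head):

import torch
from super_gradients.training.metrics.segmentation_metrics import PreprocessSegmentationMetricsArgs

prep = PreprocessSegmentationMetricsArgs(apply_arg_max=True)

main_out = torch.randn(2, 5, 16, 16)                # (batch, classes, H, W) logits of the main head
aux_out = torch.randn(2, 5, 16, 16)                 # auxiliary head output - ignored by the prep fn
target = torch.randint(0, 5, (2, 16, 16)).float()

preds, target = prep((main_out, aux_out), target)   # only outputs[0] is used, argmax is applied
print(preds.shape, preds.dtype, target.dtype)       # torch.Size([2, 16, 16]) torch.int64 torch.int64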

__init__(apply_arg_max=False, apply_sigmoid=False)

Parameters:

Name Type Description Default
apply_arg_max bool

Whether to apply argmax on predictions tensor.

False
apply_sigmoid bool

Whether to apply sigmoid on predictions tensor.

False
Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
def __init__(self, apply_arg_max: bool = False, apply_sigmoid: bool = False):
    """
    :param apply_arg_max: Whether to apply argmax on predictions tensor.
    :param apply_sigmoid:  Whether to apply sigmoid on predictions tensor.
    """
    self.apply_arg_max = apply_arg_max
    self.apply_sigmoid = apply_sigmoid

batch_intersection_union(predict, target, nclass)

Batch Intersection over Union

Parameters:

Name Type Description Default
predict torch.Tensor

input 4D tensor

required
target torch.Tensor

label 3D tensor

required
nclass int

number of categories (int)

required
Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
def batch_intersection_union(predict: torch.Tensor, target: torch.Tensor, nclass: int) -> Tuple[float, float]:
    """Batch Intersection of Union

    :param predict: input 4D tensor
    :param target: label 3D tensor
    :param nclass: number of categories (int)
    """
    _, predict = torch.max(predict, 1)
    mini = 1
    maxi = nclass
    nbins = nclass
    predict = predict.cpu().numpy() + 1
    target = target.cpu().numpy() + 1

    predict = predict * (target > 0).astype(predict.dtype)
    intersection = predict * (predict == target)
    # areas of intersection and union
    area_inter, _ = np.histogram(intersection, bins=nbins, range=(mini, maxi))
    area_pred, _ = np.histogram(predict, bins=nbins, range=(mini, maxi))
    area_lab, _ = np.histogram(target, bins=nbins, range=(mini, maxi))
    area_union = area_pred + area_lab - area_inter
    assert (area_inter <= area_union).all(), "Intersection area should be smaller than Union area"
    return area_inter, area_union
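
A per-class IoU can be derived directly from the two returned arrays. A minimal sketch with random inputs follows; the epsilon guard is an assumption added here to avoid dividing by zero for classes that never appear:

import numpy as np
import torch
from super_gradients.training.metrics.segmentation_metrics import batch_intersection_union

nclass = 4
predict = torch.randn(2, nclass, 32, 32)           # (batch, classes, H, W) raw scores
target = torch.randint(0, nclass, (2, 32, 32))     # (batch, H, W) integer labels

area_inter, area_union = batch_intersection_union(predict, target, nclass)
iou_per_class = area_inter / (area_union + np.spacing(1))   # guard against empty classes
print(iou_per_class)                                        # one IoU value per class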

batch_pix_accuracy(predict, target)

Batch Pixel Accuracy

Parameters:

Name Type Description Default
predict torch.Tensor

input 4D tensor

required
target torch.Tensor

label 3D tensor

required
Source code in V3_1/src/super_gradients/training/metrics/segmentation_metrics.py
def batch_pix_accuracy(predict: torch.Tensor, target: torch.Tensor) -> Tuple[float, float]:
    """Batch Pixel Accuracy

    :param predict: input 4D tensor
    :param target: label 3D tensor
    """
    _, predict = torch.max(predict, 1)
    predict = predict.cpu().numpy() + 1
    target = target.cpu().numpy() + 1
    pixel_labeled = np.sum(target > 0)
    pixel_correct = np.sum((predict == target) * (target > 0))
    assert pixel_correct <= pixel_labeled, "Correct area should be smaller than Labeled"
    return pixel_correct, pixel_labeled
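
Pixel accuracy is simply the ratio of the two returned counts. A minimal sketch with random inputs (the epsilon guard is again an assumption added for safety):

import numpy as np
import torch
from super_gradients.training.metrics.segmentation_metrics import batch_pix_accuracy

predict = torch.randn(2, 4, 32, 32)               # (batch, classes, H, W) raw scores
target = torch.randint(0, 4, (2, 32, 32))         # (batch, H, W) integer labels

pixel_correct, pixel_labeled = batch_pix_accuracy(predict, target)
print(pixel_correct / (pixel_labeled + np.spacing(1)))   # pixel accuracy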