
Metrics

ToyTestClassificationMetric

Bases: Metric

Dummy classification Metric object that always returns 0 (for testing).

Source code in src/super_gradients/training/metrics/classification_metrics.py, lines 81-93
class ToyTestClassificationMetric(Metric):
    """
    Dummy classification Mettric object returning 0 always (for testing).
    """

    def __init__(self, dist_sync_on_step=False):
        super().__init__(dist_sync_on_step=dist_sync_on_step)

    def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
        pass

    def compute(self):
        return 0
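
A minimal usage sketch (assuming the class is imported from the module shown in the source path above):

import torch
from super_gradients.training.metrics.classification_metrics import ToyTestClassificationMetric

metric = ToyTestClassificationMetric()
preds = torch.randn(4, 10)               # dummy logits for a batch of 4 samples
target = torch.randint(0, 10, (4,))      # dummy labels
metric.update(preds, target)             # update() is a no-op
print(metric.compute())                  # always 0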

accuracy(output, target, topk=(1,))

Computes the precision@k for the specified values of k

Parameters:

Name Type Description Default
output Tensor / Numpy / List

The prediction

required
target Tensor / Numpy / List

The corresponding labels

required
topk tuple

The type of accuracy to calculate, e.g. topk=(1,5) returns accuracy for top-1 and top-5

(1,)
Source code in src/super_gradients/training/metrics/classification_metrics.py, lines 10-37
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k
    :param output: Tensor / Numpy / List
        The prediction
    :param target: Tensor / Numpy / List
        The corresponding labels
    :param topk: tuple
        The type of accuracy to calculate, e.g. topk=(1,5) returns accuracy for top-1 and top-5"""
    # Convert to tensor
    output = convert_to_tensor(output)
    target = convert_to_tensor(target)

    # Get the maximal value of the accuracy measurement and the batch size
    maxk = max(topk)
    batch_size = target.size(0)

    # Get the top k predictions
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    # Count the number of correct predictions only for the highest k
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # Count the number of correct prediction for the different K (the top predictions) values
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size).item())
    return res
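
A short usage sketch of accuracy (import path taken from the source location shown above):

import torch
from super_gradients.training.metrics.classification_metrics import accuracy

output = torch.tensor([[0.10, 0.70, 0.20],
                       [0.80, 0.05, 0.15]])   # predictions for 2 samples, 3 classes
target = torch.tensor([1, 2])                 # sample 1 correct at top-1, sample 2 only at top-2

top1, top2 = accuracy(output, target, topk=(1, 2))
print(top1, top2)  # 50.0 100.0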

Delta1

Bases: DepthEstimationMetricBase

Delta1 metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 164-174
@register_metric(Metrics.DELTA1)
class Delta1(DepthEstimationMetricBase):
    """
    Delta1 metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=DeltaMetric(delta=1.25), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

Delta2

Bases: DepthEstimationMetricBase

Delta2 metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 177-187
@register_metric(Metrics.DELTA2)
class Delta2(DepthEstimationMetricBase):
    """
    Delta2 metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=DeltaMetric(delta=1.25**2), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

Delta3

Bases: DepthEstimationMetricBase

Delta3 metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 190-200
@register_metric(Metrics.DELTA3)
class Delta3(DepthEstimationMetricBase):
    """
    Delta3 metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=DeltaMetric(delta=1.25**3), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)
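
A minimal usage sketch (import path assumed from the source location above). Delta1, Delta2 and Delta3 only differ in the threshold passed to DeltaMetric: 1.25, 1.25**2 and 1.25**3 respectively:

import torch
from super_gradients.training.metrics.depth_estimation_metrics import Delta1

preds = torch.tensor([1.0, 2.0, 4.0])
target = torch.tensor([1.1, 2.0, 1.0])   # per-pixel ratios max(p/t, t/p): 1.1, 1.0, 4.0

metric = Delta1()
metric.update(preds, target)
print(metric.compute())  # 2 of 3 pixels satisfy ratio < 1.25 -> tensor(0.6667)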

DeltaMetric

Bases: Metric

Delta metric - returns the percentage of pixels such that max(preds / target, target / preds) < delta

Use inheritors for ignored values.

Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 128-161
@register_metric(Metrics.DELTAMETRIC)
class DeltaMetric(Metric):
    """
    Delta metric - returns the percentage of pixels s.t max(preds / target, target / preds) < delta

    Use inheritors for ignored values.

    :param delta: Threshold value for delta metric.

    """

    def __init__(self, delta: float):
        super().__init__()
        self.delta = delta
        self.add_state("total_delta_pixels", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.add_state("total_pixels", default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds: Tensor, target: Tensor):
        self.total_pixels += target.numel()
        self.total_delta_pixels += self.compute_delta_pixels(preds, target)

    def compute_delta_pixels(self, preds: Tensor, target: Tensor) -> Tensor:
        """
        Compute delta metrics for depth estimation without support for ignored values.

        :param preds: Model predictions.
        :param target: Ground truth depth map.
        :return: Delta metric value.
        """
        ratio = torch.max(preds / target, target / preds)
        return torch.sum((ratio < self.delta).float())

    def compute(self):
        return self.total_delta_pixels / self.total_pixels
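
A quick numeric check of the threshold rule above, in plain PyTorch:

import torch

preds = torch.tensor([2.0])
target = torch.tensor([3.0])
ratio = torch.max(preds / target, target / preds)   # tensor([1.5])
print(ratio < 1.25, ratio < 1.25 ** 2)              # tensor([False]) tensor([True])

So a predicted depth of 2.0 against a ground truth of 3.0 counts as correct for Delta2 and Delta3, but not for Delta1.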

compute_delta_pixels(preds, target)

Compute delta metrics for depth estimation without support for ignored values.

Parameters:

Name Type Description Default
preds Tensor

Model predictions.

required
target Tensor

Ground truth depth map.

required

Returns:

Type Description
Tensor

Delta metric value.

Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 149-158
def compute_delta_pixels(self, preds: Tensor, target: Tensor) -> Tensor:
    """
    Compute delta metrics for depth estimation without support for ignored values.

    :param preds: Model predictions.
    :param target: Ground truth depth map.
    :return: Delta metric value.
    """
    ratio = torch.max(preds / target, target / preds)
    return torch.sum((ratio < self.delta).float())

DepthEstimationMetricBase

Bases: Metric

Base class for depth estimation metrics, handling common processing steps.

Parameters:

Name Type Description Default
metric Metric

The specific torchmetrics metric instance.

required
ignore_val Optional[float]

Value to be ignored when computing metrics. In depth estimation tasks, it is common to have regions in the depth map where the ground truth depth is not available or unreliable (e.g., marked as -1 or a specific value). In such cases, setting ignore_val allows you to exclude these regions from the metric computation. It is important that the dataset class providing the depth map fills the corresponding regions of the image with this ignore_val value to ensure consistency in metric calculations.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 9-60
class DepthEstimationMetricBase(Metric):
    """
    Base class for depth estimation metrics, handling common processing steps.

    :param metric: The specific torchmetrics metric instance.
    :param ignore_val: Value to be ignored when computing metrics. In depth estimation tasks, it is common
                      to have regions in the depth map where the ground truth depth is not available or unreliable (e.g.,
                      marked as -1 or a specific value). In such cases, setting `ignore_val` allows you to exclude these
                      regions from the metric computation. It is important that the dataset class providing the depth map
                      fills the corresponding regions of the image with this `ignore_val` value to ensure consistency in
                      metric calculations.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions.
    """

    def __init__(self, metric: Metric, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__()
        self.metric = metric
        self.ignore_val = ignore_val
        self.apply_sigmoid = apply_sigmoid

    def process_preds_and_target(self, preds: Union[Tensor, Sequence[Tensor]], target: Tensor) -> Tuple[Tensor, Tensor]:
        """
        Process predictions and target tensors for depth estimation metrics:
        - If a sequence is returned by the model -> sets preds to the first element
        - Squeezes the dummy dimension (i.e dim1) when preds.ndim == 4
        - Applies sigmoid to preds if apply_sigmoid is True
        - Removes entries to ignore where ignore_val is present in target

        :param preds: Model predictions, either a tensor or a sequence of tensors.
        :param target: Ground truth depth map.
        :return: Processed predictions and target tensors.
        """

        if isinstance(preds, Sequence):
            preds = preds[0]
        if self.apply_sigmoid:
            preds = torch.sigmoid(preds)
        if self.ignore_val is not None:
            non_ignored = preds != self.ignore_val
            preds = preds[non_ignored]
            target = target[non_ignored]
        return preds, target

    def update(self, preds: Tensor, target: Tensor):
        preds, target = self.process_preds_and_target(preds, target)
        self.metric.update(preds, target)

    def compute(self):
        return self.metric.compute()

    def reset(self) -> None:
        self.metric.reset()

process_preds_and_target(preds, target)

Process predictions and target tensors for depth estimation metrics:

- If a sequence is returned by the model -> sets preds to the first element
- Squeezes the dummy dimension (i.e. dim1) when preds.ndim == 4
- Applies sigmoid to preds if apply_sigmoid is True
- Removes entries to ignore where ignore_val is present in target

Parameters:

Name Type Description Default
preds Union[Tensor, Sequence[Tensor]]

Model predictions, either a tensor or a sequence of tensors.

required
target Tensor

Ground truth depth map.

required

Returns:

Type Description
Tuple[Tensor, Tensor]

Processed predictions and target tensors.

Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 29-50
def process_preds_and_target(self, preds: Union[Tensor, Sequence[Tensor]], target: Tensor) -> Tuple[Tensor, Tensor]:
    """
    Process predictions and target tensors for depth estimation metrics:
    - If a sequence is returned by the model -> sets preds to the first element
    - Squeezes the dummy dimension (i.e dim1) when preds.ndim == 4
    - Applies sigmoid to preds if apply_sigmoid is True
    - Removes entries to ignore where ignore_val is present in target

    :param preds: Model predictions, either a tensor or a sequence of tensors.
    :param target: Ground truth depth map.
    :return: Processed predictions and target tensors.
    """

    if isinstance(preds, Sequence):
        preds = preds[0]
    if self.apply_sigmoid:
        preds = torch.sigmoid(preds)
    if self.ignore_val is not None:
        non_ignored = preds != self.ignore_val
        preds = preds[non_ignored]
        target = target[non_ignored]
    return preds, target
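
A small standalone sketch of what the ignore_val filtering conceptually does (plain tensors, not tied to a specific metric; the exact masking inside the library may differ in detail):

import torch

ignore_val = -1.0
preds = torch.tensor([0.8, 1.2, 3.0, 0.5])
target = torch.tensor([1.0, -1.0, 2.9, 0.4])   # second pixel has no valid ground truth

valid = target != ignore_val                    # keep only pixels with a real depth value
preds, target = preds[valid], target[valid]
# The filtered 1-D tensors are then passed to the wrapped torchmetrics metric.
print(preds, target)  # tensor([0.8000, 3.0000, 0.5000]) tensor([1.0000, 2.9000, 0.4000])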

DepthMAE

Bases: DepthEstimationMetricBase

Mean Absolute Error (MAE) metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 102-112
@register_metric(Metrics.DepthMAE)
class DepthMAE(DepthEstimationMetricBase):
    """
    Mean Absolute Error (MAE) metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanAbsoluteError(), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthMAPE

Bases: DepthEstimationMetricBase

Mean Absolute Percentage Error (MAPE) metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 115-125
@register_metric(Metrics.DepthMAPE)
class DepthMAPE(DepthEstimationMetricBase):
    """
    Mean Absolute Percentage Error (MAPE) metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanAbsolutePercentageError(), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthMSE

Bases: DepthEstimationMetricBase

Mean Squared Error metric (squared) for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 63-73
@register_metric(Metrics.DepthMSE)
class DepthMSE(DepthEstimationMetricBase):
    """
    Mean Squared Error metric (squared) for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanSquaredError(squared=True), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthMSLE

Bases: DepthEstimationMetricBase

Mean Squared Logarithmic Error metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 89-99
@register_metric(Metrics.DepthMSLE)
class DepthMSLE(DepthEstimationMetricBase):
    """
    Mean Squared Logarithmic Error metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanSquaredLogError(), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthRMSE

Bases: DepthEstimationMetricBase

Root Mean Squared Error metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 76-86
@register_metric(Metrics.DepthRMSE)
class DepthRMSE(DepthEstimationMetricBase):
    """
    Root Mean Squared Error metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanSquaredError(squared=False), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)
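
A minimal usage sketch of the depth metrics (import path assumed from the source location above), following the docstring convention that regions without valid ground truth are filled with ignore_val in the depth map:

import torch
from super_gradients.training.metrics.depth_estimation_metrics import DepthMAE, DepthRMSE

preds = torch.rand(2, 1, 4, 4)            # (batch, 1, H, W) depth predictions
target = torch.rand(2, 1, 4, 4)
target[0, 0, 0, 0] = -1.0                 # pixel without valid ground truth

mae = DepthMAE(ignore_val=-1.0)
rmse = DepthRMSE(ignore_val=-1.0)
mae.update(preds, target)
rmse.update(preds, target)
print(mae.compute(), rmse.compute())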

DetectionMetrics

Bases: Metric

DetectionMetrics

Metric class for computing F1, Precision, Recall and Mean Average Precision.

Parameters:

Name Type Description Default
num_cls int

Number of classes.

required
post_prediction_callback DetectionPostPredictionCallback

DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS).

required
normalize_targets bool

Whether to normalize bbox coordinates by image size.

False
iou_thres Union[IouThreshold, Tuple[float, float], float]

IoU threshold to compute the mAP. Could be either an instance of IouThreshold, a tuple (lower_bound, upper_bound) or a single scalar.

IouThreshold.MAP_05_TO_095
recall_thres torch.Tensor

Recall threshold to compute the mAP.

None
score_thres float

Score threshold to compute Recall, Precision and F1.

0.1
top_k_predictions int

Number of predictions per class used to compute metrics, ordered by confidence score

100
dist_sync_on_step bool

Synchronize metric state across processes at each forward() before returning the value at the step.

False
accumulate_on_cpu bool

Run on CPU regardless of device used in other parts. This is to avoid "CUDA out of memory" that might happen on GPU.

True
calc_best_score_thresholds bool

Whether to calculate the best score threshold overall and per class. If True, the compute() function will return a metrics dictionary that not only includes the average metrics calculated across all classes, but also the optimal score threshold overall and for each individual class.

True
include_classwise_ap bool

Whether to include the class-wise average precision in the returned metrics dictionary. If enabled, output metrics dictionary will look similar to this:

{
    'Precision0.5:0.95': 0.5,
    'Recall0.5:0.95': 0.5,
    'F10.5:0.95': 0.5,
    'mAP0.5:0.95': 0.5,
    'AP0.5:0.95_person': 0.5,
    'AP0.5:0.95_car': 0.5,
    'AP0.5:0.95_bicycle': 0.5,
    'AP0.5:0.95_motorcycle': 0.5,
    ...
}

Class names are either provided via the class_names parameter or are generated automatically.

False
class_names List[str]

Array of class names. When include_classwise_ap=True, these names are used to build the per-class AP keys in the output metrics dictionary. If None, dummy names class_{idx} are used instead.

None
state_dict_prefix str

A prefix added to the metric's state dict keys. The state dict is used to synchronize the metric in DDP mode. It was empirically found that if you have two metric classes A and B(A) that share the same state key, torchmetrics attempts to sync their states together, which causes an error. In this case, adding a prefix to the name of the synchronized state seems to help, but it is still unclear why this happens.

''
Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 27-288
@register_metric(Metrics.DETECTION_METRICS)
class DetectionMetrics(Metric):
    """
    DetectionMetrics

    Metric class for computing F1, Precision, Recall and Mean Average Precision.

    :param num_cls:                         Number of classes.
    :param post_prediction_callback:        DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS).
    :param normalize_targets:               Whether to normalize bbox coordinates by image size.
    :param iou_thres:                       IoU threshold to compute the mAP.
                                            Could be either instance of IouThreshold, a tuple (lower bound, upper_bound) or single scalar.
    :param recall_thres:                    Recall threshold to compute the mAP.
    :param score_thres:                     Score threshold to compute Recall, Precision and F1.
    :param top_k_predictions:               Number of predictions per class used to compute metrics, ordered by confidence score
    :param dist_sync_on_step:               Synchronize metric state across processes at each ``forward()`` before returning the value at the step.
    :param accumulate_on_cpu:               Run on CPU regardless of device used in other parts.
                                            This is to avoid "CUDA out of memory" that might happen on GPU.
    :param calc_best_score_thresholds:      Whether to calculate the best score threshold overall and per class
                                            If True, the compute() function will return a metrics dictionary that not
                                            only includes the average metrics calculated across all classes,
                                            but also the optimal score threshold overall and for each individual class.
    :param include_classwise_ap:            Whether to include the class-wise average precision in the returned metrics dictionary.
                                            If enabled, output metrics dictionary will look similar to this:
                                            {
                                                'Precision0.5:0.95': 0.5,
                                                'Recall0.5:0.95': 0.5,
                                                'F10.5:0.95': 0.5,
                                                'mAP0.5:0.95': 0.5,
                                                'AP0.5:0.95_person': 0.5,
                                                'AP0.5:0.95_car': 0.5,
                                                'AP0.5:0.95_bicycle': 0.5,
                                                'AP0.5:0.95_motorcycle': 0.5,
                                                ...
                                            }
                                            Class names are either provided via the class_names parameter or are generated automatically.
    :param class_names:                     Array of class names. When include_classwise_ap=True, will use these names to make
                                            per-class APs keys in the output metrics dictionary.
                                            If None, will use dummy names `class_{idx}` instead.
    :param state_dict_prefix:               A prefix to append to the state dict of the metric. A state dict used to synchronize metric in DDP mode.
                                            It was empirically found that if you have two metric classes A and B(A) that share the same state key, for
                                            some reason torchmetrics attempts to sync their states all together, which causes an error.
                                            In this case adding a prefix to the name of the synchronized state seems to help,
                                            but it is still unclear why it happens.


    """

    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback,
        normalize_targets: bool = False,
        iou_thres: Union[IouThreshold, Tuple[float, float], float] = IouThreshold.MAP_05_TO_095,
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
        calc_best_score_thresholds: bool = True,
        include_classwise_ap: bool = False,
        class_names: List[str] = None,
        state_dict_prefix: str = "",
    ):
        if class_names is None:
            if include_classwise_ap:
                logger.warning(
                    "Parameter 'include_classwise_ap' is set to True, but no class names are provided. "
                    "We will generate dummy class names, but we recommend to provide class names explicitly to"
                    "have meaningful names in reported metrics."
                )
            class_names = ["class_" + str(i) for i in range(num_cls)]
        else:
            class_names = list(class_names)

        if class_names is not None and len(class_names) != num_cls:
            raise ValueError(f"Number of class names ({len(class_names)}) does not match number of classes ({num_cls})")

        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_cls = num_cls
        self.iou_thres = iou_thres
        self.class_names = class_names

        if isinstance(iou_thres, IouThreshold):
            self.iou_thresholds = iou_thres.to_tensor()
        if isinstance(iou_thres, tuple):
            low, high = iou_thres
            self.iou_thresholds = IouThreshold.from_bounds(low, high)
        else:
            self.iou_thresholds = torch.tensor([iou_thres])

        self.map_str = "mAP" + self._get_range_str()
        self.include_classwise_ap = include_classwise_ap

        self.precision_metric_key = f"{state_dict_prefix}Precision{self._get_range_str()}"
        self.recall_metric_key = f"{state_dict_prefix}Recall{self._get_range_str()}"
        self.f1_metric_key = f"{state_dict_prefix}F1{self._get_range_str()}"
        self.map_metric_key = f"{state_dict_prefix}mAP{self._get_range_str()}"

        greater_component_is_better = [
            (self.precision_metric_key, True),
            (self.recall_metric_key, True),
            (self.map_metric_key, True),
            (self.f1_metric_key, True),
        ]

        if self.include_classwise_ap:
            self.per_class_ap_names = [f"{state_dict_prefix}AP{self._get_range_str()}_{class_name}" for class_name in class_names]
            greater_component_is_better += [(key, True) for key in self.per_class_ap_names]

        self.greater_component_is_better = collections.OrderedDict(greater_component_is_better)
        self.component_names = list(self.greater_component_is_better.keys())
        self.calc_best_score_thresholds = calc_best_score_thresholds
        self.best_threshold_per_class_names = [f"Best_score_threshold_{class_name}" for class_name in class_names]

        if self.calc_best_score_thresholds:
            self.component_names.append("Best_score_threshold")

        if self.calc_best_score_thresholds and self.include_classwise_ap:
            self.component_names += self.best_threshold_per_class_names

        self.components = len(self.component_names)

        self.post_prediction_callback = post_prediction_callback
        self.is_distributed = super_gradients.is_distributed()
        self.denormalize_targets = not normalize_targets
        self.world_size = None
        self.rank = None
        self.state_key = f"{state_dict_prefix}matching_info{self._get_range_str()}"
        self.add_state(self.state_key, default=[], dist_reduce_fx=None)

        self.recall_thresholds = torch.linspace(0, 1, 101) if recall_thres is None else torch.tensor(recall_thres, dtype=torch.float32)
        self.score_threshold = score_thres
        self.top_k_predictions = top_k_predictions

        self.accumulate_on_cpu = accumulate_on_cpu

    def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
        """
        Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

        :param preds:           Raw output of the model, the format might change from one model to another,
                                but has to fit the input format of the post_prediction_callback (cx,cy,wh)
        :param target:          Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format:  (index, label, cx, cy, w, h)
        :param device:          Device to run on
        :param inputs:          Input image tensor of shape (batch_size, n_img, height, width)
        :param crowd_targets:   Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH
        """
        self.iou_thresholds = self.iou_thresholds.to(device)
        _, _, height, width = inputs.shape
        iou_matcher = IoUMatching(self.iou_thresholds)

        targets = target.clone()
        crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

        preds = self.post_prediction_callback(preds, device=device)

        new_matching_info = compute_detection_matching(
            preds,
            targets,
            height,
            width,
            iou_thresholds=iou_matcher.get_thresholds(),
            matching_strategy=iou_matcher,
            crowd_targets=crowd_targets,
            top_k=self.top_k_predictions,
            denormalize_targets=self.denormalize_targets,
            device=self.device,
            return_on_cpu=self.accumulate_on_cpu,
        )

        accumulated_matching_info = getattr(self, self.state_key)
        setattr(self, self.state_key, accumulated_matching_info + new_matching_info)

    def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
        """Compute the metrics for all the accumulated results.
        :return: Metrics of interest
        """
        mean_ap, mean_precision, mean_recall, mean_f1, best_score_threshold = -1.0, -1.0, -1.0, -1.0, -1.0
        accumulated_matching_info = getattr(self, self.state_key)
        best_score_threshold_per_cls = np.zeros(self.num_cls)
        mean_ap_per_class = np.zeros(self.num_cls)

        if len(accumulated_matching_info):
            matching_info_tensors = [torch.cat(x, 0) for x in list(zip(*accumulated_matching_info))]

            # shape (n_class, nb_iou_thresh)
            (
                ap_per_present_classes,
                precision_per_present_classes,
                recall_per_present_classes,
                f1_per_present_classes,
                present_classes,
                best_score_threshold,
                best_score_thresholds_per_present_classes,
            ) = compute_detection_metrics(
                *matching_info_tensors,
                recall_thresholds=self.recall_thresholds,
                score_threshold=self.score_threshold,
                device="cpu" if self.accumulate_on_cpu else self.device,
            )

            # Precision, recall and f1 are computed for IoU threshold range, averaged over classes
            # results before version 3.0.4 (Dec 11 2022) were computed only for smallest value (i.e IoU 0.5 if metric is @0.5:0.95)
            mean_precision, mean_recall, mean_f1 = precision_per_present_classes.mean(), recall_per_present_classes.mean(), f1_per_present_classes.mean()

            # MaP is averaged over IoU thresholds and over classes
            mean_ap = ap_per_present_classes.mean()

            # Fill array of per-class AP scores with values for classes that were present in the dataset
            ap_per_class = ap_per_present_classes.mean(1)
            for i, class_index in enumerate(present_classes):
                mean_ap_per_class[class_index] = float(ap_per_class[i])
                best_score_threshold_per_cls[class_index] = float(best_score_thresholds_per_present_classes[i])

        output_dict = {
            self.precision_metric_key: float(mean_precision),
            self.recall_metric_key: float(mean_recall),
            self.map_metric_key: float(mean_ap),
            self.f1_metric_key: float(mean_f1),
        }

        if self.include_classwise_ap:
            for i, ap_i in enumerate(mean_ap_per_class):
                output_dict[self.per_class_ap_names[i]] = float(ap_i)

        if self.calc_best_score_thresholds:
            output_dict["Best_score_threshold"] = float(best_score_threshold)

        if self.include_classwise_ap and self.calc_best_score_thresholds:
            for threshold_per_class_names, threshold_value in zip(self.best_threshold_per_class_names, best_score_threshold_per_cls):
                output_dict[threshold_per_class_names] = float(threshold_value)

        return output_dict

    def _sync_dist(self, dist_sync_fn=None, process_group=None):
        """
        When in distributed mode, stats are aggregated after each forward pass to the metric state. Since these have all
        different sizes we override the synchronization function since it works only for tensors (and use
        all_gather_object)
        :param dist_sync_fn:
        :return:
        """
        if self.world_size is None:
            self.world_size = super_gradients.common.environment.ddp_utils.get_world_size() if self.is_distributed else -1
        if self.rank is None:
            self.rank = torch.distributed.get_rank() if self.is_distributed else -1

        if self.is_distributed:
            local_state_dict = {attr: getattr(self, attr) for attr in self._reductions.keys()}
            gathered_state_dicts = [None] * self.world_size
            torch.distributed.barrier()
            torch.distributed.all_gather_object(gathered_state_dicts, local_state_dict)
            matching_info = []
            for state_dict in gathered_state_dicts:
                matching_info += state_dict[self.state_key]
            matching_info = tensor_container_to_device(matching_info, device="cpu" if self.accumulate_on_cpu else self.device)

            setattr(self, self.state_key, matching_info)

    def _get_range_str(self):
        return "@%.2f" % self.iou_thresholds[0] if not len(self.iou_thresholds) > 1 else "@%.2f:%.2f" % (self.iou_thresholds[0], self.iou_thresholds[-1])

compute()

Compute the metrics for all the accumulated results.

Returns:

Type Description
Dict[str, Union[float, torch.Tensor]]

Metrics of interest

Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 201-260
def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
    """Compute the metrics for all the accumulated results.
    :return: Metrics of interest
    """
    mean_ap, mean_precision, mean_recall, mean_f1, best_score_threshold = -1.0, -1.0, -1.0, -1.0, -1.0
    accumulated_matching_info = getattr(self, self.state_key)
    best_score_threshold_per_cls = np.zeros(self.num_cls)
    mean_ap_per_class = np.zeros(self.num_cls)

    if len(accumulated_matching_info):
        matching_info_tensors = [torch.cat(x, 0) for x in list(zip(*accumulated_matching_info))]

        # shape (n_class, nb_iou_thresh)
        (
            ap_per_present_classes,
            precision_per_present_classes,
            recall_per_present_classes,
            f1_per_present_classes,
            present_classes,
            best_score_threshold,
            best_score_thresholds_per_present_classes,
        ) = compute_detection_metrics(
            *matching_info_tensors,
            recall_thresholds=self.recall_thresholds,
            score_threshold=self.score_threshold,
            device="cpu" if self.accumulate_on_cpu else self.device,
        )

        # Precision, recall and f1 are computed for IoU threshold range, averaged over classes
        # results before version 3.0.4 (Dec 11 2022) were computed only for smallest value (i.e IoU 0.5 if metric is @0.5:0.95)
        mean_precision, mean_recall, mean_f1 = precision_per_present_classes.mean(), recall_per_present_classes.mean(), f1_per_present_classes.mean()

        # MaP is averaged over IoU thresholds and over classes
        mean_ap = ap_per_present_classes.mean()

        # Fill array of per-class AP scores with values for classes that were present in the dataset
        ap_per_class = ap_per_present_classes.mean(1)
        for i, class_index in enumerate(present_classes):
            mean_ap_per_class[class_index] = float(ap_per_class[i])
            best_score_threshold_per_cls[class_index] = float(best_score_thresholds_per_present_classes[i])

    output_dict = {
        self.precision_metric_key: float(mean_precision),
        self.recall_metric_key: float(mean_recall),
        self.map_metric_key: float(mean_ap),
        self.f1_metric_key: float(mean_f1),
    }

    if self.include_classwise_ap:
        for i, ap_i in enumerate(mean_ap_per_class):
            output_dict[self.per_class_ap_names[i]] = float(ap_i)

    if self.calc_best_score_thresholds:
        output_dict["Best_score_threshold"] = float(best_score_threshold)

    if self.include_classwise_ap and self.calc_best_score_thresholds:
        for threshold_per_class_names, threshold_value in zip(self.best_threshold_per_class_names, best_score_threshold_per_cls):
            output_dict[threshold_per_class_names] = float(threshold_value)

    return output_dict

update(preds, target, device, inputs, crowd_targets=None)

Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

Parameters:

Name Type Description Default
preds

Raw output of the model, the format might change from one model to another, but has to fit the input format of the post_prediction_callback (cx,cy,wh)

required
target torch.Tensor

Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format: (index, label, cx, cy, w, h)

required
device str

Device to run on

required
inputs torch.tensor

Input image tensor of shape (batch_size, n_img, height, width)

required
crowd_targets Optional[torch.Tensor]

Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH

None
Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 164-199
def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
    """
    Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

    :param preds:           Raw output of the model, the format might change from one model to another,
                            but has to fit the input format of the post_prediction_callback (cx,cy,wh)
    :param target:          Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format:  (index, label, cx, cy, w, h)
    :param device:          Device to run on
    :param inputs:          Input image tensor of shape (batch_size, n_img, height, width)
    :param crowd_targets:   Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH
    """
    self.iou_thresholds = self.iou_thresholds.to(device)
    _, _, height, width = inputs.shape
    iou_matcher = IoUMatching(self.iou_thresholds)

    targets = target.clone()
    crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

    preds = self.post_prediction_callback(preds, device=device)

    new_matching_info = compute_detection_matching(
        preds,
        targets,
        height,
        width,
        iou_thresholds=iou_matcher.get_thresholds(),
        matching_strategy=iou_matcher,
        crowd_targets=crowd_targets,
        top_k=self.top_k_predictions,
        denormalize_targets=self.denormalize_targets,
        device=self.device,
        return_on_cpu=self.accumulate_on_cpu,
    )

    accumulated_matching_info = getattr(self, self.state_key)
    setattr(self, self.state_key, accumulated_matching_info + new_matching_info)
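
A hedged construction sketch showing how the reported metric keys are assembled. The PPYoloEPostPredictionCallback import and its arguments follow typical SuperGradients recipes and are an assumption here; use the post-prediction callback that matches the model you evaluate:

from super_gradients.training.metrics import DetectionMetrics
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback

metric = DetectionMetrics(
    num_cls=3,
    post_prediction_callback=PPYoloEPostPredictionCallback(
        score_threshold=0.01, nms_top_k=1000, max_predictions=300, nms_threshold=0.7
    ),
    normalize_targets=True,
    include_classwise_ap=True,
    class_names=["person", "car", "bicycle"],
)
print(metric.component_names)
# ['Precision@0.50:0.95', 'Recall@0.50:0.95', 'mAP@0.50:0.95', 'F1@0.50:0.95',
#  'AP@0.50:0.95_person', 'AP@0.50:0.95_car', 'AP@0.50:0.95_bicycle',
#  'Best_score_threshold', 'Best_score_threshold_person', 'Best_score_threshold_car',
#  'Best_score_threshold_bicycle']
# During validation, update(preds, target, device=..., inputs=...) is called per batch
# and compute() returns a dict keyed by these names.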

DetectionMetricsDistanceBased

Bases: DetectionMetrics

Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 291-369
@register_metric(Metrics.DETECTION_METRICS_DISTANCE_BASED)
class DetectionMetricsDistanceBased(DetectionMetrics):
    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback,
        normalize_targets: bool = False,
        distance_thresholds: List[float] = [5.0],
        distance_metric: DistanceMetric = EuclideanDistance(),
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
        calc_best_score_thresholds: bool = True,
        include_classwise_ap: bool = False,
        class_names: List[str] = None,
    ):
        self.distance_thresholds = distance_thresholds
        self.distance_metric = distance_metric
        super().__init__(
            num_cls=num_cls,
            post_prediction_callback=post_prediction_callback,
            normalize_targets=normalize_targets,
            recall_thres=recall_thres,
            score_thres=score_thres,
            top_k_predictions=top_k_predictions,
            dist_sync_on_step=dist_sync_on_step,
            accumulate_on_cpu=accumulate_on_cpu,
            calc_best_score_thresholds=calc_best_score_thresholds,
            include_classwise_ap=include_classwise_ap,
            class_names=class_names,
            state_dict_prefix="distance_based_",
        )

    def update(self, preds: torch.Tensor, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
        """
        Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.
        Use distance-based definition of true positives.

        :param preds: torch.Tensor: Raw output of the model. The format might change from one model to another,
                                    but has to fit the input format of the post_prediction_callback (cx, cy, wh).
        :param target: torch.Tensor: Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH.
                                      Format:  (index, label, cx, cy, w, h)
        :param device: str: Device to run on.
        :param inputs: torch.Tensor: Input image tensor of shape (batch_size, n_img, height, width).
        :param crowd_targets: Optional[torch.Tensor]: Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH.
        """
        _, _, height, width = inputs.shape

        distance_matcher = DistanceMatching(self.distance_metric, self.distance_thresholds)

        targets = target.clone()
        crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

        preds = self.post_prediction_callback(preds, device=device)

        new_matching_info = compute_detection_matching(
            output=preds,
            targets=targets,
            height=height,
            width=width,
            crowd_targets=crowd_targets,
            top_k=self.top_k_predictions,
            denormalize_targets=self.denormalize_targets,
            device=self.device,
            return_on_cpu=self.accumulate_on_cpu,
            matching_strategy=distance_matcher,
        )

        accumulated_matching_info = getattr(self, self.state_key)
        setattr(self, self.state_key, accumulated_matching_info + new_matching_info)

    def _get_range_str(self):
        return (
            "@DIST%.2f" % self.distance_thresholds[0]
            if not len(self.distance_thresholds) > 1
            else "@DIST%.2f:%.2f" % (self.distance_thresholds[0], self.distance_thresholds[-1])
        )

update(preds, target, device, inputs, crowd_targets=None)

Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly. Use distance-based definition of true positives.

Parameters:

Name Type Description Default
preds torch.Tensor

torch.Tensor: Raw output of the model. The format might change from one model to another, but has to fit the input format of the post_prediction_callback (cx, cy, wh).

required
target torch.Tensor

torch.Tensor: Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. Format: (index, label, cx, cy, w, h)

required
device str

str: Device to run on.

required
inputs torch.tensor

torch.Tensor: Input image tensor of shape (batch_size, n_img, height, width).

required
crowd_targets Optional[torch.Tensor]

Optional[torch.Tensor]: Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH.

None
Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 326-362
def update(self, preds: torch.Tensor, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
    """
    Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.
    Use distance-based definition of true positives.

    :param preds: torch.Tensor: Raw output of the model. The format might change from one model to another,
                                but has to fit the input format of the post_prediction_callback (cx, cy, wh).
    :param target: torch.Tensor: Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH.
                                  Format:  (index, label, cx, cy, w, h)
    :param device: str: Device to run on.
    :param inputs: torch.Tensor: Input image tensor of shape (batch_size, n_img, height, width).
    :param crowd_targets: Optional[torch.Tensor]: Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH.
    """
    _, _, height, width = inputs.shape

    distance_matcher = DistanceMatching(self.distance_metric, self.distance_thresholds)

    targets = target.clone()
    crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

    preds = self.post_prediction_callback(preds, device=device)

    new_matching_info = compute_detection_matching(
        output=preds,
        targets=targets,
        height=height,
        width=width,
        crowd_targets=crowd_targets,
        top_k=self.top_k_predictions,
        denormalize_targets=self.denormalize_targets,
        device=self.device,
        return_on_cpu=self.accumulate_on_cpu,
        matching_strategy=distance_matcher,
    )

    accumulated_matching_info = getattr(self, self.state_key)
    setattr(self, self.state_key, accumulated_matching_info + new_matching_info)
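
A similar hedged sketch for the distance-based variant; it relies on the constructor defaults (EuclideanDistance) and again assumes a PPYoloE-style post-prediction callback:

from super_gradients.training.metrics.detection_metrics import DetectionMetricsDistanceBased
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback

metric = DetectionMetricsDistanceBased(
    num_cls=3,
    post_prediction_callback=PPYoloEPostPredictionCallback(
        score_threshold=0.01, nms_top_k=1000, max_predictions=300, nms_threshold=0.7
    ),
    distance_thresholds=[5.0, 10.0],       # distance thresholds used instead of IoU thresholds
    class_names=["person", "car", "bicycle"],
)
print(metric.map_metric_key)  # 'distance_based_mAP@DIST5.00:10.00'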

flatten_metrics_dict(metrics_dict)

Parameters:

Name Type Description Default
metrics_dict dict

Dictionary of metric values where values can also be dictionaries containing subvalues (in the case of compound metrics).

required

Returns:

Type Description

flattened dict of metric values i.e {metric1_name: metric1_value...}

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 58-76
def flatten_metrics_dict(metrics_dict: dict):
    """
    :param metrics_dict: Dictionary of metric values where values can also be dictionaries containing subvalues
                         (in the case of compound metrics)

    :return: flattened dict of metric values i.e {metric1_name: metric1_value...}
    """
    flattened = {}
    for metric_name, metric_val in metrics_dict.items():
        if metric_name == "additional_items":
            continue
        # COLLECT ALL OF THE COMPONENTS IN THE CASE OF COMPOUND METRICS
        elif isinstance(metric_val, dict):
            for sub_metric_name, sub_metric_val in metric_val.items():
                flattened[sub_metric_name] = sub_metric_val
        else:
            flattened[metric_name] = metric_val

    return flattened
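
A runnable sketch of the flattening behaviour (the metric names in the dict are made up for illustration):

from super_gradients.training.metrics.metric_utils import flatten_metrics_dict

metrics = {
    "Accuracy": 0.91,
    "DetectionMetrics": {"Precision@0.50": 0.61, "Recall@0.50": 0.58, "mAP@0.50": 0.55},
    "additional_items": {"lr": 0.01},   # skipped by the function
}
print(flatten_metrics_dict(metrics))
# {'Accuracy': 0.91, 'Precision@0.50': 0.61, 'Recall@0.50': 0.58, 'mAP@0.50': 0.55}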

get_logging_values(loss_loggings, metrics, criterion=None)

Parameters:

Name Type Description Default
loss_loggings AverageMeter

AverageMeter running average for the loss items

required
metrics MetricCollection

MetricCollection object for running user specified metrics

required
criterion

The object the loss_loggings average meter is monitoring. When set to None, only the metrics values are computed and returned.

None

Returns:

Type Description

tuple of the computed values

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 6-23
23
def get_logging_values(loss_loggings: AverageMeter, metrics: MetricCollection, criterion=None):
    """
    :param loss_loggings: AverageMeter running average for the loss items
    :param metrics: MetricCollection object for running user specified metrics
    :param criterion: The object the loss_loggings average meter is monitoring. When set to None, only the metrics
    values are computed and returned.

    :return: tuple of the computed values
    """
    if criterion is not None:
        loss_loggingg_avg = loss_loggings.average
        if not isinstance(loss_loggingg_avg, tuple):
            loss_loggingg_avg = tuple([loss_loggingg_avg])
        logging_vals = loss_loggingg_avg + get_metrics_results_tuple(metrics)
    else:
        logging_vals = get_metrics_results_tuple(metrics)

    return logging_vals

get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names)

Returns a dictionary with the epoch results as values and their names as keys.

Parameters:

Name Type Description Default
metrics_tuple

the result tuple

required
metrics_collection

MetricsCollection

required
loss_logging_item_names

loss component's names.

required

Returns:

Type Description

dict

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 79-89
def get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names):
    """
    Returns a dictionary with the epoch results as values and their names as keys.
    :param metrics_tuple: the result tuple
    :param metrics_collection: MetricsCollection
    :param loss_logging_item_names: loss component's names.
    :return: dict
    """
    keys = loss_logging_item_names + get_metrics_titles(metrics_collection)
    metrics_dict = dict(zip(keys, list(metrics_tuple)))
    return metrics_dict

get_metrics_results_tuple(metrics_collection)

Parameters:

Name Type Description Default
metrics_collection MetricCollection

Metrics collection of the user-specified metrics

required

Returns:

Type Description

tuple of metrics values

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 44-55
def get_metrics_results_tuple(metrics_collection: MetricCollection):
    """

    :param metrics_collection: metrics collection of the user-specified metrics
    :return: tuple of metrics values
    """
    if metrics_collection is None:
        results_tuple = ()
    else:
        results_tuple = tuple(flatten_metrics_dict(metrics_collection.compute()).values())
    return results_tuple

get_metrics_titles(metrics_collection)

Parameters:

Name Type Description Default
metrics_collection MetricCollection

MetricCollection object for running user specified metrics

required

Returns:

Type Description

list of all the names of the computed values list(str)

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 26-41
def get_metrics_titles(metrics_collection: MetricCollection):
    """

    :param metrics_collection: MetricCollection object for running user specified metrics
    :return: list of all the names of the computed values list(str)
    """
    titles = []
    for metric_name, metric in metrics_collection.items():
        if metric_name == "additional_items":
            continue
        elif hasattr(metric, "component_names"):
            titles += metric.component_names
        else:
            titles.append(metric_name)

    return titles
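
A hedged sketch showing how component_names expands a compound metric into several titles; the dummy metric below is purely illustrative:

import torch
from torchmetrics import Metric, MetricCollection
from super_gradients.training.metrics.metric_utils import get_metrics_titles

class DummyCompoundMetric(Metric):
    def __init__(self):
        super().__init__()
        self.component_names = ["Precision@0.50", "Recall@0.50"]   # reported as separate titles
        self.add_state("dummy", default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds, target):
        pass

    def compute(self):
        return {"Precision@0.50": 0.0, "Recall@0.50": 0.0}

collection = MetricCollection({"compound": DummyCompoundMetric()})
print(get_metrics_titles(collection))   # ['Precision@0.50', 'Recall@0.50']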

get_train_loop_description_dict(metrics_tuple, metrics_collection, loss_logging_item_names, **log_items)

Returns a dictionary with the epoch's logging items as values and their names as keys, with the purpose of passing it as a description to tqdm's progress bar.

Parameters:

Name Type Description Default
metrics_tuple

the result tuple

required
metrics_collection

MetricsCollection

required
loss_logging_item_names

loss component's names.

required
**log_items

Additional logging items to be rendered.

{}

Returns:

Type Description

dict

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 92-108
def get_train_loop_description_dict(metrics_tuple, metrics_collection, loss_logging_item_names, **log_items):
    """
    Returns a dictionary with the epoch's logging items as values and their names as keys, with the purpose of
     passing it as a description to tqdm's progress bar.

    :param metrics_tuple: the result tuple
    :param metrics_collection: MetricsCollection
    :param loss_logging_item_names: loss component's names.
    :param log_items: Additional logging items to be rendered.
    :return: dict
    """
    log_items.update(get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names))
    for key, value in log_items.items():
        if isinstance(value, torch.Tensor):
            log_items[key] = value.detach().item()

    return log_items

PoseEstimationMetrics

Bases: Metric

Implementation of COCO Keypoint evaluation metric. When instantiated with default parameters, it will default to COCO params. By default, only AR and AP metrics are computed:

>>> from super_gradients.training.metrics import PoseEstimationMetrics
>>> metric = PoseEstimationMetrics(...)
>>> metric.update(...)
>>> metrics = metric.compute()  # {"AP": 0.123, "AR": 0.456}

If you wish to get AP/AR at specific thresholds, you can specify them using the iou_thresholds_to_report argument:

>>> from super_gradients.training.metrics import PoseEstimationMetrics
>>> metric = PoseEstimationMetrics(iou_thresholds_to_report=[0.5, 0.75], ...)
>>> metric.update(...)
>>> metrics = metric.compute()  # {"AP": 0.123, "AP_0.5": 0.222, "AP_0.75": 0.111, "AR": 0.456, "AR_0.5": 0.212, "AR_0.75": 0.443}

Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py, lines 24-381
@register_metric(Metrics.POSE_ESTIMATION_METRICS)
class PoseEstimationMetrics(Metric):
    """
    Implementation of COCO Keypoint evaluation metric.
    When instantiated with default parameters, it will default to COCO params.
    By default, only AR and AP metrics are computed:

    >>> from super_gradients.training.metrics import PoseEstimationMetrics
    >>> metric = PoseEstimationMetrics(...)
    >>> metric.update(...)
    >>> metrics = metric.compute() # {"AP": 0.123, "AR": 0.456 }

    If you wish to get AP/AR at specific thresholds, you can specify them using the `iou_thresholds_to_report` argument:

    >>> from super_gradients.training.metrics import PoseEstimationMetrics
    >>> metric = PoseEstimationMetrics(iou_thresholds_to_report=[0.5, 0.75], ...)
    >>> metric.update(...)
    >>> metrics = metric.compute() # {"AP": 0.123, "AP_0.5": 0.222, "AP_0.75": 0.111, "AR": 0.456, "AR_0.5": 0.212, "AR_0.75": 0.443 }

    """

    def __init__(
        self,
        post_prediction_callback: AbstractPoseEstimationPostPredictionCallback,
        num_joints: int,
        max_objects_per_image: int = 20,
        oks_sigmas: Optional[Iterable] = None,
        iou_thresholds: Optional[Iterable] = None,
        recall_thresholds: Optional[Iterable] = None,
        iou_thresholds_to_report: Optional[Iterable] = None,
    ):
        """
        Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values.
        If you need to get additional metrics (AP at specific threshold), pass these thresholds via `iou_thresholds_to_report` argument.

        :param post_prediction_callback:  A callback to decode model predictions to poses. This should be callable that takes input (model predictions)
                                          and returns a tuple of (poses, scores)

        :param num_joints:                Number of joints per pose

        :param max_objects_per_image:     Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

        :param oks_sigmas:                OKS sigma factor for custom keypoint detection dataset.
                                          If None, then metric will use default OKS from COCO and expect num_joints to be equal 17

        :param recall_thresholds:         List of recall thresholds to compute AP.
                                          If None, then will use default 101 recall thresholds from COCO in range [0..1]

        :param iou_thresholds:            List of IoU thresholds to use. If None, then COCO version of IoU will be used (0.5 ... 0.95)

        :param iou_thresholds_to_report: List of IoU thresholds to report in the metric. By default, only AP/AR metrics are returned, but one
                                          may also request AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting `iou_thresholds_to_report=[0.5, 0.75]`

        """
        super().__init__(dist_sync_on_step=False)
        self.num_joints = num_joints
        self.max_objects_per_image = max_objects_per_image
        self.stats_names = ["AP", "AR"]

        if recall_thresholds is None:
            recall_thresholds = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True, dtype=np.float32)
        self.recall_thresholds = torch.tensor(recall_thresholds, dtype=torch.float32)

        if iou_thresholds is None:
            iou_thresholds = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True, dtype=np.float32)
        self.iou_thresholds = torch.tensor(iou_thresholds, dtype=torch.float32)

        if iou_thresholds_to_report is not None:
            self.iou_thresholds_to_report = np.array([float(t) for t in iou_thresholds_to_report], dtype=np.float32)

            if not np.isin(self.iou_thresholds_to_report, self.iou_thresholds).all():
                missing = ~np.isin(self.iou_thresholds_to_report, self.iou_thresholds)
                raise RuntimeError(
                    f"One or many IoU thresholds to report are not present in IoU thresholds. Missing thresholds: {self.iou_thresholds_to_report[missing]}"
                )

            self.stats_names += [f"AP_{t:.2f}" for t in self.iou_thresholds_to_report]
            self.stats_names += [f"AR_{t:.2f}" for t in self.iou_thresholds_to_report]
        else:
            self.iou_thresholds_to_report = None

        self.greater_component_is_better = dict((k, True) for k in self.stats_names)

        if oks_sigmas is None:
            if num_joints == 17:
                oks_sigmas = np.array([0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089])
            else:
                oks_sigmas = np.array([0.1] * num_joints)
                logger.warning(
                    f"Using default OKS sigmas of `0.1` for a custom dataset with {num_joints} joints. "
                    f"To silence this warning, you may want to specify OKS sigmas explicitly as it has direct impact on the AP score."
                )

        if len(oks_sigmas) != num_joints:
            raise ValueError(f"Length of oks_sigmas ({len(oks_sigmas)}) should be equal to num_joints {num_joints}")

        self.oks_sigmas = torch.tensor(oks_sigmas).float()

        self.component_names = list(self.greater_component_is_better.keys())
        self.components = len(self.component_names)

        self.post_prediction_callback = post_prediction_callback
        self.is_distributed = is_distributed()
        self.world_size = None
        self.rank = None
        self.add_state("predictions", default=[], dist_reduce_fx=None)

    def reset(self) -> None:
        self.predictions.clear()

    @torch.no_grad()
    def update(
        self,
        preds: Any,
        target: Any,
        gt_joints: List[np.ndarray] = None,
        gt_iscrowd: List[np.ndarray] = None,
        gt_bboxes: List[np.ndarray] = None,
        gt_areas: List[np.ndarray] = None,
        gt_samples: List[PoseEstimationSample] = None,
    ):
        """
        Decode the predictions and update the metric.

        The signature of this method is a bit complicated, because we want to support both old-style form of
        passing groundtruth information (gt_joints, gt_iscrowd, gt_bboxes, gt_areas) and a new style of passing
        groundtruth information as a list of PoseEstimationSample objects.

        Passing PoseEstimationSample objects is the more convenient and default way to go with the sample-centric datasets introduced in SuperGradients 3.3.
        The two options are mutually exclusive, so if you pass gt_samples, all other groundtruth arguments are ignored and vice versa.

        :param preds:       Raw output of the model
        :param target:      Targets for the model training (Not used for evaluation)
        :param gt_joints:   List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
                            Note that augmentation/preprocessing transformations (affine transforms specifically) must also be applied to gt_joints.
                            This is to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation,
                            where predictions are rescaled back to the original size of the image.
                            However, that makes the code much more (unnecessarily) complicated, so we do it differently and evaluate joints in the coordinate
                            system of the predicted image.

        :param gt_iscrowd:  Optional argument indicating which instances are annotated with the `iscrowd` flag and are not used for evaluation;
                            If not provided, all instances are considered as non-crowd targets.
                            For instance, in CrowdPose all instances are considered as "non-crowd".

        :param gt_bboxes:   Bounding boxes of the groundtruth instances (XYWH).
                            This is COCO-specific and is used in OKS computation for instances w/o visible keypoints.
                            If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

        :param gt_areas:    Area of each groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box,
                            so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) computation.
                            If not provided, the area is computed as the product of the width and height of the bounding box.
                            (For instance this is used in CrowdPose dataset)
        :param gt_samples:  List of ground-truth samples

        """
        predictions: List[PoseEstimationPredictions] = self.post_prediction_callback(preds)  # Decode raw predictions into poses

        if gt_samples is not None:
            self._update_with_samples(predictions, gt_samples)
        else:
            self._update_with_old_style_args(predictions, gt_joints, gt_bboxes, gt_areas, gt_iscrowd)

    def _update_with_samples(self, predictions: List[PoseEstimationPredictions], gt_samples: List[PoseEstimationSample]) -> None:
        """
        Update internal state of metric class with a batch of predictions and groundtruth samples.

        :param predictions: Decoded list of pose predictions
        :param gt_samples:  Corresponding list of groundtruth samples
        """
        for i in range(len(predictions)):
            self.update_single_image(
                predicted_poses=predictions[i].poses,
                predicted_scores=predictions[i].scores,
                gt_joints=gt_samples[i].joints,
                gt_bboxes=gt_samples[i].bboxes_xywh,
                gt_areas=gt_samples[i].areas,
                gt_iscrowd=gt_samples[i].is_crowd,
            )

    def _update_with_old_style_args(
        self,
        predictions: List[PoseEstimationPredictions],
        gt_joints: List[np.ndarray],
        gt_bboxes: Optional[List[np.ndarray]],
        gt_areas: Optional[List[np.ndarray]],
        gt_iscrowd: Optional[List[np.ndarray]],
    ) -> None:
        """
        This method is here for backward compatibility with old-style datasets that do not use PoseEstimationSample objects.
        The now-deprecated way of passing groundtruth information was through a dictionary with 'gt_joints', 'gt_bboxes', 'gt_areas', 'gt_iscrowd' keys,
        which is inconvenient and error-prone.

        It is still supported, but we recommend using PoseEstimationSample objects instead.
        :param predictions: Decoded pose predictions
        :param gt_joints: List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
        :param gt_bboxes: List of ground-truth bounding boxes for each image in the batch.
                          Each element of list is a numpy array of shape (num_instances, 4) and boxes are in XYWH format.
                          Can be None, in which case bounding boxes are computed as minimum bounding box that contains all visible keypoints.
        :param gt_areas:  List of ground-truth areas for each image in the batch.
                          Can be None, in which case areas are computed as the product of the width and height of the bounding box.
        :param gt_iscrowd: List of single-dimensional numpy arrays of shape (num_instances,) indicating which instance is
                           annotated with the `iscrowd` flag. Objects with the `iscrowd` flag are not used for evaluation.
        """
        for i in range(len(predictions)):
            self.update_single_image(
                predicted_poses=predictions[i].poses,
                predicted_scores=predictions[i].scores,
                gt_joints=gt_joints[i],
                gt_bboxes=gt_bboxes[i] if gt_bboxes is not None else None,
                gt_areas=gt_areas[i] if gt_areas is not None else None,
                gt_iscrowd=gt_iscrowd[i] if gt_iscrowd is not None else None,
            )

    def update_single_image(
        self,
        predicted_poses: Union[Tensor, np.ndarray],
        predicted_scores: Union[Tensor, np.ndarray],
        gt_joints: np.ndarray,
        gt_bboxes: Optional[np.ndarray],
        gt_areas: Optional[np.ndarray],
        gt_iscrowd: Optional[np.ndarray],
    ) -> None:
        """
        Update the internal state of the metric with a single image's predictions & corresponding groundtruth.
        The method computes OKS for predicted poses, matches them to groundtruth poses and updates the internal state of the metric.
        :param predicted_poses:  Predicted poses of shape (num_instances, num_joints, 3)
        :param predicted_scores: Predicted scores of shape (num_instances,)
        :param gt_joints:        Groundtruth joints of shape (num_instances, num_joints, 3)
        :param gt_bboxes:        Groundtruth bounding boxes of shape (num_instances, 4) in XYWH format
        :param gt_areas:         Groundtruth areas of shape (num_instances,)
        :param gt_iscrowd:       Groundtruth is_crowd flag of shape (num_instances,)
        """
        if len(predicted_poses) == 0 and len(gt_joints) == 0:
            return
        if len(predicted_poses) != len(predicted_scores):
            raise ValueError("Length of predicted poses and scores should be equal. Got {} and {}".format(len(predicted_poses), len(predicted_scores)))

        predicted_poses = convert_to_tensor(predicted_poses, dtype=torch.float32, device="cpu")
        predicted_scores = convert_to_tensor(predicted_scores, dtype=torch.float32, device="cpu")

        if gt_bboxes is None:
            gt_bboxes = compute_visible_bbox_xywh(torch.tensor(gt_joints[:, :, 0:2]), torch.tensor(gt_joints[:, :, 2]))

        if gt_areas is None:
            gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

        if gt_iscrowd is None:
            gt_iscrowd = [False] * len(gt_joints)

        gt_keypoints = convert_to_tensor(gt_joints, dtype=torch.float32, device="cpu")
        gt_areas = convert_to_tensor(gt_areas, dtype=torch.float32, device="cpu")
        gt_bboxes = convert_to_tensor(gt_bboxes, dtype=torch.float32, device="cpu")
        gt_iscrowd = convert_to_tensor(gt_iscrowd, dtype=torch.bool, device="cpu")

        gt_keypoints_xy = gt_keypoints[:, :, 0:2]
        gt_keypoints_visibility = gt_keypoints[:, :, 2]
        gt_all_kpts_invisible = gt_keypoints_visibility.eq(0).all(dim=1)
        gt_is_ignore = gt_all_kpts_invisible | gt_iscrowd

        targets = gt_keypoints_xy[~gt_is_ignore] if len(gt_joints) else []
        targets_visibilities = gt_keypoints_visibility[~gt_is_ignore] if len(gt_joints) else []
        targets_areas = gt_areas[~gt_is_ignore] if len(gt_joints) else []
        targets_bboxes = gt_bboxes[~gt_is_ignore]
        targets_ignored = gt_is_ignore[~gt_is_ignore]

        crowd_targets = gt_keypoints_xy[gt_is_ignore] if len(gt_joints) else []
        crowd_visibilities = gt_keypoints_visibility[gt_is_ignore] if len(gt_joints) else []
        crowd_targets_areas = gt_areas[gt_is_ignore]
        crowd_targets_bboxes = gt_bboxes[gt_is_ignore]

        mr = compute_img_keypoint_matching(
            predicted_poses,
            predicted_scores,
            #
            targets=targets,
            targets_visibilities=targets_visibilities,
            targets_areas=targets_areas,
            targets_bboxes=targets_bboxes,
            targets_ignored=targets_ignored,
            #
            crowd_targets=crowd_targets,
            crowd_visibilities=crowd_visibilities,
            crowd_targets_areas=crowd_targets_areas,
            crowd_targets_bboxes=crowd_targets_bboxes,
            #
            iou_thresholds=self.iou_thresholds.to("cpu"),
            sigmas=self.oks_sigmas.to("cpu"),
            top_k=self.max_objects_per_image,
        )

        self.predictions.append((mr.preds_matched.cpu(), mr.preds_to_ignore.cpu(), mr.preds_scores.cpu(), int(mr.num_targets)))

    def _sync_dist(self, dist_sync_fn=None, process_group=None):
        """
        When in distributed mode, stats are aggregated after each forward pass to the metric state. Since these have all
        different sizes we override the synchronization function since it works only for tensors (and use
        all_gather_object)
        :param dist_sync_fn:
        :return:
        """
        if self.world_size is None:
            self.world_size = get_world_size() if self.is_distributed else -1
        if self.rank is None:
            self.rank = get_local_rank() if self.is_distributed else -1

        if self.is_distributed:
            local_state_dict = self.predictions
            gathered_state_dicts = [None] * self.world_size
            torch.distributed.all_gather_object(gathered_state_dicts, local_state_dict)
            self.predictions = list(itertools.chain(*gathered_state_dicts))

    def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
        """Compute the metrics for all the accumulated results.
        :return: Metrics of interest
        """
        T = len(self.iou_thresholds)
        K = 1  # num categories

        precision = -np.ones((T, K))
        recall = -np.ones((T, K))

        predictions = self.predictions  # All gathered by this time
        if len(predictions) > 0:
            preds_matched = torch.cat([x[0].cpu() for x in predictions], dim=0)
            preds_to_ignore = torch.cat([x[1].cpu() for x in predictions], dim=0)
            preds_scores = torch.cat([x[2].cpu() for x in predictions], dim=0)
            n_targets = sum([x[3] for x in predictions])

            cls_precision, _, cls_recall, _, _ = compute_detection_metrics_per_cls(
                preds_matched=preds_matched,
                preds_to_ignore=preds_to_ignore,
                preds_scores=preds_scores,
                n_targets=n_targets,
                recall_thresholds=self.recall_thresholds.cpu(),
                score_threshold=0,
                device="cpu",
            )

            precision[:, 0] = cls_precision.cpu().numpy()
            recall[:, 0] = cls_recall.cpu().numpy()

        def summarize(s):
            if len(s[s > -1]) == 0:
                mean_s = -1
            else:
                mean_s = np.mean(s[s > -1])

            return mean_s

        metrics = {"AP": summarize(precision), "AR": summarize(recall)}

        if self.iou_thresholds_to_report is not None and len(self.iou_thresholds_to_report):
            for t in self.iou_thresholds_to_report:
                mask = np.where(t == self.iou_thresholds)[0]
                metrics[f"AP_{t:.2f}"] = summarize(precision[mask])
                metrics[f"AR_{t:.2f}"] = summarize(recall[mask])

        return metrics
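
As a rough usage sketch (the decode_callback, model and val_loader names below are hypothetical placeholders, not SuperGradients objects), a validation loop using the new-style gt_samples argument might look like this:

# Hedged sketch only: `decode_callback`, `model` and `val_loader` are placeholders you must provide.
from super_gradients.training.metrics import PoseEstimationMetrics

metric = PoseEstimationMetrics(
    post_prediction_callback=decode_callback,  # must decode raw outputs into poses and scores
    num_joints=17,
    iou_thresholds_to_report=[0.5, 0.75],
)

for images, targets, extras in val_loader:
    preds = model(images)
    # New-style call: pass PoseEstimationSample objects; old-style gt_* arguments are then ignored.
    metric.update(preds, targets, gt_samples=extras["gt_samples"])

results = metric.compute()  # contains "AP", "AR" and the requested per-threshold entries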

__init__(post_prediction_callback, num_joints, max_objects_per_image=20, oks_sigmas=None, iou_thresholds=None, recall_thresholds=None, iou_thresholds_to_report=None)

Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values. If you need additional metrics (AP/AR at a specific threshold), pass these thresholds via the iou_thresholds_to_report argument.

Parameters:

Name Type Description Default
post_prediction_callback AbstractPoseEstimationPostPredictionCallback

A callback to decode model predictions to poses. This should be a callable that takes the model predictions as input and returns a tuple of (poses, scores)

required
num_joints int

Number of joints per pose

required
max_objects_per_image int

Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

20
oks_sigmas Optional[Iterable]

OKS sigma factors for a custom keypoint detection dataset. If None, the metric will use the default COCO OKS sigmas and expects num_joints to equal 17

None
recall_thresholds Optional[Iterable]

List of recall thresholds to compute AP. If None, the default 101 recall thresholds from COCO in the range [0..1] will be used

None
iou_thresholds Optional[Iterable]

List of IoU thresholds to use. If None, the COCO IoU thresholds (0.5 ... 0.95) will be used

None
iou_thresholds_to_report Optional[Iterable]

List of IoU thresholds to report in the metric. By default, only AP/AR metrics are returned, but one may also request AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting iou_thresholds_to_report=[0.5, 0.75]

None
Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
def __init__(
    self,
    post_prediction_callback: AbstractPoseEstimationPostPredictionCallback,
    num_joints: int,
    max_objects_per_image: int = 20,
    oks_sigmas: Optional[Iterable] = None,
    iou_thresholds: Optional[Iterable] = None,
    recall_thresholds: Optional[Iterable] = None,
    iou_thresholds_to_report: Optional[Iterable] = None,
):
    """
    Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values.
    If you need to get additional metrics (AP at specific threshold), pass these thresholds via `iou_thresholds_to_report` argument.

    :param post_prediction_callback:  A callback to decode model predictions to poses. This should be callable that takes input (model predictions)
                                      and returns a tuple of (poses, scores)

    :param num_joints:                Number of joints per pose

    :param max_objects_per_image:     Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

    :param oks_sigmas:                OKS sigma factor for custom keypoint detection dataset.
                                      If None, then metric will use default OKS from COCO and expect num_joints to be equal 17

    :param recall_thresholds:         List of recall thresholds to compute AP.
                                      If None, then will use default 101 recall thresholds from COCO in range [0..1]

    :param iou_thresholds:            List of IoU thresholds to use. If None, then COCO version of IoU will be used (0.5 ... 0.95)

    :param iou_thresholds_to_report: List of IoU thresholds to report in the metric. By default, only AP/AR metrics are returned, but one
                                      may also request AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting `iou_thresholds_to_report=[0.5, 0.75]`

    """
    super().__init__(dist_sync_on_step=False)
    self.num_joints = num_joints
    self.max_objects_per_image = max_objects_per_image
    self.stats_names = ["AP", "AR"]

    if recall_thresholds is None:
        recall_thresholds = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True, dtype=np.float32)
    self.recall_thresholds = torch.tensor(recall_thresholds, dtype=torch.float32)

    if iou_thresholds is None:
        iou_thresholds = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True, dtype=np.float32)
    self.iou_thresholds = torch.tensor(iou_thresholds, dtype=torch.float32)

    if iou_thresholds_to_report is not None:
        self.iou_thresholds_to_report = np.array([float(t) for t in iou_thresholds_to_report], dtype=np.float32)

        if not np.isin(self.iou_thresholds_to_report, self.iou_thresholds).all():
            missing = ~np.isin(self.iou_thresholds_to_report, self.iou_thresholds)
            raise RuntimeError(
                f"One or many IoU thresholds to report are not present in IoU thresholds. Missing thresholds: {self.iou_thresholds_to_report[missing]}"
            )

        self.stats_names += [f"AP_{t:.2f}" for t in self.iou_thresholds_to_report]
        self.stats_names += [f"AR_{t:.2f}" for t in self.iou_thresholds_to_report]
    else:
        self.iou_thresholds_to_report = None

    self.greater_component_is_better = dict((k, True) for k in self.stats_names)

    if oks_sigmas is None:
        if num_joints == 17:
            oks_sigmas = np.array([0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089])
        else:
            oks_sigmas = np.array([0.1] * num_joints)
            logger.warning(
                f"Using default OKS sigmas of `0.1` for a custom dataset with {num_joints} joints. "
                f"To silence this warning, you may want to specify OKS sigmas explicitly as it has direct impact on the AP score."
            )

    if len(oks_sigmas) != num_joints:
        raise ValueError(f"Length of oks_sigmas ({len(oks_sigmas)}) should be equal to num_joints {num_joints}")

    self.oks_sigmas = torch.tensor(oks_sigmas).float()

    self.component_names = list(self.greater_component_is_better.keys())
    self.components = len(self.component_names)

    self.post_prediction_callback = post_prediction_callback
    self.is_distributed = is_distributed()
    self.world_size = None
    self.rank = None
    self.add_state("predictions", default=[], dist_reduce_fx=None)

compute()

Compute the metrics for all the accumulated results.

Returns:

Type Description
Dict[str, Union[float, torch.Tensor]]

Metrics of interest

Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
    """Compute the metrics for all the accumulated results.
    :return: Metrics of interest
    """
    T = len(self.iou_thresholds)
    K = 1  # num categories

    precision = -np.ones((T, K))
    recall = -np.ones((T, K))

    predictions = self.predictions  # All gathered by this time
    if len(predictions) > 0:
        preds_matched = torch.cat([x[0].cpu() for x in predictions], dim=0)
        preds_to_ignore = torch.cat([x[1].cpu() for x in predictions], dim=0)
        preds_scores = torch.cat([x[2].cpu() for x in predictions], dim=0)
        n_targets = sum([x[3] for x in predictions])

        cls_precision, _, cls_recall, _, _ = compute_detection_metrics_per_cls(
            preds_matched=preds_matched,
            preds_to_ignore=preds_to_ignore,
            preds_scores=preds_scores,
            n_targets=n_targets,
            recall_thresholds=self.recall_thresholds.cpu(),
            score_threshold=0,
            device="cpu",
        )

        precision[:, 0] = cls_precision.cpu().numpy()
        recall[:, 0] = cls_recall.cpu().numpy()

    def summarize(s):
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])

        return mean_s

    metrics = {"AP": summarize(precision), "AR": summarize(recall)}

    if self.iou_thresholds_to_report is not None and len(self.iou_thresholds_to_report):
        for t in self.iou_thresholds_to_report:
            mask = np.where(t == self.iou_thresholds)[0]
            metrics[f"AP_{t:.2f}"] = summarize(precision[mask])
            metrics[f"AR_{t:.2f}"] = summarize(recall[mask])

    return metrics

update(preds, target, gt_joints=None, gt_iscrowd=None, gt_bboxes=None, gt_areas=None, gt_samples=None)

Decode the predictions and update the metric.

The signature of this method is somewhat complicated because we want to support both the old-style form of passing groundtruth information (gt_joints, gt_iscrowd, gt_bboxes, gt_areas) and the new style of passing it as a list of PoseEstimationSample objects.

Passing PoseEstimationSample objects is the more convenient and default way to go with the sample-centric datasets introduced in SuperGradients 3.3. The two options are mutually exclusive: if you pass gt_samples, all other groundtruth arguments are ignored, and vice versa.

Parameters:

Name Type Description Default
preds

Raw output of the model

required
target Any

Targets for the model training (Not used for evaluation)

required
gt_joints List[np.ndarray]

List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3). Note that augmentation/preprocessing transformations (affine transforms specifically) must also be applied to gt_joints, to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation, where predictions are rescaled back to the original size of the image. However, that makes the code much more (unnecessarily) complicated, so we do it differently and evaluate joints in the coordinate system of the predicted image.

None
gt_iscrowd List[np.ndarray]

Optional argument indicating which instances are annotated with the iscrowd flag and are not used for evaluation; if not provided, all instances are considered non-crowd targets. For instance, in CrowdPose all instances are considered "non-crowd".

None
gt_bboxes List[np.ndarray]

Bounding boxes of the groundtruth instances (XYWH). This is COCO-specific and is used in OKS computation for instances w/o visible keypoints. If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

None
gt_areas List[np.ndarray]

Area of each groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box, so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) computation. If not provided, the area is computed as the product of the width and height of the bounding box. (This is used in the CrowdPose dataset, for instance.)

None
gt_samples List[PoseEstimationSample]

List of ground-truth samples

None
Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
@torch.no_grad()
def update(
    self,
    preds: Any,
    target: Any,
    gt_joints: List[np.ndarray] = None,
    gt_iscrowd: List[np.ndarray] = None,
    gt_bboxes: List[np.ndarray] = None,
    gt_areas: List[np.ndarray] = None,
    gt_samples: List[PoseEstimationSample] = None,
):
    """
    Decode the predictions and update the metric.

    The signature of this method is a bit complicated, because we want to support both old-style form of
    passing groundtruth information (gt_joints, gt_iscrowd, gt_bboxes, gt_areas) and a new style of passing
    groundtruth information as a list of PoseEstimationSample objects.

    Passing PoseEstimationSample objects is the more convenient and default way to go with the sample-centric datasets introduced in SuperGradients 3.3.
    The two options are mutually exclusive, so if you pass gt_samples, all other groundtruth arguments are ignored and vice versa.

    :param preds:       Raw output of the model
    :param target:      Targets for the model training (Not used for evaluation)
    :param gt_joints:   List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
                        Note that augmentation/preprocessing transformations (affine transforms specifically) must also be applied to gt_joints.
                        This is to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation,
                        where predictions are rescaled back to the original size of the image.
                        However, that makes the code much more (unnecessarily) complicated, so we do it differently and evaluate joints in the coordinate
                        system of the predicted image.

    :param gt_iscrowd:  Optional argument indicating which instances are annotated with the `iscrowd` flag and are not used for evaluation;
                        If not provided, all instances are considered as non-crowd targets.
                        For instance, in CrowdPose all instances are considered as "non-crowd".

    :param gt_bboxes:   Bounding boxes of the groundtruth instances (XYWH).
                        This is COCO-specific and is used in OKS computation for instances w/o visible keypoints.
                        If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

    :param gt_areas:    Area of each groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box,
                        so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) computation.
                        If not provided, the area is computed as the product of the width and height of the bounding box.
                        (For instance this is used in CrowdPose dataset)
    :param gt_samples:  List of ground-truth samples

    """
    predictions: List[PoseEstimationPredictions] = self.post_prediction_callback(preds)  # Decode raw predictions into poses

    if gt_samples is not None:
        self._update_with_samples(predictions, gt_samples)
    else:
        self._update_with_old_style_args(predictions, gt_joints, gt_bboxes, gt_areas, gt_iscrowd)

update_single_image(predicted_poses, predicted_scores, gt_joints, gt_bboxes, gt_areas, gt_iscrowd)

Update the internal state of the metric with a single image's predictions and the corresponding groundtruth. The method computes OKS for the predicted poses, matches them to groundtruth poses, and updates the internal state of the metric.

Parameters:

Name Type Description Default
predicted_poses Union[Tensor, np.ndarray]

Predicted poses of shape (num_instances, num_joints, 3)

required
predicted_scores Union[Tensor, np.ndarray]

Predicted scores of shape (num_instances,)

required
gt_joints np.ndarray

Groundtruth joints of shape (num_instances, num_joints, 3)

required
gt_bboxes Optional[np.ndarray]

Groundtruth bounding boxes of shape (num_instances, 4) in XYWH format

required
gt_areas Optional[np.ndarray]

Groundtruth areas of shape (num_instances,)

required
gt_iscrowd Optional[np.ndarray]

Groundtruth is_crowd flag of shape (num_instances,)

required
Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
def update_single_image(
    self,
    predicted_poses: Union[Tensor, np.ndarray],
    predicted_scores: Union[Tensor, np.ndarray],
    gt_joints: np.ndarray,
    gt_bboxes: Optional[np.ndarray],
    gt_areas: Optional[np.ndarray],
    gt_iscrowd: Optional[np.ndarray],
) -> None:
    """
    Update the internal state of the metric with a single image's predictions & corresponding groundtruth.
    The method computes OKS for predicted poses, matches them to groundtruth poses and updates the internal state of the metric.
    :param predicted_poses:  Predicted poses of shape (num_instances, num_joints, 3)
    :param predicted_scores: Predicted scores of shape (num_instances,)
    :param gt_joints:        Groundtruth joints of shape (num_instances, num_joints, 3)
    :param gt_bboxes:        Groundtruth bounding boxes of shape (num_instances, 4) in XYWH format
    :param gt_areas:         Groundtruth areas of shape (num_instances,)
    :param gt_iscrowd:       Groundtruth is_crowd flag of shape (num_instances,)
    """
    if len(predicted_poses) == 0 and len(gt_joints) == 0:
        return
    if len(predicted_poses) != len(predicted_scores):
        raise ValueError("Length of predicted poses and scores should be equal. Got {} and {}".format(len(predicted_poses), len(predicted_scores)))

    predicted_poses = convert_to_tensor(predicted_poses, dtype=torch.float32, device="cpu")
    predicted_scores = convert_to_tensor(predicted_scores, dtype=torch.float32, device="cpu")

    if gt_bboxes is None:
        gt_bboxes = compute_visible_bbox_xywh(torch.tensor(gt_joints[:, :, 0:2]), torch.tensor(gt_joints[:, :, 2]))

    if gt_areas is None:
        gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

    if gt_iscrowd is None:
        gt_iscrowd = [False] * len(gt_joints)

    gt_keypoints = convert_to_tensor(gt_joints, dtype=torch.float32, device="cpu")
    gt_areas = convert_to_tensor(gt_areas, dtype=torch.float32, device="cpu")
    gt_bboxes = convert_to_tensor(gt_bboxes, dtype=torch.float32, device="cpu")
    gt_iscrowd = convert_to_tensor(gt_iscrowd, dtype=torch.bool, device="cpu")

    gt_keypoints_xy = gt_keypoints[:, :, 0:2]
    gt_keypoints_visibility = gt_keypoints[:, :, 2]
    gt_all_kpts_invisible = gt_keypoints_visibility.eq(0).all(dim=1)
    gt_is_ignore = gt_all_kpts_invisible | gt_iscrowd

    targets = gt_keypoints_xy[~gt_is_ignore] if len(gt_joints) else []
    targets_visibilities = gt_keypoints_visibility[~gt_is_ignore] if len(gt_joints) else []
    targets_areas = gt_areas[~gt_is_ignore] if len(gt_joints) else []
    targets_bboxes = gt_bboxes[~gt_is_ignore]
    targets_ignored = gt_is_ignore[~gt_is_ignore]

    crowd_targets = gt_keypoints_xy[gt_is_ignore] if len(gt_joints) else []
    crowd_visibilities = gt_keypoints_visibility[gt_is_ignore] if len(gt_joints) else []
    crowd_targets_areas = gt_areas[gt_is_ignore]
    crowd_targets_bboxes = gt_bboxes[gt_is_ignore]

    mr = compute_img_keypoint_matching(
        predicted_poses,
        predicted_scores,
        #
        targets=targets,
        targets_visibilities=targets_visibilities,
        targets_areas=targets_areas,
        targets_bboxes=targets_bboxes,
        targets_ignored=targets_ignored,
        #
        crowd_targets=crowd_targets,
        crowd_visibilities=crowd_visibilities,
        crowd_targets_areas=crowd_targets_areas,
        crowd_targets_bboxes=crowd_targets_bboxes,
        #
        iou_thresholds=self.iou_thresholds.to("cpu"),
        sigmas=self.oks_sigmas.to("cpu"),
        top_k=self.max_objects_per_image,
    )

    self.predictions.append((mr.preds_matched.cpu(), mr.preds_to_ignore.cpu(), mr.preds_scores.cpu(), int(mr.num_targets)))

compute_img_keypoint_matching(preds, pred_scores, targets, targets_visibilities, targets_areas, targets_bboxes, targets_ignored, crowd_targets, crowd_visibilities, crowd_targets_areas, crowd_targets_bboxes, iou_thresholds, sigmas, top_k)

Match predictions and the targets (ground truth) with respect to IoU and confidence score for a given image.

Parameters:

Name Type Description Default
preds Tensor

Tensor of shape (K, NumJoints, 3) - Array of predicted skeletons. The last dimension encodes the X, Y and confidence score of each joint

required
pred_scores Tensor

Tensor of shape (K) - Confidence scores for each pose

required
targets Tensor

Targets joints (M, NumJoints, 2) - Array of groundtruth skeletons

required
targets_visibilities Tensor

Visibility status for each keypoint (M, NumJoints). Values are 0 - invisible, 1 - occluded, 2 - fully visible

required
targets_areas Tensor

Tensor of shape (M) - Areas of target objects

required
targets_bboxes Tensor

Tensor of shape (M,4) - Bounding boxes (XYWH) of targets

required
targets_ignored Tensor

Tensor of shape (M) - Array of targets marked as ignored (e.g., all keypoints are invisible or the target does not fit the desired area range)

required
crowd_targets Tensor

Targets joints (Mc, NumJoints, 3) - Array of groundtruth skeletons. The last dimension encodes the X, Y and visibility score of each joint: (0 - invisible, 1 - occluded, 2 - fully visible)

required
crowd_visibilities Tensor

Visibility status for each keypoint of crowd targets (Mc, NumJoints). Values are 0 - invisible, 1 - occluded, 2 - fully visible

required
crowd_targets_areas Tensor

Tensor of shape (Mc) - Areas of target objects

required
crowd_targets_bboxes Tensor

Tensor of shape (Mc, 4) - Bounding boxes (XYWH) of crowd targets

required
iou_thresholds torch.Tensor

IoU thresholds used to compute the mAP

required
sigmas Tensor

Tensor of shape (NumJoints) with sigmas for each joint. Sigma values represent how 'hard' it is to locate the exact groundtruth position of each joint.

required
top_k int

Number of predictions to keep, ordered by confidence score

required

Returns:

Type Description
ImageKeypointMatchingResult

:preds_matched: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds) - True when prediction (i) is matched with a target with respect to the (j)th IoU threshold

:preds_to_ignore: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds) - True when prediction (i) is matched with a crowd target with respect to the (j)th IoU threshold

:preds_scores: Tensor of shape (min(top_k, len(preds))) with scores of the top-k predictions

:num_targets: Number of groundtruth targets (total number of targets minus the number of ignored ones)

Source code in src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_img_keypoint_matching(
    preds: Tensor,
    pred_scores: Tensor,
    targets: Tensor,
    targets_visibilities: Tensor,
    targets_areas: Tensor,
    targets_bboxes: Tensor,
    targets_ignored: Tensor,
    crowd_targets: Tensor,
    crowd_visibilities: Tensor,
    crowd_targets_areas: Tensor,
    crowd_targets_bboxes: Tensor,
    iou_thresholds: torch.Tensor,
    sigmas: Tensor,
    top_k: int,
) -> ImageKeypointMatchingResult:
    """
    Match predictions and the targets (ground truth) with respect to IoU and confidence score for a given image.

    :param preds:            Tensor of shape (K, NumJoints, 3) - Array of predicted skeletons.
                             Last dimension encode X,Y and confidence score of each joint

    :param pred_scores:      Tensor of shape (K) - Confidence scores for each pose

    :param targets:          Targets joints (M, NumJoints, 2) - Array of groundtruth skeletons

    :param targets_visibilities: Visibility status for each keypoint (M, NumJoints).
                             Values are 0 - invisible, 1 - occluded, 2 - fully visible

    :param targets_areas:    Tensor of shape (M) - Areas of target objects

    :param targets_bboxes:   Tensor of shape (M,4) - Bounding boxes (XYWH) of targets

    :param targets_ignored:  Tensor of shape (M) - Array of targets marked as ignored
                             (e.g., all keypoints are invisible or the target does not fit the desired area range)

    :param crowd_targets:    Targets joints (Mc, NumJoints, 3) - Array of groundtruth skeletons
                             Last dimension encode X,Y and visibility score of each joint:
                             (0 - invisible, 1 - occluded, 2 - fully visible)

    :param crowd_visibilities: Visibility status for each keypoint of crowd targets (Mc, NumJoints).
                             Values are 0 - invisible, 1 - occluded, 2 - fully visible

    :param crowd_targets_areas: Tensor of shape (Mc) - Areas of target objects

    :param crowd_targets_bboxes: Tensor of shape (Mc, 4) - Bounding boxes (XYWH) of crowd targets

    :param iou_thresholds:  IoU Threshold to compute the mAP

    :param sigmas:          Tensor of shape (NumJoints) with sigmas for each joint. Sigma values represent how 'hard'
                            it is to locate the exact groundtruth position of the joint.

    :param top_k:           Number of predictions to keep, ordered by confidence score

    :return:
        :preds_matched:     Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
                                True when prediction (i) is matched with a target with respect to the (j)th IoU threshold

        :preds_to_ignore:   Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
                                True when prediction (i) is matched with a crowd target with respect to the (j)th IoU threshold

        :preds_scores:      Tensor of shape (min(top_k, len(preds))) with scores of top-k predictions

        :num_targets:       Number of groundtruth targets (total num targets minus number of ignored)

    """
    num_iou_thresholds = len(iou_thresholds)

    device = preds.device if torch.is_tensor(preds) else (targets.device if torch.is_tensor(targets) else "cpu")
    num_targets = len(targets) - torch.count_nonzero(targets_ignored)

    preds_matched = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)
    targets_matched = torch.zeros(len(targets), num_iou_thresholds, dtype=torch.bool, device=device)
    preds_to_ignore = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)

    if preds is None or len(preds) == 0:
        return ImageKeypointMatchingResult(
            preds_matched=preds_matched,
            preds_to_ignore=preds_to_ignore,
            preds_scores=pred_scores,
            num_targets=num_targets.item(),
        )

    # Ignore all but the predictions that were top_k
    k = min(top_k, len(pred_scores))
    preds_idx_to_use = torch.topk(pred_scores, k=k, sorted=True, largest=True).indices
    preds_to_ignore[:, :] = True
    preds_to_ignore[preds_idx_to_use] = False

    if len(targets) > 0:
        iou = compute_oks(preds[preds_idx_to_use], targets, targets_visibilities, sigmas, gt_areas=targets_areas, gt_bboxes=targets_bboxes)

        # The matching priority is first detection confidence and then IoU value.
        # The detection is already sorted by confidence in NMS, so here for each prediction we order the targets by iou.
        sorted_iou, target_sorted = iou.sort(descending=True, stable=True)

        # Only iterate over IoU values higher than min threshold to speed up the process
        for pred_selected_i, target_sorted_i in (sorted_iou > iou_thresholds[0]).nonzero(as_tuple=False):

            # pred_selected_i and target_sorted_i are relative to filters/sorting, so we extract their absolute indexes
            pred_i = preds_idx_to_use[pred_selected_i]
            target_i = target_sorted[pred_selected_i, target_sorted_i]

            # Vector[j], True when IoU(pred_i, target_i) is above the (j)th threshold
            is_iou_above_threshold = sorted_iou[pred_selected_i, target_sorted_i] > iou_thresholds

            # Vector[j], True when both pred_i and target_i are not matched yet for the (j)th threshold
            are_candidates_free = torch.logical_and(~preds_matched[pred_i, :], ~targets_matched[target_i, :])

            # Vector[j], True when (pred_i, target_i) can be matched for the (j)th threshold
            are_candidates_good = torch.logical_and(is_iou_above_threshold, are_candidates_free)

            is_matching_with_ignore = are_candidates_free & are_candidates_good & targets_ignored[target_i]

            if preds_matched[pred_i].any() and is_matching_with_ignore.any():
                continue

            # For every threshold (j) where target_i and pred_i can be matched together ( are_candidates_good[j]==True )
            # fill the matching placeholders with True
            targets_matched[target_i, are_candidates_good] = True
            preds_matched[pred_i, are_candidates_good] = True

            preds_to_ignore[pred_i] = torch.logical_or(preds_to_ignore[pred_i], is_matching_with_ignore)

            # When all the targets are matched with a prediction for every IoU Threshold, stop.
            if targets_matched.all():
                break

    # Crowd targets can be matched with many predictions.
    # Therefore, for every prediction we just need to check if it has IoA large enough with any crowd target.
    if len(crowd_targets) > 0:
        # shape = (n_preds_to_use x n_crowd_targets)
        ioa = compute_oks(
            preds[preds_idx_to_use],
            crowd_targets,
            crowd_visibilities,
            sigmas,
            gt_areas=crowd_targets_areas,
            gt_bboxes=crowd_targets_bboxes,
        )

        # For each prediction, we keep its highest score with any crowd target (of same class)
        # shape = (n_preds_to_use)
        best_ioa, _ = ioa.max(1)

        # If a prediction has IoA higher than threshold (with any target of same class), then there is a match
        # shape = (n_preds_to_use x iou_thresholds)
        is_matching_with_crowd = best_ioa.view(-1, 1) > iou_thresholds.view(1, -1)

        preds_to_ignore[preds_idx_to_use] = torch.logical_or(preds_to_ignore[preds_idx_to_use], is_matching_with_crowd)

    return ImageKeypointMatchingResult(
        preds_matched=preds_matched[preds_idx_to_use],
        preds_to_ignore=preds_to_ignore[preds_idx_to_use],
        preds_scores=pred_scores[preds_idx_to_use],
        num_targets=num_targets.item(),
    )

compute_oks(pred_joints, gt_joints, gt_keypoint_visibility, sigmas, gt_areas=None, gt_bboxes=None)

Parameters:

Name Type Description Default
pred_joints Tensor

[K, NumJoints, 2] or [K, NumJoints, 3]

required
gt_joints Tensor

[M, NumJoints, 2]

required
gt_keypoint_visibility Tensor

[M, NumJoints]

required
gt_areas Tensor

[M] Area of each ground truth instance. COCOEval uses the area of the instance mask to scale OKS, so it must be provided separately. If None, the area of the bounding box of each instance computed from gt_joints will be used.

None
gt_bboxes Tensor

[M, 4] Bounding box (X, Y, W, H) of each ground truth instance. If None, the bounding box of each instance computed from gt_joints will be used.

None
sigmas Tensor

[NumJoints]

required

Returns:

Type Description
np.ndarray

IoU matrix [K, M]

Source code in src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_oks(
    pred_joints: Tensor,
    gt_joints: Tensor,
    gt_keypoint_visibility: Tensor,
    sigmas: Tensor,
    gt_areas: Tensor = None,
    gt_bboxes: Tensor = None,
) -> np.ndarray:
    """

    :param pred_joints: [K, NumJoints, 2] or [K, NumJoints, 3]
    :param gt_joints:   [M, NumJoints, 2]
    :param gt_keypoint_visibility: [M, NumJoints]
    :param gt_areas: [M] Area of each ground truth instance. COCOEval uses area of the instance mask to scale OKs, so it must be provided separately.
        If None, we will use area of bounding box of each instance computed from gt_joints.

    :param gt_bboxes: [M, 4] Bounding box (X,Y,W,H) of each ground truth instance. If None, we will use bounding box of each instance computed from gt_joints.
    :param sigmas: [NumJoints]
    :return: IoU matrix [K, M]
    """

    ious = torch.zeros((len(pred_joints), len(gt_joints)), device=pred_joints.device)
    vars = (sigmas * 2) ** 2

    if gt_bboxes is None:
        gt_bboxes = compute_visible_bbox_xywh(gt_joints, gt_keypoint_visibility)

    if gt_areas is None:
        gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

    # compute oks between each detection and ground truth object
    for gt_index, (gt_keypoints, gt_keypoint_visibility, gt_bbox, gt_area) in enumerate(zip(gt_joints, gt_keypoint_visibility, gt_bboxes, gt_areas)):
        # create bounds for ignore regions(double the gt bbox)
        xg = gt_keypoints[:, 0]
        yg = gt_keypoints[:, 1]
        k1 = torch.count_nonzero(gt_keypoint_visibility > 0)

        x0 = gt_bbox[0] - gt_bbox[2]
        x1 = gt_bbox[0] + gt_bbox[2] * 2
        y0 = gt_bbox[1] - gt_bbox[3]
        y1 = gt_bbox[1] + gt_bbox[3] * 2

        for pred_index, pred_keypoints in enumerate(pred_joints):
            xd = pred_keypoints[:, 0]
            yd = pred_keypoints[:, 1]
            if k1 > 0:
                # measure the per-keypoint distance if keypoints visible
                dx = xd - xg
                dy = yd - yg
            else:
                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                dx = (x0 - xd).clamp_min(0) + (xd - x1).clamp_min(0)
                dy = (y0 - yd).clamp_min(0) + (yd - y1).clamp_min(0)

            e = (dx**2 + dy**2) / vars / (gt_area + torch.finfo(torch.float64).eps) / 2

            if k1 > 0:
                e = e[gt_keypoint_visibility > 0]
            ious[pred_index, gt_index] = torch.sum(torch.exp(-e)) / e.shape[0]

    return ious
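
For reference, the per-instance quantity accumulated in the loop above corresponds to the standard COCO object-keypoint similarity when at least one keypoint is visible. With d_i the Euclidean distance of joint i, s^2 the instance area, k_i = 2 * sigma_i, and the sum running over visible joints:

$$\mathrm{OKS} = \frac{\sum_i \exp\!\left(-\frac{d_i^2}{2\, s^2 k_i^2}\right)\,\mathbb{1}[v_i > 0]}{\sum_i \mathbb{1}[v_i > 0]}$$

which matches `e = (dx**2 + dy**2) / vars / (gt_area + eps) / 2` with `vars = (sigmas * 2) ** 2` in the code.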

compute_visible_bbox_xywh(joints, visibility_mask)

Compute the bounding box (X,Y,W,H) of the visible joints for each instance.

Parameters:

Name Type Description Default
joints Tensor

[Num Instances, Num Joints, 2+]; the last channel must have a dimension of at least 2 and is considered to contain the (X, Y) coordinates of the keypoint

required
visibility_mask Tensor

[Num Instances, Num Joints]

required

Returns:

Type Description
np.ndarray

A numpy array [Num Instances, 4] where last dimension contains bbox in format XYWH

Source code in src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_visible_bbox_xywh(joints: Tensor, visibility_mask: Tensor) -> np.ndarray:
    """
    Compute the bounding box (X,Y,W,H) of the visible joints for each instance.

    :param joints:  [Num Instances, Num Joints, 2+] last channel must have dimension of
                    at least 2 that is considered to contain (X,Y) coordinates of the keypoint
    :param visibility_mask: [Num Instances, Num Joints]
    :return: A numpy array [Num Instances, 4] where last dimension contains bbox in format XYWH
    """
    visibility_mask = visibility_mask > 0
    initial_value = 1_000_000

    x1 = torch.min(joints[:, :, 0], where=visibility_mask, initial=initial_value, dim=-1)
    y1 = torch.min(joints[:, :, 1], where=visibility_mask, initial=initial_value, dim=-1)

    x1[x1 == initial_value] = 0
    y1[y1 == initial_value] = 0

    x2 = torch.max(joints[:, :, 0], where=visibility_mask, initial=0, dim=-1)
    y2 = torch.max(joints[:, :, 1], where=visibility_mask, initial=0, dim=-1)

    w = x2 - x1
    h = y2 - y1

    return torch.stack([x1, y1, w, h], dim=-1)
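
As a toy illustration of the same idea (hypothetical values, plain NumPy rather than the function above), keypoints whose visibility is 0 are simply excluded from the box:

# Illustrative sketch only: compute the XYWH box over the visible keypoints of one instance.
import numpy as np

joints = np.array([[10.0, 20.0], [30.0, 60.0], [500.0, 500.0]])  # (NumJoints, 2)
visible = np.array([True, True, False])                          # last keypoint is invisible

x1, y1 = joints[visible].min(axis=0)
x2, y2 = joints[visible].max(axis=0)
print([x1, y1, x2 - x1, y2 - y1])  # [10.0, 20.0, 20.0, 40.0]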

AbstractMetricsArgsPrepFn

Bases: ABC

Abstract preprocess metrics arguments class.

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
class AbstractMetricsArgsPrepFn(ABC):
    """
    Abstract preprocess metrics arguments class.
    """

    @abstractmethod
    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        All derived classes must implement this function and return a tuple of torch tensors (predictions, target).
        """
        raise NotImplementedError()

__call__(preds, target) abstractmethod

All derived classes must implement this function and return a tuple of torch tensors (predictions, target).

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@abstractmethod
def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    All base classes must implement this function and return a tuple of torch tensors (predictions, target).
    """
    raise NotImplementedError()
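
For illustration only, a custom preprocessing callable can subclass AbstractMetricsArgsPrepFn; the class name, threshold value, and import path below are assumptions, not part of the library:

import torch
from typing import Tuple
# Assumed import path, mirroring the "Source code in ..." reference above.
from super_gradients.training.metrics.segmentation_metrics import AbstractMetricsArgsPrepFn


class SoftmaxThresholdPrepFn(AbstractMetricsArgsPrepFn):
    """Hypothetical prep fn: softmax the logits, then binarize with a threshold."""

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold

    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        if isinstance(preds, (tuple, list)):   # multi-output models: first output is the main map
            preds = preds[0]
        preds = torch.softmax(preds, dim=1)
        return (preds > self.threshold).long(), target.long()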

Dice

Bases: torchmetrics.JaccardIndex

Dice Coefficient Metric

Args:

    num_classes: Number of classes in the dataset.
    ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1]. By default, no index is ignored, and all classes are used. IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
    threshold: Threshold value for binary or multi-label probabilities.
    reduction: a method to reduce metric score over labels:

        - ``'elementwise_mean'``: takes the mean (default)
        - ``'sum'``: takes the sum
        - ``'none'``: no reduction will be applied

    metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
        By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@register_metric(Metrics.DICE)
class Dice(torchmetrics.JaccardIndex):
    """
    Dice Coefficient Metric

    Args:
        num_classes: Number of classes in the dataset.
        ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore.
            If given, this class index does not contribute to the returned score, regardless of reduction method.
            Has no effect if given an int that is not in the range [0, num_classes-1].
            By default, no index is ignored, and all classes are used.
            IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
        threshold: Threshold value for binary or multi-label probabilities.
        reduction: a method to reduce metric score over labels:

            - ``'elementwise_mean'``: takes the mean (default)
            - ``'sum'``: takes the sum
            - ``'none'``: no reduction will be applied

        metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
            By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
    """

    def __init__(
        self,
        num_classes: int,
        dist_sync_on_step: bool = False,
        ignore_index: Optional[int] = None,
        reduction: str = "elementwise_mean",
        threshold: float = 0.5,
        metrics_args_prep_fn: Optional[AbstractMetricsArgsPrepFn] = None,
    ):

        if num_classes <= 1:
            raise ValueError(f"Dice class only for multi-class usage! For binary usage, please call {BinaryDice.__name__}")

        ignore_index, ignore_index_list, num_classes, unfiltered_num_classes = _handle_multiple_ignored_inds(ignore_index, num_classes)

        super().__init__(num_classes=num_classes, dist_sync_on_step=dist_sync_on_step, ignore_index=ignore_index, reduction=reduction, threshold=threshold)

        self.ignore_index_list = ignore_index_list
        self.unfiltered_num_classes = unfiltered_num_classes
        self.metrics_args_prep_fn = metrics_args_prep_fn or PreprocessSegmentationMetricsArgs(apply_arg_max=True)
        self.greater_is_better = True

    def update(self, preds, target: torch.Tensor):
        preds, target = self.metrics_args_prep_fn(preds, target)
        if self.ignore_index_list is not None:
            target = _map_ignored_inds(target, self.ignore_index_list, self.unfiltered_num_classes)
            preds = _map_ignored_inds(preds, self.ignore_index_list, self.unfiltered_num_classes)
        super().update(preds=preds, target=target)

    def compute(self) -> torch.Tensor:
        """Computes Dice coefficient"""
        return _dice_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction)

compute()

Computes Dice coefficient

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def compute(self) -> torch.Tensor:
    """Computes Dice coefficient"""
    return _dice_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction)
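
A short usage sketch (import path and tensor shapes are assumptions; raw logits are passed because the default prep fn applies argmax):

import torch
from super_gradients.training.metrics.segmentation_metrics import Dice  # assumed import path

metric = Dice(num_classes=3, ignore_index=0)
preds = torch.randn(4, 3, 64, 64)                 # raw logits [B, C, H, W]
target = torch.randint(0, 3, (4, 64, 64))         # integer masks [B, H, W]
metric.update(preds, target)                      # default prep fn applies argmax over the channel dim
print(metric.compute())                           # scalar Dice coefficient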

IoU

Bases: torchmetrics.JaccardIndex

IoU Metric

Args:

    num_classes: Number of classes in the dataset.
    ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1]. By default, no index is ignored, and all classes are used. IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
    threshold: Threshold value for binary or multi-label probabilities.
    reduction: a method to reduce metric score over labels:

        - ``'elementwise_mean'``: takes the mean (default)
        - ``'sum'``: takes the sum
        - ``'none'``: no reduction will be applied

    metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
        By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@register_metric(Metrics.IOU)
class IoU(torchmetrics.JaccardIndex):
    """
    IoU Metric

    Args:
        num_classes: Number of classes in the dataset.
        ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore.
            If given, this class index does not contribute to the returned score, regardless of reduction method.
            Has no effect if given an int that is not in the range [0, num_classes-1].
            By default, no index is ignored, and all classes are used.
            IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
        threshold: Threshold value for binary or multi-label probabilities.
        reduction: a method to reduce metric score over labels:

            - ``'elementwise_mean'``: takes the mean (default)
            - ``'sum'``: takes the sum
            - ``'none'``: no reduction will be applied

        metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
            By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
    """

    def __init__(
        self,
        num_classes: int,
        dist_sync_on_step: bool = False,
        ignore_index: Optional[Union[int, List[int]]] = None,
        reduction: str = "elementwise_mean",
        threshold: float = 0.5,
        metrics_args_prep_fn: Optional[AbstractMetricsArgsPrepFn] = None,
    ):

        if num_classes <= 1:
            raise ValueError(f"IoU class only for multi-class usage! For binary usage, please call {BinaryIOU.__name__}")
        if isinstance(ignore_index, typing.Iterable) and reduction == "none":
            raise ValueError("passing multiple ignore indices ")
        ignore_index, ignore_index_list, num_classes, unfiltered_num_classes = _handle_multiple_ignored_inds(ignore_index, num_classes)

        super().__init__(num_classes=num_classes, dist_sync_on_step=dist_sync_on_step, ignore_index=ignore_index, reduction=reduction, threshold=threshold)

        self.unfiltered_num_classes = unfiltered_num_classes
        self.ignore_index_list = ignore_index_list
        self.metrics_args_prep_fn = metrics_args_prep_fn or PreprocessSegmentationMetricsArgs(apply_arg_max=True)
        self.greater_is_better = True

    def update(self, preds, target: torch.Tensor):
        preds, target = self.metrics_args_prep_fn(preds, target)
        if self.ignore_index_list is not None:
            target = _map_ignored_inds(target, self.ignore_index_list, self.unfiltered_num_classes)
            preds = _map_ignored_inds(preds, self.ignore_index_list, self.unfiltered_num_classes)
        super().update(preds=preds, target=target)
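
The list form of ignore_index is the distinctive option here; a usage sketch (assumed import path and shapes), keeping in mind it cannot be combined with reduction="none":

import torch
from super_gradients.training.metrics.segmentation_metrics import IoU  # assumed import path

metric = IoU(num_classes=5, ignore_index=[0, 4])  # ignore two target classes at once
preds = torch.randn(2, 5, 32, 32)                 # raw logits [B, C, H, W]
target = torch.randint(0, 5, (2, 32, 32))         # integer masks [B, H, W]
metric.update(preds, target)
print(metric.compute())                           # mean IoU over the remaining classes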

PixelAccuracy

Bases: Metric

Pixel Accuracy

Args:

    ignore_label: Optional[Union[int, List[int]]], specifying a target class(es) to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1]. By default, no index is ignored, and all classes are used. IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
    reduction: a method to reduce metric score over labels:

        - ``'elementwise_mean'``: takes the mean (default)
        - ``'sum'``: takes the sum
        - ``'none'``: no reduction will be applied

    metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
        By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@register_metric(Metrics.PIXEL_ACCURACY)
class PixelAccuracy(Metric):
    """
    Pixel Accuracy

    Args:
        ignore_label: Optional[Union[int, List[int]]], specifying a target class(es) to ignore.
            If given, this class index does not contribute to the returned score, regardless of reduction method.
            Has no effect if given an int that is not in the range [0, num_classes-1].
            By default, no index is ignored, and all classes are used.
            IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
        reduction: a method to reduce metric score over labels:

            - ``'elementwise_mean'``: takes the mean (default)
            - ``'sum'``: takes the sum
            - ``'none'``: no reduction will be applied

        metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
            By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
    """

    def __init__(self, ignore_label: Union[int, List[int]] = -100, dist_sync_on_step=False, metrics_args_prep_fn: Optional[AbstractMetricsArgsPrepFn] = None):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.ignore_label = ignore_label
        self.greater_is_better = True
        self.add_state("total_correct", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.add_state("total_label", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.metrics_args_prep_fn = metrics_args_prep_fn or PreprocessSegmentationMetricsArgs(apply_arg_max=True)

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        predict, target = self.metrics_args_prep_fn(preds, target)
        labeled_mask = self._handle_multiple_ignored_inds(target)

        pixel_labeled = torch.sum(labeled_mask)
        pixel_correct = torch.sum((predict == target) * labeled_mask)
        self.total_correct += pixel_correct
        self.total_label += pixel_labeled

    def _handle_multiple_ignored_inds(self, target):
        if isinstance(self.ignore_label, typing.Iterable):
            evaluated_classes_mask = torch.ones_like(target)
            for ignored_label in self.ignore_label:
                evaluated_classes_mask = evaluated_classes_mask.masked_fill(target.eq(ignored_label), 0)
        else:
            evaluated_classes_mask = target.ne(self.ignore_label)

        return evaluated_classes_mask

    def compute(self):
        _total_correct = self.total_correct.cpu().detach().numpy().astype("int64")
        _total_label = self.total_label.cpu().detach().numpy().astype("int64")
        pix_acc = np.float64(1.0) * _total_correct / (np.spacing(1, dtype=np.float64) + _total_label)
        return pix_acc
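
A usage sketch with a list-valued ignore_label (import path, label values, and shapes are assumptions):

import torch
from super_gradients.training.metrics.segmentation_metrics import PixelAccuracy  # assumed import path

metric = PixelAccuracy(ignore_label=[0, 255])     # pixels carrying these labels are excluded
preds = torch.randn(2, 4, 32, 32)                 # raw logits; default prep fn applies argmax
target = torch.randint(0, 4, (2, 32, 32))
metric.update(preds, target)
print(metric.compute())                           # fraction of correctly classified, non-ignored pixels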

PreprocessSegmentationMetricsArgs

Bases: AbstractMetricsArgsPrepFn

Default segmentation inputs preprocess function applied before updating segmentation metrics; handles multiple model outputs and applies normalizations.

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
class PreprocessSegmentationMetricsArgs(AbstractMetricsArgsPrepFn):
    """
    Default segmentation inputs preprocess function before updating segmentation metrics, handles multiple inputs and
    apply normalizations.
    """

    def __init__(self, apply_arg_max: bool = False, apply_sigmoid: bool = False):
        """
        :param apply_arg_max: Whether to apply argmax on predictions tensor.
        :param apply_sigmoid:  Whether to apply sigmoid on predictions tensor.
        """
        self.apply_arg_max = apply_arg_max
        self.apply_sigmoid = apply_sigmoid

    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        # WHEN DEALING WITH MULTIPLE OUTPUTS- OUTPUTS[0] IS THE MAIN SEGMENTATION MAP
        if isinstance(preds, (tuple, list)):
            preds = preds[0]
        if self.apply_arg_max:
            _, preds = torch.max(preds, 1)
        elif self.apply_sigmoid:
            preds = torch.sigmoid(preds)

        target = target.long()
        return preds, target

__init__(apply_arg_max=False, apply_sigmoid=False)

Parameters:

Name Type Description Default
apply_arg_max bool

Whether to apply argmax on predictions tensor.

False
apply_sigmoid bool

Whether to apply sigmoid on predictions tensor.

False
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def __init__(self, apply_arg_max: bool = False, apply_sigmoid: bool = False):
    """
    :param apply_arg_max: Whether to apply argmax on predictions tensor.
    :param apply_sigmoid:  Whether to apply sigmoid on predictions tensor.
    """
    self.apply_arg_max = apply_arg_max
    self.apply_sigmoid = apply_sigmoid
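
A quick sketch of what the default prep fn does with a multi-output model (tensor shapes and import path are assumptions):

import torch
from super_gradients.training.metrics.segmentation_metrics import PreprocessSegmentationMetricsArgs  # assumed import path

prep = PreprocessSegmentationMetricsArgs(apply_arg_max=True)
outputs = (torch.randn(2, 3, 16, 16), torch.randn(2, 3, 8, 8))  # main segmentation map first, then aux head
target = torch.randint(0, 3, (2, 16, 16)).float()

preds, target = prep(outputs, target)
print(preds.shape, preds.dtype, target.dtype)  # torch.Size([2, 16, 16]) torch.int64 torch.int64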

batch_intersection_union(predict, target, nclass)

Batch Intersection of Union

Parameters:

Name Type Description Default
predict torch.Tensor

input 4D tensor

required
target torch.Tensor

label 3D tensor

required
nclass int

number of categories (int)

required
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def batch_intersection_union(predict: torch.Tensor, target: torch.Tensor, nclass: int) -> Tuple[float, float]:
    """Batch Intersection of Union

    :param predict: input 4D tensor
    :param target: label 3D tensor
    :param nclass: number of categories (int)
    """
    _, predict = torch.max(predict, 1)
    mini = 1
    maxi = nclass
    nbins = nclass
    predict = predict.cpu().numpy() + 1
    target = target.cpu().numpy() + 1

    predict = predict * (target > 0).astype(predict.dtype)
    intersection = predict * (predict == target)
    # areas of intersection and union
    area_inter, _ = np.histogram(intersection, bins=nbins, range=(mini, maxi))
    area_pred, _ = np.histogram(predict, bins=nbins, range=(mini, maxi))
    area_lab, _ = np.histogram(target, bins=nbins, range=(mini, maxi))
    area_union = area_pred + area_lab - area_inter
    assert (area_inter <= area_union).all(), "Intersection area should be smaller than Union area"
    return area_inter, area_union
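
Per-class IoU can then be derived from the returned counts; a sketch with assumed shapes and import path:

import numpy as np
import torch
from super_gradients.training.metrics.segmentation_metrics import batch_intersection_union  # assumed import path

predict = torch.randn(2, 4, 32, 32)               # logits [B, C, H, W]
target = torch.randint(0, 4, (2, 32, 32))         # labels [B, H, W]

area_inter, area_union = batch_intersection_union(predict, target, nclass=4)
iou_per_class = area_inter / (area_union + np.spacing(1))  # eps guards against classes absent from the batch
print(iou_per_class, iou_per_class.mean())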

batch_pix_accuracy(predict, target)

Batch Pixel Accuracy

Parameters:

Name Type Description Default
predict torch.Tensor

input 4D tensor

required
target torch.Tensor

label 3D tensor

required
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def batch_pix_accuracy(predict: torch.Tensor, target: torch.Tensor) -> Tuple[float, float]:
    """Batch Pixel Accuracy

    :param predict: input 4D tensor
    :param target: label 3D tensor
    """
    _, predict = torch.max(predict, 1)
    predict = predict.cpu().numpy() + 1
    target = target.cpu().numpy() + 1
    pixel_labeled = np.sum(target > 0)
    pixel_correct = np.sum((predict == target) * (target > 0))
    assert pixel_correct <= pixel_labeled, "Correct area should be smaller than Labeled"
    return pixel_correct, pixel_labeled
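
And correspondingly for the pixel-accuracy counts (same assumed import path and shapes):

import torch
from super_gradients.training.metrics.segmentation_metrics import batch_pix_accuracy  # assumed import path

predict = torch.randn(2, 4, 32, 32)               # logits [B, C, H, W]
target = torch.randint(0, 4, (2, 32, 32))         # labels [B, H, W]

pixel_correct, pixel_labeled = batch_pix_accuracy(predict, target)
print(pixel_correct / (pixel_labeled + 1e-12))    # global pixel accuracy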