
Metrics

ToyTestClassificationMetric

Bases: Metric

Dummy classification Metric object that always returns 0 (for testing).

Source code in src/super_gradients/training/metrics/classification_metrics.py, lines 81-93
class ToyTestClassificationMetric(Metric):
    """
    Dummy classification Mettric object returning 0 always (for testing).
    """

    def __init__(self, dist_sync_on_step=False):
        super().__init__(dist_sync_on_step=dist_sync_on_step)

    def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
        pass

    def compute(self):
        return 0
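
A minimal usage sketch (assuming the class is imported from the module shown in the source path above):

import torch
from super_gradients.training.metrics.classification_metrics import ToyTestClassificationMetric

metric = ToyTestClassificationMetric()
preds = torch.randn(4, 10)               # dummy logits for a batch of 4 samples
target = torch.randint(0, 10, (4,))      # dummy labels
metric.update(preds, target)             # update() is a no-op
print(metric.compute())                  # always 0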

accuracy(output, target, topk=(1,))

Computes the precision@k for the specified values of k

Parameters:

Name Type Description Default
output Tensor / Numpy / List

The prediction

required
target Tensor / Numpy / List

The corresponding labels

required
topk tuple

The type of accuracy to calculate, e.g. topk=(1,5) returns accuracy for top-1 and top-5

(1,)
Source code in src/super_gradients/training/metrics/classification_metrics.py, lines 10-37
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k
    :param output: Tensor / Numpy / List
        The prediction
    :param target: Tensor / Numpy / List
        The corresponding labels
    :param topk: tuple
        The type of accuracy to calculate, e.g. topk=(1,5) returns accuracy for top-1 and top-5"""
    # Convert to tensor
    output = convert_to_tensor(output)
    target = convert_to_tensor(target)

    # Get the maximal value of the accuracy measurement and the batch size
    maxk = max(topk)
    batch_size = target.size(0)

    # Get the top k predictions
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    # Count the number of correct predictions only for the highest k
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # Count the number of correct prediction for the different K (the top predictions) values
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size).item())
    return res
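
A short usage sketch of accuracy (import path taken from the source location shown above):

import torch
from super_gradients.training.metrics.classification_metrics import accuracy

output = torch.tensor([[0.10, 0.70, 0.20],
                       [0.80, 0.05, 0.15]])   # predictions for 2 samples, 3 classes
target = torch.tensor([1, 2])                 # sample 1 correct at top-1, sample 2 only at top-2

top1, top2 = accuracy(output, target, topk=(1, 2))
print(top1, top2)  # 50.0 100.0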

Delta1

Bases: DepthEstimationMetricBase

Delta1 metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 164-174
@register_metric(Metrics.DELTA1)
class Delta1(DepthEstimationMetricBase):
    """
    Delta1 metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=DeltaMetric(delta=1.25), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

Delta2

Bases: DepthEstimationMetricBase

Delta2 metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 177-187
@register_metric(Metrics.DELTA2)
class Delta2(DepthEstimationMetricBase):
    """
    Delta2 metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=DeltaMetric(delta=1.25**2), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

Delta3

Bases: DepthEstimationMetricBase

Delta3 metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 190-200
@register_metric(Metrics.DELTA3)
class Delta3(DepthEstimationMetricBase):
    """
    Delta3 metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=DeltaMetric(delta=1.25**3), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)
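
A minimal usage sketch (import path assumed from the source location above). Delta1, Delta2 and Delta3 only differ in the threshold passed to DeltaMetric: 1.25, 1.25**2 and 1.25**3 respectively:

import torch
from super_gradients.training.metrics.depth_estimation_metrics import Delta1

preds = torch.tensor([1.0, 2.0, 4.0])
target = torch.tensor([1.1, 2.0, 1.0])   # per-pixel ratios max(p/t, t/p): 1.1, 1.0, 4.0

metric = Delta1()
metric.update(preds, target)
print(metric.compute())  # 2 of 3 pixels satisfy ratio < 1.25 -> tensor(0.6667)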

DeltaMetric

Bases: Metric

Delta metric - returns the percentage of pixels such that max(preds / target, target / preds) < delta

Use inheritors for ignored values.

Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 128-161
@register_metric(Metrics.DELTAMETRIC)
class DeltaMetric(Metric):
    """
    Delta metric - returns the percentage of pixels s.t max(preds / target, target / preds) < delta

    Use inheritors for ignored values.

    :param delta: Threshold value for delta metric.

    """

    def __init__(self, delta: float):
        super().__init__()
        self.delta = delta
        self.add_state("total_delta_pixels", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.add_state("total_pixels", default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds: Tensor, target: Tensor):
        self.total_pixels += target.numel()
        self.total_delta_pixels += self.compute_delta_pixels(preds, target)

    def compute_delta_pixels(self, preds: Tensor, target: Tensor) -> Tensor:
        """
        Compute delta metrics for depth estimation without support for ignored values.

        :param preds: Model predictions.
        :param target: Ground truth depth map.
        :return: Delta metric value.
        """
        ratio = torch.max(preds / target, target / preds)
        return torch.sum((ratio < self.delta).float())

    def compute(self):
        return self.total_delta_pixels / self.total_pixels
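
A quick numeric check of the threshold rule above, in plain PyTorch:

import torch

preds = torch.tensor([2.0])
target = torch.tensor([3.0])
ratio = torch.max(preds / target, target / preds)   # tensor([1.5])
print(ratio < 1.25, ratio < 1.25 ** 2)              # tensor([False]) tensor([True])

So a predicted depth of 2.0 against a ground truth of 3.0 counts as correct for Delta2 and Delta3, but not for Delta1.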

compute_delta_pixels(preds, target)

Compute delta metrics for depth estimation without support for ignored values.

Parameters:

Name Type Description Default
preds Tensor

Model predictions.

required
target Tensor

Ground truth depth map.

required

Returns:

Type Description
Tensor

Delta metric value.

Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 149-158
def compute_delta_pixels(self, preds: Tensor, target: Tensor) -> Tensor:
    """
    Compute delta metrics for depth estimation without support for ignored values.

    :param preds: Model predictions.
    :param target: Ground truth depth map.
    :return: Delta metric value.
    """
    ratio = torch.max(preds / target, target / preds)
    return torch.sum((ratio < self.delta).float())

DepthEstimationMetricBase

Bases: Metric

Base class for depth estimation metrics, handling common processing steps.

Parameters:

Name Type Description Default
metric Metric

The specific torchmetrics metric instance.

required
ignore_val Optional[float]

Value to be ignored when computing metrics. In depth estimation tasks, it is common to have regions in the depth map where the ground truth depth is not available or unreliable (e.g., marked as -1 or a specific value). In such cases, setting ignore_val allows you to exclude these regions from the metric computation. It is important that the dataset class providing the depth map fills the corresponding regions of the image with this ignore_val value to ensure consistency in metric calculations.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 9-60
class DepthEstimationMetricBase(Metric):
    """
    Base class for depth estimation metrics, handling common processing steps.

    :param metric: The specific torchmetrics metric instance.
    :param ignore_val: Value to be ignored when computing metrics. In depth estimation tasks, it is common
                      to have regions in the depth map where the ground truth depth is not available or unreliable (e.g.,
                      marked as -1 or a specific value). In such cases, setting `ignore_val` allows you to exclude these
                      regions from the metric computation. It is important that the dataset class providing the depth map
                      fills the corresponding regions of the image with this `ignore_val` value to ensure consistency in
                      metric calculations.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions.
    """

    def __init__(self, metric: Metric, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__()
        self.metric = metric
        self.ignore_val = ignore_val
        self.apply_sigmoid = apply_sigmoid

    def process_preds_and_target(self, preds: Union[Tensor, Sequence[Tensor]], target: Tensor) -> Tuple[Tensor, Tensor]:
        """
        Process predictions and target tensors for depth estimation metrics:
        - If a sequence is returned by the model -> sets preds to the first element
        - Squeezes the dummy dimension (i.e dim1) when preds.ndim == 4
        - Applies sigmoid to preds if apply_sigmoid is True
        - Removes entries to ignore where ignore_val is present in target

        :param preds: Model predictions, either a tensor or a sequence of tensors.
        :param target: Ground truth depth map.
        :return: Processed predictions and target tensors.
        """

        if isinstance(preds, Sequence):
            preds = preds[0]
        if self.apply_sigmoid:
            preds = torch.sigmoid(preds)
        if self.ignore_val is not None:
            non_ignored = preds != self.ignore_val
            preds = preds[non_ignored]
            target = target[non_ignored]
        return preds, target

    def update(self, preds: Tensor, target: Tensor):
        preds, target = self.process_preds_and_target(preds, target)
        self.metric.update(preds, target)

    def compute(self):
        return self.metric.compute()

    def reset(self) -> None:
        self.metric.reset()

process_preds_and_target(preds, target)

Process predictions and target tensors for depth estimation metrics:

- If a sequence is returned by the model -> sets preds to the first element
- Squeezes the dummy dimension (i.e. dim1) when preds.ndim == 4
- Applies sigmoid to preds if apply_sigmoid is True
- Removes entries to ignore where ignore_val is present in target

Parameters:

Name Type Description Default
preds Union[Tensor, Sequence[Tensor]]

Model predictions, either a tensor or a sequence of tensors.

required
target Tensor

Ground truth depth map.

required

Returns:

Type Description
Tuple[Tensor, Tensor]

Processed predictions and target tensors.

Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 29-50
def process_preds_and_target(self, preds: Union[Tensor, Sequence[Tensor]], target: Tensor) -> Tuple[Tensor, Tensor]:
    """
    Process predictions and target tensors for depth estimation metrics:
    - If a sequence is returned by the model -> sets preds to the first element
    - Squeezes the dummy dimension (i.e dim1) when preds.ndim == 4
    - Applies sigmoid to preds if apply_sigmoid is True
    - Removes entries to ignore where ignore_val is present in target

    :param preds: Model predictions, either a tensor or a sequence of tensors.
    :param target: Ground truth depth map.
    :return: Processed predictions and target tensors.
    """

    if isinstance(preds, Sequence):
        preds = preds[0]
    if self.apply_sigmoid:
        preds = torch.sigmoid(preds)
    if self.ignore_val is not None:
        non_ignored = preds != self.ignore_val
        preds = preds[non_ignored]
        target = target[non_ignored]
    return preds, target
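
A small standalone sketch of what the ignore_val filtering conceptually does (plain tensors, not tied to a specific metric; the exact masking inside the library may differ in detail):

import torch

ignore_val = -1.0
preds = torch.tensor([0.8, 1.2, 3.0, 0.5])
target = torch.tensor([1.0, -1.0, 2.9, 0.4])   # second pixel has no valid ground truth

valid = target != ignore_val                    # keep only pixels with a real depth value
preds, target = preds[valid], target[valid]
# The filtered 1-D tensors are then passed to the wrapped torchmetrics metric.
print(preds, target)  # tensor([0.8000, 3.0000, 0.5000]) tensor([1.0000, 2.9000, 0.4000])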

DepthMAE

Bases: DepthEstimationMetricBase

Mean Absolute Error (MAE) metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 102-112
@register_metric(Metrics.DepthMAE)
class DepthMAE(DepthEstimationMetricBase):
    """
    Mean Absolute Error (MAE) metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanAbsoluteError(), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthMAPE

Bases: DepthEstimationMetricBase

Mean Absolute Percentage Error (MAPE) metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 115-125
@register_metric(Metrics.DepthMAPE)
class DepthMAPE(DepthEstimationMetricBase):
    """
    Mean Absolute Percentage Error (MAPE) metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanAbsolutePercentageError(), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthMSE

Bases: DepthEstimationMetricBase

Mean Squared Error metric (squared) for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 63-73
@register_metric(Metrics.DepthMSE)
class DepthMSE(DepthEstimationMetricBase):
    """
    Mean Squared Error metric (squared) for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanSquaredError(squared=True), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthMSLE

Bases: DepthEstimationMetricBase

Mean Squared Logarithmic Error metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 89-99
@register_metric(Metrics.DepthMSLE)
class DepthMSLE(DepthEstimationMetricBase):
    """
    Mean Squared Logarithmic Error metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanSquaredLogError(), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)

DepthRMSE

Bases: DepthEstimationMetricBase

Root Mean Squared Error metric for depth estimation with support for ignored values.

Parameters:

Name Type Description Default
ignore_val Optional[float]

Value to be ignored when computing the metric.

None
apply_sigmoid bool

Whether to apply the sigmoid function to predictions before updating the metric.

False
Source code in src/super_gradients/training/metrics/depth_estimation_metrics.py, lines 76-86
@register_metric(Metrics.DepthRMSE)
class DepthRMSE(DepthEstimationMetricBase):
    """
    Root Mean Squared Error metric for depth estimation with support for ignored values.

    :param ignore_val: Value to be ignored when computing the metric.
    :param apply_sigmoid: Whether to apply the sigmoid function to predictions before updating the metric.
    """

    def __init__(self, ignore_val: Optional[float] = None, apply_sigmoid: bool = False):
        super().__init__(metric=MeanSquaredError(squared=False), ignore_val=ignore_val, apply_sigmoid=apply_sigmoid)
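
A minimal usage sketch of the depth metrics (import path assumed from the source location above), following the docstring convention that regions without valid ground truth are filled with ignore_val in the depth map:

import torch
from super_gradients.training.metrics.depth_estimation_metrics import DepthMAE, DepthRMSE

preds = torch.rand(2, 1, 4, 4)            # (batch, 1, H, W) depth predictions
target = torch.rand(2, 1, 4, 4)
target[0, 0, 0, 0] = -1.0                 # pixel without valid ground truth

mae = DepthMAE(ignore_val=-1.0)
rmse = DepthRMSE(ignore_val=-1.0)
mae.update(preds, target)
rmse.update(preds, target)
print(mae.compute(), rmse.compute())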

DetectionMetrics

Bases: Metric

DetectionMetrics

Metric class for computing F1, Precision, Recall and Mean Average Precision.

Parameters:

Name Type Description Default
num_cls int

Number of classes.

required
post_prediction_callback DetectionPostPredictionCallback

DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS).

required
normalize_targets bool

Whether to normalize bbox coordinates by image size.

False
iou_thres Union[IouThreshold, Tuple[float, float], float]

IoU threshold to compute the mAP. Could be either an instance of IouThreshold, a tuple (lower_bound, upper_bound) or a single scalar.

IouThreshold.MAP_05_TO_095
recall_thres torch.Tensor

Recall threshold to compute the mAP.

None
score_thres float

Score threshold to compute Recall, Precision and F1.

0.1
top_k_predictions int

Number of predictions per class used to compute metrics, ordered by confidence score

100
dist_sync_on_step bool

Synchronize metric state across processes at each forward() before returning the value at the step.

False
accumulate_on_cpu bool

Run on CPU regardless of device used in other parts. This is to avoid "CUDA out of memory" that might happen on GPU.

True
calc_best_score_thresholds bool

Whether to calculate the best score threshold overall and per class. If True, the compute() function will return a metrics dictionary that not only includes the average metrics calculated across all classes, but also the optimal score threshold overall and for each individual class.

True
include_classwise_ap bool

Whether to include the class-wise average precision in the returned metrics dictionary. If enabled, output metrics dictionary will look similar to this:

{
    'Precision0.5:0.95': 0.5,
    'Recall0.5:0.95': 0.5,
    'F10.5:0.95': 0.5,
    'mAP0.5:0.95': 0.5,
    'AP0.5:0.95_person': 0.5,
    'AP0.5:0.95_car': 0.5,
    'AP0.5:0.95_bicycle': 0.5,
    'AP0.5:0.95_motorcycle': 0.5,
    ...
}

Class names are either provided via the class_names parameter or are generated automatically.

False
class_names List[str]

Array of class names. When include_classwise_ap=True, these names are used to build the per-class AP keys in the output metrics dictionary. If None, dummy names class_{idx} are used instead.

None
state_dict_prefix str

A prefix added to the metric's state dict keys. The state dict is used to synchronize the metric in DDP mode. It was empirically found that if you have two metric classes A and B(A) that share the same state key, torchmetrics attempts to sync their states together, which causes an error. In this case, adding a prefix to the name of the synchronized state seems to help, but it is still unclear why this happens.

''
Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 27-288
@register_metric(Metrics.DETECTION_METRICS)
class DetectionMetrics(Metric):
    """
    DetectionMetrics

    Metric class for computing F1, Precision, Recall and Mean Average Precision.

    :param num_cls:                         Number of classes.
    :param post_prediction_callback:        DetectionPostPredictionCallback to be applied on net's output prior to the metric computation (NMS).
    :param normalize_targets:               Whether to normalize bbox coordinates by image size.
    :param iou_thres:                       IoU threshold to compute the mAP.
                                            Could be either instance of IouThreshold, a tuple (lower bound, upper_bound) or single scalar.
    :param recall_thres:                    Recall threshold to compute the mAP.
    :param score_thres:                     Score threshold to compute Recall, Precision and F1.
    :param top_k_predictions:               Number of predictions per class used to compute metrics, ordered by confidence score
    :param dist_sync_on_step:               Synchronize metric state across processes at each ``forward()`` before returning the value at the step.
    :param accumulate_on_cpu:               Run on CPU regardless of device used in other parts.
                                            This is to avoid "CUDA out of memory" that might happen on GPU.
    :param calc_best_score_thresholds:      Whether to calculate the best score threshold overall and per class
                                            If True, the compute() function will return a metrics dictionary that not
                                            only includes the average metrics calculated across all classes,
                                            but also the optimal score threshold overall and for each individual class.
    :param include_classwise_ap:            Whether to include the class-wise average precision in the returned metrics dictionary.
                                            If enabled, output metrics dictionary will look similar to this:
                                            {
                                                'Precision0.5:0.95': 0.5,
                                                'Recall0.5:0.95': 0.5,
                                                'F10.5:0.95': 0.5,
                                                'mAP0.5:0.95': 0.5,
                                                'AP0.5:0.95_person': 0.5,
                                                'AP0.5:0.95_car': 0.5,
                                                'AP0.5:0.95_bicycle': 0.5,
                                                'AP0.5:0.95_motorcycle': 0.5,
                                                ...
                                            }
                                            Class names are either provided via the class_names parameter or are generated automatically.
    :param class_names:                     Array of class names. When include_classwise_ap=True, will use these names to make
                                            per-class APs keys in the output metrics dictionary.
                                            If None, will use dummy names `class_{idx}` instead.
    :param state_dict_prefix:               A prefix to append to the state dict of the metric. A state dict used to synchronize metric in DDP mode.
                                            It was empirically found that if you have two metric classes A and B(A) that share the same state key, for
                                            some reason torchmetrics attempts to sync their states all together, which causes an error.
                                            In this case adding a prefix to the name of the synchronized state seems to help,
                                            but it is still unclear why it happens.


    """

    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback,
        normalize_targets: bool = False,
        iou_thres: Union[IouThreshold, Tuple[float, float], float] = IouThreshold.MAP_05_TO_095,
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
        calc_best_score_thresholds: bool = True,
        include_classwise_ap: bool = False,
        class_names: List[str] = None,
        state_dict_prefix: str = "",
    ):
        if class_names is None:
            if include_classwise_ap:
                logger.warning(
                    "Parameter 'include_classwise_ap' is set to True, but no class names are provided. "
                    "We will generate dummy class names, but we recommend to provide class names explicitly to"
                    "have meaningful names in reported metrics."
                )
            class_names = ["class_" + str(i) for i in range(num_cls)]
        else:
            class_names = list(class_names)

        if class_names is not None and len(class_names) != num_cls:
            raise ValueError(f"Number of class names ({len(class_names)}) does not match number of classes ({num_cls})")

        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.num_cls = num_cls
        self.iou_thres = iou_thres
        self.class_names = class_names

        if isinstance(iou_thres, IouThreshold):
            self.iou_thresholds = iou_thres.to_tensor()
        if isinstance(iou_thres, tuple):
            low, high = iou_thres
            self.iou_thresholds = IouThreshold.from_bounds(low, high)
        else:
            self.iou_thresholds = torch.tensor([iou_thres])

        self.map_str = "mAP" + self._get_range_str()
        self.include_classwise_ap = include_classwise_ap

        self.precision_metric_key = f"{state_dict_prefix}Precision{self._get_range_str()}"
        self.recall_metric_key = f"{state_dict_prefix}Recall{self._get_range_str()}"
        self.f1_metric_key = f"{state_dict_prefix}F1{self._get_range_str()}"
        self.map_metric_key = f"{state_dict_prefix}mAP{self._get_range_str()}"

        greater_component_is_better = [
            (self.precision_metric_key, True),
            (self.recall_metric_key, True),
            (self.map_metric_key, True),
            (self.f1_metric_key, True),
        ]

        if self.include_classwise_ap:
            self.per_class_ap_names = [f"{state_dict_prefix}AP{self._get_range_str()}_{class_name}" for class_name in class_names]
            greater_component_is_better += [(key, True) for key in self.per_class_ap_names]

        self.greater_component_is_better = collections.OrderedDict(greater_component_is_better)
        self.component_names = list(self.greater_component_is_better.keys())
        self.calc_best_score_thresholds = calc_best_score_thresholds
        self.best_threshold_per_class_names = [f"Best_score_threshold_{class_name}" for class_name in class_names]

        if self.calc_best_score_thresholds:
            self.component_names.append("Best_score_threshold")

        if self.calc_best_score_thresholds and self.include_classwise_ap:
            self.component_names += self.best_threshold_per_class_names

        self.components = len(self.component_names)

        self.post_prediction_callback = post_prediction_callback
        self.is_distributed = super_gradients.is_distributed()
        self.denormalize_targets = not normalize_targets
        self.world_size = None
        self.rank = None
        self.state_key = f"{state_dict_prefix}matching_info{self._get_range_str()}"
        self.add_state(self.state_key, default=[], dist_reduce_fx=None)

        self.recall_thresholds = torch.linspace(0, 1, 101) if recall_thres is None else torch.tensor(recall_thres, dtype=torch.float32)
        self.score_threshold = score_thres
        self.top_k_predictions = top_k_predictions

        self.accumulate_on_cpu = accumulate_on_cpu

    def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
        """
        Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

        :param preds:           Raw output of the model, the format might change from one model to another,
                                but has to fit the input format of the post_prediction_callback (cx,cy,wh)
        :param target:          Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format:  (index, label, cx, cy, w, h)
        :param device:          Device to run on
        :param inputs:          Input image tensor of shape (batch_size, n_img, height, width)
        :param crowd_targets:   Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH
        """
        self.iou_thresholds = self.iou_thresholds.to(device)
        _, _, height, width = inputs.shape
        iou_matcher = IoUMatching(self.iou_thresholds)

        targets = target.clone()
        crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

        preds = self.post_prediction_callback(preds, device=device)

        new_matching_info = compute_detection_matching(
            preds,
            targets,
            height,
            width,
            iou_thresholds=iou_matcher.get_thresholds(),
            matching_strategy=iou_matcher,
            crowd_targets=crowd_targets,
            top_k=self.top_k_predictions,
            denormalize_targets=self.denormalize_targets,
            device=self.device,
            return_on_cpu=self.accumulate_on_cpu,
        )

        accumulated_matching_info = getattr(self, self.state_key)
        setattr(self, self.state_key, accumulated_matching_info + new_matching_info)

    def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
        """Compute the metrics for all the accumulated results.
        :return: Metrics of interest
        """
        mean_ap, mean_precision, mean_recall, mean_f1, best_score_threshold = -1.0, -1.0, -1.0, -1.0, -1.0
        accumulated_matching_info = getattr(self, self.state_key)
        best_score_threshold_per_cls = np.zeros(self.num_cls)
        mean_ap_per_class = np.zeros(self.num_cls)

        if len(accumulated_matching_info):
            matching_info_tensors = [torch.cat(x, 0) for x in list(zip(*accumulated_matching_info))]

            # shape (n_class, nb_iou_thresh)
            (
                ap_per_present_classes,
                precision_per_present_classes,
                recall_per_present_classes,
                f1_per_present_classes,
                present_classes,
                best_score_threshold,
                best_score_thresholds_per_present_classes,
            ) = compute_detection_metrics(
                *matching_info_tensors,
                recall_thresholds=self.recall_thresholds,
                score_threshold=self.score_threshold,
                device="cpu" if self.accumulate_on_cpu else self.device,
            )

            # Precision, recall and f1 are computed for IoU threshold range, averaged over classes
            # results before version 3.0.4 (Dec 11 2022) were computed only for smallest value (i.e IoU 0.5 if metric is @0.5:0.95)
            mean_precision, mean_recall, mean_f1 = precision_per_present_classes.mean(), recall_per_present_classes.mean(), f1_per_present_classes.mean()

            # MaP is averaged over IoU thresholds and over classes
            mean_ap = ap_per_present_classes.mean()

            # Fill array of per-class AP scores with values for classes that were present in the dataset
            ap_per_class = ap_per_present_classes.mean(1)
            for i, class_index in enumerate(present_classes):
                mean_ap_per_class[class_index] = float(ap_per_class[i])
                best_score_threshold_per_cls[class_index] = float(best_score_thresholds_per_present_classes[i])

        output_dict = {
            self.precision_metric_key: float(mean_precision),
            self.recall_metric_key: float(mean_recall),
            self.map_metric_key: float(mean_ap),
            self.f1_metric_key: float(mean_f1),
        }

        if self.include_classwise_ap:
            for i, ap_i in enumerate(mean_ap_per_class):
                output_dict[self.per_class_ap_names[i]] = float(ap_i)

        if self.calc_best_score_thresholds:
            output_dict["Best_score_threshold"] = float(best_score_threshold)

        if self.include_classwise_ap and self.calc_best_score_thresholds:
            for threshold_per_class_names, threshold_value in zip(self.best_threshold_per_class_names, best_score_threshold_per_cls):
                output_dict[threshold_per_class_names] = float(threshold_value)

        return output_dict

    def _sync_dist(self, dist_sync_fn=None, process_group=None):
        """
        When in distributed mode, stats are aggregated after each forward pass to the metric state. Since these have all
        different sizes we override the synchronization function since it works only for tensors (and use
        all_gather_object)
        :param dist_sync_fn:
        :return:
        """
        if self.world_size is None:
            self.world_size = super_gradients.common.environment.ddp_utils.get_world_size() if self.is_distributed else -1
        if self.rank is None:
            self.rank = torch.distributed.get_rank() if self.is_distributed else -1

        if self.is_distributed:
            local_state_dict = {attr: getattr(self, attr) for attr in self._reductions.keys()}
            gathered_state_dicts = [None] * self.world_size
            torch.distributed.barrier()
            torch.distributed.all_gather_object(gathered_state_dicts, local_state_dict)
            matching_info = []
            for state_dict in gathered_state_dicts:
                matching_info += state_dict[self.state_key]
            matching_info = tensor_container_to_device(matching_info, device="cpu" if self.accumulate_on_cpu else self.device)

            setattr(self, self.state_key, matching_info)

    def _get_range_str(self):
        return "@%.2f" % self.iou_thresholds[0] if not len(self.iou_thresholds) > 1 else "@%.2f:%.2f" % (self.iou_thresholds[0], self.iou_thresholds[-1])

compute()

Compute the metrics for all the accumulated results.

Returns:

Type Description
Dict[str, Union[float, torch.Tensor]]

Metrics of interest

Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 201-260
def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
    """Compute the metrics for all the accumulated results.
    :return: Metrics of interest
    """
    mean_ap, mean_precision, mean_recall, mean_f1, best_score_threshold = -1.0, -1.0, -1.0, -1.0, -1.0
    accumulated_matching_info = getattr(self, self.state_key)
    best_score_threshold_per_cls = np.zeros(self.num_cls)
    mean_ap_per_class = np.zeros(self.num_cls)

    if len(accumulated_matching_info):
        matching_info_tensors = [torch.cat(x, 0) for x in list(zip(*accumulated_matching_info))]

        # shape (n_class, nb_iou_thresh)
        (
            ap_per_present_classes,
            precision_per_present_classes,
            recall_per_present_classes,
            f1_per_present_classes,
            present_classes,
            best_score_threshold,
            best_score_thresholds_per_present_classes,
        ) = compute_detection_metrics(
            *matching_info_tensors,
            recall_thresholds=self.recall_thresholds,
            score_threshold=self.score_threshold,
            device="cpu" if self.accumulate_on_cpu else self.device,
        )

        # Precision, recall and f1 are computed for IoU threshold range, averaged over classes
        # results before version 3.0.4 (Dec 11 2022) were computed only for smallest value (i.e IoU 0.5 if metric is @0.5:0.95)
        mean_precision, mean_recall, mean_f1 = precision_per_present_classes.mean(), recall_per_present_classes.mean(), f1_per_present_classes.mean()

        # MaP is averaged over IoU thresholds and over classes
        mean_ap = ap_per_present_classes.mean()

        # Fill array of per-class AP scores with values for classes that were present in the dataset
        ap_per_class = ap_per_present_classes.mean(1)
        for i, class_index in enumerate(present_classes):
            mean_ap_per_class[class_index] = float(ap_per_class[i])
            best_score_threshold_per_cls[class_index] = float(best_score_thresholds_per_present_classes[i])

    output_dict = {
        self.precision_metric_key: float(mean_precision),
        self.recall_metric_key: float(mean_recall),
        self.map_metric_key: float(mean_ap),
        self.f1_metric_key: float(mean_f1),
    }

    if self.include_classwise_ap:
        for i, ap_i in enumerate(mean_ap_per_class):
            output_dict[self.per_class_ap_names[i]] = float(ap_i)

    if self.calc_best_score_thresholds:
        output_dict["Best_score_threshold"] = float(best_score_threshold)

    if self.include_classwise_ap and self.calc_best_score_thresholds:
        for threshold_per_class_names, threshold_value in zip(self.best_threshold_per_class_names, best_score_threshold_per_cls):
            output_dict[threshold_per_class_names] = float(threshold_value)

    return output_dict

update(preds, target, device, inputs, crowd_targets=None)

Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

Parameters:

Name Type Description Default
preds

Raw output of the model, the format might change from one model to another, but has to fit the input format of the post_prediction_callback (cx,cy,wh)

required
target torch.Tensor

Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format: (index, label, cx, cy, w, h)

required
device str

Device to run on

required
inputs torch.tensor

Input image tensor of shape (batch_size, n_img, height, width)

required
crowd_targets Optional[torch.Tensor]

Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH

None
Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 164-199
def update(self, preds, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
    """
    Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.

    :param preds:           Raw output of the model, the format might change from one model to another,
                            but has to fit the input format of the post_prediction_callback (cx,cy,wh)
    :param target:          Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. format:  (index, label, cx, cy, w, h)
    :param device:          Device to run on
    :param inputs:          Input image tensor of shape (batch_size, n_img, height, width)
    :param crowd_targets:   Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH
    """
    self.iou_thresholds = self.iou_thresholds.to(device)
    _, _, height, width = inputs.shape
    iou_matcher = IoUMatching(self.iou_thresholds)

    targets = target.clone()
    crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

    preds = self.post_prediction_callback(preds, device=device)

    new_matching_info = compute_detection_matching(
        preds,
        targets,
        height,
        width,
        iou_thresholds=iou_matcher.get_thresholds(),
        matching_strategy=iou_matcher,
        crowd_targets=crowd_targets,
        top_k=self.top_k_predictions,
        denormalize_targets=self.denormalize_targets,
        device=self.device,
        return_on_cpu=self.accumulate_on_cpu,
    )

    accumulated_matching_info = getattr(self, self.state_key)
    setattr(self, self.state_key, accumulated_matching_info + new_matching_info)
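
A hedged construction sketch showing how the reported metric keys are assembled. The PPYoloEPostPredictionCallback import and its arguments follow typical SuperGradients recipes and are an assumption here; use the post-prediction callback that matches the model you evaluate:

from super_gradients.training.metrics import DetectionMetrics
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback

metric = DetectionMetrics(
    num_cls=3,
    post_prediction_callback=PPYoloEPostPredictionCallback(
        score_threshold=0.01, nms_top_k=1000, max_predictions=300, nms_threshold=0.7
    ),
    normalize_targets=True,
    include_classwise_ap=True,
    class_names=["person", "car", "bicycle"],
)
print(metric.component_names)
# ['Precision@0.50:0.95', 'Recall@0.50:0.95', 'mAP@0.50:0.95', 'F1@0.50:0.95',
#  'AP@0.50:0.95_person', 'AP@0.50:0.95_car', 'AP@0.50:0.95_bicycle',
#  'Best_score_threshold', 'Best_score_threshold_person', 'Best_score_threshold_car',
#  'Best_score_threshold_bicycle']
# During validation, update(preds, target, device=..., inputs=...) is called per batch
# and compute() returns a dict keyed by these names.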

DetectionMetricsDistanceBased

Bases: DetectionMetrics

Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 291-369
@register_metric(Metrics.DETECTION_METRICS_DISTANCE_BASED)
class DetectionMetricsDistanceBased(DetectionMetrics):
    def __init__(
        self,
        num_cls: int,
        post_prediction_callback: DetectionPostPredictionCallback,
        normalize_targets: bool = False,
        distance_thresholds: List[float] = [5.0],
        distance_metric: DistanceMetric = EuclideanDistance(),
        recall_thres: torch.Tensor = None,
        score_thres: float = 0.1,
        top_k_predictions: int = 100,
        dist_sync_on_step: bool = False,
        accumulate_on_cpu: bool = True,
        calc_best_score_thresholds: bool = True,
        include_classwise_ap: bool = False,
        class_names: List[str] = None,
    ):
        self.distance_thresholds = distance_thresholds
        self.distance_metric = distance_metric
        super().__init__(
            num_cls=num_cls,
            post_prediction_callback=post_prediction_callback,
            normalize_targets=normalize_targets,
            recall_thres=recall_thres,
            score_thres=score_thres,
            top_k_predictions=top_k_predictions,
            dist_sync_on_step=dist_sync_on_step,
            accumulate_on_cpu=accumulate_on_cpu,
            calc_best_score_thresholds=calc_best_score_thresholds,
            include_classwise_ap=include_classwise_ap,
            class_names=class_names,
            state_dict_prefix="distance_based_",
        )

    def update(self, preds: torch.Tensor, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
        """
        Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.
        Use distance-based definition of true positives.

        :param preds: torch.Tensor: Raw output of the model. The format might change from one model to another,
                                    but has to fit the input format of the post_prediction_callback (cx, cy, wh).
        :param target: torch.Tensor: Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH.
                                      Format:  (index, label, cx, cy, w, h)
        :param device: str: Device to run on.
        :param inputs: torch.Tensor: Input image tensor of shape (batch_size, n_img, height, width).
        :param crowd_targets: Optional[torch.Tensor]: Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH.
        """
        _, _, height, width = inputs.shape

        distance_matcher = DistanceMatching(self.distance_metric, self.distance_thresholds)

        targets = target.clone()
        crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

        preds = self.post_prediction_callback(preds, device=device)

        new_matching_info = compute_detection_matching(
            output=preds,
            targets=targets,
            height=height,
            width=width,
            crowd_targets=crowd_targets,
            top_k=self.top_k_predictions,
            denormalize_targets=self.denormalize_targets,
            device=self.device,
            return_on_cpu=self.accumulate_on_cpu,
            matching_strategy=distance_matcher,
        )

        accumulated_matching_info = getattr(self, self.state_key)
        setattr(self, self.state_key, accumulated_matching_info + new_matching_info)

    def _get_range_str(self):
        return (
            "@DIST%.2f" % self.distance_thresholds[0]
            if not len(self.distance_thresholds) > 1
            else "@DIST%.2f:%.2f" % (self.distance_thresholds[0], self.distance_thresholds[-1])
        )

update(preds, target, device, inputs, crowd_targets=None)

Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly. Use distance-based definition of true positives.

Parameters:

Name Type Description Default
preds torch.Tensor

torch.Tensor: Raw output of the model. The format might change from one model to another, but has to fit the input format of the post_prediction_callback (cx, cy, wh).

required
target torch.Tensor

torch.Tensor: Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH. Format: (index, label, cx, cy, w, h)

required
device str

str: Device to run on.

required
inputs torch.tensor

torch.Tensor: Input image tensor of shape (batch_size, n_img, height, width).

required
crowd_targets Optional[torch.Tensor]

Optional[torch.Tensor]: Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH.

None
Source code in src/super_gradients/training/metrics/detection_metrics.py, lines 326-362
def update(self, preds: torch.Tensor, target: torch.Tensor, device: str, inputs: torch.tensor, crowd_targets: Optional[torch.Tensor] = None) -> None:
    """
    Apply NMS and match all the predictions and targets of a given batch, and update the metric state accordingly.
    Use distance-based definition of true positives.

    :param preds: torch.Tensor: Raw output of the model. The format might change from one model to another,
                                but has to fit the input format of the post_prediction_callback (cx, cy, wh).
    :param target: torch.Tensor: Targets for all images of shape (total_num_targets, 6) LABEL_CXCYWH.
                                  Format:  (index, label, cx, cy, w, h)
    :param device: str: Device to run on.
    :param inputs: torch.Tensor: Input image tensor of shape (batch_size, n_img, height, width).
    :param crowd_targets: Optional[torch.Tensor]: Crowd targets for all images of shape (total_num_targets, 6), LABEL_CXCYWH.
    """
    _, _, height, width = inputs.shape

    distance_matcher = DistanceMatching(self.distance_metric, self.distance_thresholds)

    targets = target.clone()
    crowd_targets = torch.zeros(size=(0, 6), device=device) if crowd_targets is None else crowd_targets.clone()

    preds = self.post_prediction_callback(preds, device=device)

    new_matching_info = compute_detection_matching(
        output=preds,
        targets=targets,
        height=height,
        width=width,
        crowd_targets=crowd_targets,
        top_k=self.top_k_predictions,
        denormalize_targets=self.denormalize_targets,
        device=self.device,
        return_on_cpu=self.accumulate_on_cpu,
        matching_strategy=distance_matcher,
    )

    accumulated_matching_info = getattr(self, self.state_key)
    setattr(self, self.state_key, accumulated_matching_info + new_matching_info)
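
A similar hedged sketch for the distance-based variant; it relies on the constructor defaults (EuclideanDistance) and again assumes a PPYoloE-style post-prediction callback:

from super_gradients.training.metrics.detection_metrics import DetectionMetricsDistanceBased
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback

metric = DetectionMetricsDistanceBased(
    num_cls=3,
    post_prediction_callback=PPYoloEPostPredictionCallback(
        score_threshold=0.01, nms_top_k=1000, max_predictions=300, nms_threshold=0.7
    ),
    distance_thresholds=[5.0, 10.0],       # distance thresholds used instead of IoU thresholds
    class_names=["person", "car", "bicycle"],
)
print(metric.map_metric_key)  # 'distance_based_mAP@DIST5.00:10.00'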

flatten_metrics_dict(metrics_dict)

Parameters:

Name Type Description Default
metrics_dict dict

Dictionary of metric values where values can also be dictionaries containing subvalues (in the case of compound metrics).

required

Returns:

Type Description

flattened dict of metric values i.e {metric1_name: metric1_value...}

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 58-76
def flatten_metrics_dict(metrics_dict: dict):
    """
    :param metrics_dict: Dictionary of metric values where values can also be dictionaries containing subvalues
                         (in the case of compound metrics)

    :return: flattened dict of metric values i.e {metric1_name: metric1_value...}
    """
    flattened = {}
    for metric_name, metric_val in metrics_dict.items():
        if metric_name == "additional_items":
            continue
        # COLLECT ALL OF THE COMPONENTS IN THE CASE OF COMPOUND METRICS
        elif isinstance(metric_val, dict):
            for sub_metric_name, sub_metric_val in metric_val.items():
                flattened[sub_metric_name] = sub_metric_val
        else:
            flattened[metric_name] = metric_val

    return flattened
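
A runnable sketch of the flattening behaviour (the metric names in the dict are made up for illustration):

from super_gradients.training.metrics.metric_utils import flatten_metrics_dict

metrics = {
    "Accuracy": 0.91,
    "DetectionMetrics": {"Precision@0.50": 0.61, "Recall@0.50": 0.58, "mAP@0.50": 0.55},
    "additional_items": {"lr": 0.01},   # skipped by the function
}
print(flatten_metrics_dict(metrics))
# {'Accuracy': 0.91, 'Precision@0.50': 0.61, 'Recall@0.50': 0.58, 'mAP@0.50': 0.55}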

get_logging_values(loss_loggings, metrics, criterion=None)

Parameters:

Name Type Description Default
loss_loggings AverageMeter

AverageMeter running average for the loss items

required
metrics MetricCollection

MetricCollection object for running user specified metrics

required
criterion

The object the loss_loggings average meter is monitoring. When set to None, only the metrics values are computed and returned.

None

Returns:

Type Description

tuple of the computed values

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 6-23
23
def get_logging_values(loss_loggings: AverageMeter, metrics: MetricCollection, criterion=None):
    """
    :param loss_loggings: AverageMeter running average for the loss items
    :param metrics: MetricCollection object for running user specified metrics
    :param criterion: The object the loss_loggings average meter is monitoring. When set to None, only the metrics
    values are computed and returned.

    :return: tuple of the computed values
    """
    if criterion is not None:
        loss_loggingg_avg = loss_loggings.average
        if not isinstance(loss_loggingg_avg, tuple):
            loss_loggingg_avg = tuple([loss_loggingg_avg])
        logging_vals = loss_loggingg_avg + get_metrics_results_tuple(metrics)
    else:
        logging_vals = get_metrics_results_tuple(metrics)

    return logging_vals

get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names)

Returns a dictionary with the epoch results as values and their names as keys.

Parameters:

Name Type Description Default
metrics_tuple

the result tuple

required
metrics_collection

MetricsCollection

required
loss_logging_item_names

loss component's names.

required

Returns:

Type Description

dict

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 79-89
def get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names):
    """
    Returns a dictionary with the epoch results as values and their names as keys.
    :param metrics_tuple: the result tuple
    :param metrics_collection: MetricsCollection
    :param loss_logging_item_names: loss component's names.
    :return: dict
    """
    keys = loss_logging_item_names + get_metrics_titles(metrics_collection)
    metrics_dict = dict(zip(keys, list(metrics_tuple)))
    return metrics_dict

get_metrics_results_tuple(metrics_collection)

Parameters:

Name Type Description Default
metrics_collection MetricCollection

Metrics collection of the user-specified metrics

required

Returns:

Type Description

tuple of metrics values

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 44-55
def get_metrics_results_tuple(metrics_collection: MetricCollection):
    """

    :param metrics_collection: metrics collection of the user-specified metrics
    :return: tuple of metrics values
    """
    if metrics_collection is None:
        results_tuple = ()
    else:
        results_tuple = tuple(flatten_metrics_dict(metrics_collection.compute()).values())
    return results_tuple

get_metrics_titles(metrics_collection)

Parameters:

Name Type Description Default
metrics_collection MetricCollection

MetricCollection object for running user specified metrics

required

Returns:

Type Description

list of all the names of the computed values list(str)

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 26-41
def get_metrics_titles(metrics_collection: MetricCollection):
    """

    :param metrics_collection: MetricCollection object for running user specified metrics
    :return: list of all the names of the computed values list(str)
    """
    titles = []
    for metric_name, metric in metrics_collection.items():
        if metric_name == "additional_items":
            continue
        elif hasattr(metric, "component_names"):
            titles += metric.component_names
        else:
            titles.append(metric_name)

    return titles
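
A hedged sketch showing how component_names expands a compound metric into several titles; the dummy metric below is purely illustrative:

import torch
from torchmetrics import Metric, MetricCollection
from super_gradients.training.metrics.metric_utils import get_metrics_titles

class DummyCompoundMetric(Metric):
    def __init__(self):
        super().__init__()
        self.component_names = ["Precision@0.50", "Recall@0.50"]   # reported as separate titles
        self.add_state("dummy", default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, preds, target):
        pass

    def compute(self):
        return {"Precision@0.50": 0.0, "Recall@0.50": 0.0}

collection = MetricCollection({"compound": DummyCompoundMetric()})
print(get_metrics_titles(collection))   # ['Precision@0.50', 'Recall@0.50']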

get_train_loop_description_dict(metrics_tuple, metrics_collection, loss_logging_item_names, **log_items)

Returns a dictionary with the epoch's logging items as values and their names as keys, with the purpose of passing it as a description to tqdm's progress bar.

Parameters:

Name Type Description Default
metrics_tuple

the result tuple

required
metrics_collection

MetricsCollection

required
loss_logging_item_names

loss component's names.

required
**log_items

Additional logging items to be rendered.

{}

Returns:

Type Description

dict

Source code in src/super_gradients/training/metrics/metric_utils.py, lines 92-108
def get_train_loop_description_dict(metrics_tuple, metrics_collection, loss_logging_item_names, **log_items):
    """
    Returns a dictionary with the epoch's logging items as values and their names as keys, with the purpose of
     passing it as a description to tqdm's progress bar.

    :param metrics_tuple: the result tuple
    :param metrics_collection: MetricsCollection
    :param loss_logging_item_names: loss component's names.
    :param log_items: Additional logging items to be rendered.
    :return: dict
    """
    log_items.update(get_metrics_dict(metrics_tuple, metrics_collection, loss_logging_item_names))
    for key, value in log_items.items():
        if isinstance(value, torch.Tensor):
            log_items[key] = value.detach().item()

    return log_items

PoseEstimationMetrics

Bases: Metric

Implementation of COCO Keypoint evaluation metric. When instantiated with default parameters, it will default to COCO params. By default, only AR and AP metrics are computed:

>>> from super_gradients.training.metrics import PoseEstimationMetrics
>>> metric = PoseEstimationMetrics(...)
>>> metric.update(...)
>>> metrics = metric.compute()  # {"AP": 0.123, "AR": 0.456}

If you wish to get AP/AR at specific thresholds, you can specify them using the iou_thresholds_to_report argument:

>>> from super_gradients.training.metrics import PoseEstimationMetrics
>>> metric = PoseEstimationMetrics(iou_thresholds_to_report=[0.5, 0.75], ...)
>>> metric.update(...)
>>> metrics = metric.compute()  # {"AP": 0.123, "AP_0.5": 0.222, "AP_0.75": 0.111, "AR": 0.456, "AR_0.5": 0.212, "AR_0.75": 0.443}

Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py, lines 24-381
@register_metric(Metrics.POSE_ESTIMATION_METRICS)
class PoseEstimationMetrics(Metric):
    """
    Implementation of COCO Keypoint evaluation metric.
    When instantiated with default parameters, it will default to COCO params.
    By default, only AR and AP metrics are computed:

    >>> from super_gradients.training.metrics import PoseEstimationMetrics
    >>> metric = PoseEstimationMetrics(...)
    >>> metric.update(...)
    >>> metrics = metric.compute() # {"AP": 0.123, "AR": 0.456 }

    If you wish to get AP/AR at specific thresholds, you can specify them using the `iou_thresholds_to_report` argument:

    >>> from super_gradients.training.metrics import PoseEstimationMetrics
    >>> metric = PoseEstimationMetrics(iou_thresholds_to_report=[0.5, 0.75], ...)
    >>> metric.update(...)
    >>> metrics = metric.compute() # {"AP": 0.123, "AP_0.5": 0.222, "AP_0.75": 0.111, "AR": 0.456, "AR_0.5": 0.212, "AR_0.75": 0.443 }

    """

    def __init__(
        self,
        post_prediction_callback: AbstractPoseEstimationPostPredictionCallback,
        num_joints: int,
        max_objects_per_image: int = 20,
        oks_sigmas: Optional[Iterable] = None,
        iou_thresholds: Optional[Iterable] = None,
        recall_thresholds: Optional[Iterable] = None,
        iou_thresholds_to_report: Optional[Iterable] = None,
    ):
        """
        Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values.
        If you need to get additional metrics (AP at specific threshold), pass these thresholds via `iou_thresholds_to_report` argument.

        :param post_prediction_callback:  A callback to decode model predictions to poses. This should be callable that takes input (model predictions)
                                          and returns a tuple of (poses, scores)

        :param num_joints:                Number of joints per pose

        :param max_objects_per_image:     Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

        :param oks_sigmas:                OKS sigma factor for custom keypoint detection dataset.
                                          If None, then metric will use default OKS from COCO and expect num_joints to be equal 17

        :param recall_thresholds:         List of recall thresholds to compute AP.
                                          If None, then will use default 101 recall thresholds from COCO in range [0..1]

        :param iou_thresholds:            List of IoU thresholds to use. If None, then COCO version of IoU will be used (0.5 ... 0.95)

        :param iou_thresholds_to_report: List of IoU thresholds to report in the metric. By default, only AP/AR metrics are returned, but one
                                          may also request AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting `iou_thresholds_to_report=[0.5, 0.75]`

        """
        super().__init__(dist_sync_on_step=False)
        self.num_joints = num_joints
        self.max_objects_per_image = max_objects_per_image
        self.stats_names = ["AP", "AR"]

        if recall_thresholds is None:
            recall_thresholds = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True, dtype=np.float32)
        self.recall_thresholds = torch.tensor(recall_thresholds, dtype=torch.float32)

        if iou_thresholds is None:
            iou_thresholds = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True, dtype=np.float32)
        self.iou_thresholds = torch.tensor(iou_thresholds, dtype=torch.float32)

        if iou_thresholds_to_report is not None:
            self.iou_thresholds_to_report = np.array([float(t) for t in iou_thresholds_to_report], dtype=np.float32)

            if not np.isin(self.iou_thresholds_to_report, self.iou_thresholds).all():
                missing = ~np.isin(self.iou_thresholds_to_report, self.iou_thresholds)
                raise RuntimeError(
                    f"One or many IoU thresholds to report are not present in IoU thresholds. Missing thresholds: {self.iou_thresholds_to_report[missing]}"
                )

            self.stats_names += [f"AP_{t:.2f}" for t in self.iou_thresholds_to_report]
            self.stats_names += [f"AR_{t:.2f}" for t in self.iou_thresholds_to_report]
        else:
            self.iou_thresholds_to_report = None

        self.greater_component_is_better = dict((k, True) for k in self.stats_names)

        if oks_sigmas is None:
            if num_joints == 17:
                oks_sigmas = np.array([0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089])
            else:
                oks_sigmas = np.array([0.1] * num_joints)
                logger.warning(
                    f"Using default OKS sigmas of `0.1` for a custom dataset with {num_joints} joints. "
                    f"To silence this warning, you may want to specify OKS sigmas explicitly as it has direct impact on the AP score."
                )

        if len(oks_sigmas) != num_joints:
            raise ValueError(f"Length of oks_sigmas ({len(oks_sigmas)}) should be equal to num_joints {num_joints}")

        self.oks_sigmas = torch.tensor(oks_sigmas).float()

        self.component_names = list(self.greater_component_is_better.keys())
        self.components = len(self.component_names)

        self.post_prediction_callback = post_prediction_callback
        self.is_distributed = is_distributed()
        self.world_size = None
        self.rank = None
        self.add_state("predictions", default=[], dist_reduce_fx=None)

    def reset(self) -> None:
        self.predictions.clear()

    @torch.no_grad()
    def update(
        self,
        preds: Any,
        target: Any,
        gt_joints: List[np.ndarray] = None,
        gt_iscrowd: List[np.ndarray] = None,
        gt_bboxes: List[np.ndarray] = None,
        gt_areas: List[np.ndarray] = None,
        gt_samples: List[PoseEstimationSample] = None,
    ):
        """
        Decode the predictions and update the metric.

        The signature of this method is a bit complicated, because we want to support both old-style form of
        passing groundtruth information (gt_joints, gt_iscrowd, gt_bboxes, gt_areas) and a new style of passing
        groundtruth information as a list of PoseEstimationSample objects.

        Passing PoseEstimationSample objects is the more convenient and default way to go with the sample-centric datasets introduced in SuperGradients 3.3.
        The two options are mutually exclusive, so if you pass gt_samples, all other groundtruth arguments are ignored and vice versa.

        :param preds:       Raw output of the model
        :param target:      Targets for the model training (Not used for evaluation)
        :param gt_joints:   List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
                            Note that augmentation/preprocessing transformations (affine transforms specifically) must also be applied to gt_joints.
                            This is to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation,
                            where predictions are rescaled back to the original size of the image.
                            However, that makes the code much more (unnecessarily) complicated, so we do it differently and evaluate joints in the coordinate
                            system of the predicted image.

        :param gt_iscrowd:  Optional argument indicating which instances are annotated with the `iscrowd` flag and are not used for evaluation;
                            If not provided, all instances are considered as non-crowd targets.
                            For instance, in CrowdPose all instances are considered as "non-crowd".

        :param gt_bboxes:   Bounding boxes of the groundtruth instances (XYWH).
                            This is COCO-specific and is used in OKS computation for instances w/o visible keypoints.
                            If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

        :param gt_areas:    Area of each groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box,
                            so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) computation.
                            If not provided, the area is computed as the product of the width and height of the bounding box.
                            (For instance this is used in CrowdPose dataset)
        :param gt_samples:  List of ground-truth samples

        """
        predictions: List[PoseEstimationPredictions] = self.post_prediction_callback(preds)  # Decode raw predictions into poses

        if gt_samples is not None:
            self._update_with_samples(predictions, gt_samples)
        else:
            self._update_with_old_style_args(predictions, gt_joints, gt_bboxes, gt_areas, gt_iscrowd)

    def _update_with_samples(self, predictions: List[PoseEstimationPredictions], gt_samples: List[PoseEstimationSample]) -> None:
        """
        Update internal state of metric class with a batch of predictions and groundtruth samples.

        :param predictions: Decoded list of pose predictions
        :param gt_samples:  Corresponding list of groundtruth samples
        """
        for i in range(len(predictions)):
            self.update_single_image(
                predicted_poses=predictions[i].poses,
                predicted_scores=predictions[i].scores,
                gt_joints=gt_samples[i].joints,
                gt_bboxes=gt_samples[i].bboxes_xywh,
                gt_areas=gt_samples[i].areas,
                gt_iscrowd=gt_samples[i].is_crowd,
            )

    def _update_with_old_style_args(
        self,
        predictions: List[PoseEstimationPredictions],
        gt_joints: List[np.ndarray],
        gt_bboxes: Optional[List[np.ndarray]],
        gt_areas: Optional[List[np.ndarray]],
        gt_iscrowd: Optional[List[np.ndarray]],
    ) -> None:
        """
        This method is here for backward compatibility with old-style datasets that do not use PoseEstimationSample objects.
        The now-deprecated way of passing groundtruth information was through a dictionary with 'gt_joints', 'gt_bboxes', 'gt_areas', 'gt_iscrowd' keys,
        which is inconvenient and error-prone.

        It is still supported, but we recommend using PoseEstimationSample objects instead.
        :param predictions: Decoded pose predictions
        :param gt_joints: List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
        :param gt_bboxes: List of ground-truth bounding boxes for each image in the batch.
                          Each element of list is a numpy array of shape (num_instances, 4) and boxes are in XYWH format.
                          Can be None, in which case bounding boxes are computed as minimum bounding box that contains all visible keypoints.
        :param gt_areas:  List of ground-truth areas for each image in the batch.
                          Can be None, in which case areas are computed as the product of the width and height of the bounding box.
        :param gt_iscrowd: List of single-dimensional numpy arrays of shape (num_instances,) indicating which instance is
                           annotated with the `iscrowd` flag. Objects with the `iscrowd` flag are not used for evaluation.
        """
        for i in range(len(predictions)):
            self.update_single_image(
                predicted_poses=predictions[i].poses,
                predicted_scores=predictions[i].scores,
                gt_joints=gt_joints[i],
                gt_bboxes=gt_bboxes[i] if gt_bboxes is not None else None,
                gt_areas=gt_areas[i] if gt_areas is not None else None,
                gt_iscrowd=gt_iscrowd[i] if gt_iscrowd is not None else None,
            )

    def update_single_image(
        self,
        predicted_poses: Union[Tensor, np.ndarray],
        predicted_scores: Union[Tensor, np.ndarray],
        gt_joints: np.ndarray,
        gt_bboxes: Optional[np.ndarray],
        gt_areas: Optional[np.ndarray],
        gt_iscrowd: Optional[np.ndarray],
    ) -> None:
        """
        Update the internal state of the metric with a single image's predictions & corresponding groundtruth.
        The method computes OKS for predicted poses, matches them to groundtruth poses and updates the internal state of the metric.
        :param predicted_poses:  Predicted poses of shape (num_instances, num_joints, 3)
        :param predicted_scores: Predicted scores of shape (num_instances,)
        :param gt_joints:        Groundtruth joints of shape (num_instances, num_joints, 3)
        :param gt_bboxes:        Groundtruth bounding boxes of shape (num_instances, 4) in XYWH format
        :param gt_areas:         Groundtruth areas of shape (num_instances,)
        :param gt_iscrowd:       Groundtruth is_crowd flag of shape (num_instances,)
        """
        if len(predicted_poses) == 0 and len(gt_joints) == 0:
            return
        if len(predicted_poses) != len(predicted_scores):
            raise ValueError("Length of predicted poses and scores should be equal. Got {} and {}".format(len(predicted_poses), len(predicted_scores)))

        predicted_poses = convert_to_tensor(predicted_poses, dtype=torch.float32, device="cpu")
        predicted_scores = convert_to_tensor(predicted_scores, dtype=torch.float32, device="cpu")

        if gt_bboxes is None:
            gt_bboxes = compute_visible_bbox_xywh(torch.tensor(gt_joints[:, :, 0:2]), torch.tensor(gt_joints[:, :, 2]))

        if gt_areas is None:
            gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

        if gt_iscrowd is None:
            gt_iscrowd = [False] * len(gt_joints)

        gt_keypoints = convert_to_tensor(gt_joints, dtype=torch.float32, device="cpu")
        gt_areas = convert_to_tensor(gt_areas, dtype=torch.float32, device="cpu")
        gt_bboxes = convert_to_tensor(gt_bboxes, dtype=torch.float32, device="cpu")
        gt_iscrowd = convert_to_tensor(gt_iscrowd, dtype=torch.bool, device="cpu")

        gt_keypoints_xy = gt_keypoints[:, :, 0:2]
        gt_keypoints_visibility = gt_keypoints[:, :, 2]
        gt_all_kpts_invisible = gt_keypoints_visibility.eq(0).all(dim=1)
        gt_is_ignore = gt_all_kpts_invisible | gt_iscrowd

        targets = gt_keypoints_xy[~gt_is_ignore] if len(gt_joints) else []
        targets_visibilities = gt_keypoints_visibility[~gt_is_ignore] if len(gt_joints) else []
        targets_areas = gt_areas[~gt_is_ignore] if len(gt_joints) else []
        targets_bboxes = gt_bboxes[~gt_is_ignore]
        targets_ignored = gt_is_ignore[~gt_is_ignore]

        crowd_targets = gt_keypoints_xy[gt_is_ignore] if len(gt_joints) else []
        crowd_visibilities = gt_keypoints_visibility[gt_is_ignore] if len(gt_joints) else []
        crowd_targets_areas = gt_areas[gt_is_ignore]
        crowd_targets_bboxes = gt_bboxes[gt_is_ignore]

        mr = compute_img_keypoint_matching(
            predicted_poses,
            predicted_scores,
            #
            targets=targets,
            targets_visibilities=targets_visibilities,
            targets_areas=targets_areas,
            targets_bboxes=targets_bboxes,
            targets_ignored=targets_ignored,
            #
            crowd_targets=crowd_targets,
            crowd_visibilities=crowd_visibilities,
            crowd_targets_areas=crowd_targets_areas,
            crowd_targets_bboxes=crowd_targets_bboxes,
            #
            iou_thresholds=self.iou_thresholds.to("cpu"),
            sigmas=self.oks_sigmas.to("cpu"),
            top_k=self.max_objects_per_image,
        )

        self.predictions.append((mr.preds_matched.cpu(), mr.preds_to_ignore.cpu(), mr.preds_scores.cpu(), int(mr.num_targets)))

    def _sync_dist(self, dist_sync_fn=None, process_group=None):
        """
        When in distributed mode, stats are aggregated after each forward pass to the metric state. Since these have all
        different sizes we override the synchronization function since it works only for tensors (and use
        all_gather_object)
        :param dist_sync_fn:
        :return:
        """
        if self.world_size is None:
            self.world_size = get_world_size() if self.is_distributed else -1
        if self.rank is None:
            self.rank = get_local_rank() if self.is_distributed else -1

        if self.is_distributed:
            local_state_dict = self.predictions
            gathered_state_dicts = [None] * self.world_size
            torch.distributed.all_gather_object(gathered_state_dicts, local_state_dict)
            self.predictions = list(itertools.chain(*gathered_state_dicts))

    def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
        """Compute the metrics for all the accumulated results.
        :return: Metrics of interest
        """
        T = len(self.iou_thresholds)
        K = 1  # num categories

        precision = -np.ones((T, K))
        recall = -np.ones((T, K))

        predictions = self.predictions  # All gathered by this time
        if len(predictions) > 0:
            preds_matched = torch.cat([x[0].cpu() for x in predictions], dim=0)
            preds_to_ignore = torch.cat([x[1].cpu() for x in predictions], dim=0)
            preds_scores = torch.cat([x[2].cpu() for x in predictions], dim=0)
            n_targets = sum([x[3] for x in predictions])

            cls_precision, _, cls_recall, _, _ = compute_detection_metrics_per_cls(
                preds_matched=preds_matched,
                preds_to_ignore=preds_to_ignore,
                preds_scores=preds_scores,
                n_targets=n_targets,
                recall_thresholds=self.recall_thresholds.cpu(),
                score_threshold=0,
                device="cpu",
            )

            precision[:, 0] = cls_precision.cpu().numpy()
            recall[:, 0] = cls_recall.cpu().numpy()

        def summarize(s):
            if len(s[s > -1]) == 0:
                mean_s = -1
            else:
                mean_s = np.mean(s[s > -1])

            return mean_s

        metrics = {"AP": summarize(precision), "AR": summarize(recall)}

        if self.iou_thresholds_to_report is not None and len(self.iou_thresholds_to_report):
            for t in self.iou_thresholds_to_report:
                mask = np.where(t == self.iou_thresholds)[0]
                metrics[f"AP_{t:.2f}"] = summarize(precision[mask])
                metrics[f"AR_{t:.2f}"] = summarize(recall[mask])

        return metrics
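
As a rough usage sketch (the decode_callback, model and val_loader names below are hypothetical placeholders, not SuperGradients objects), a validation loop using the new-style gt_samples argument might look like this:

# Hedged sketch only: `decode_callback`, `model` and `val_loader` are placeholders you must provide.
from super_gradients.training.metrics import PoseEstimationMetrics

metric = PoseEstimationMetrics(
    post_prediction_callback=decode_callback,  # must decode raw outputs into poses and scores
    num_joints=17,
    iou_thresholds_to_report=[0.5, 0.75],
)

for images, targets, extras in val_loader:
    preds = model(images)
    # New-style call: pass PoseEstimationSample objects; old-style gt_* arguments are then ignored.
    metric.update(preds, targets, gt_samples=extras["gt_samples"])

results = metric.compute()  # contains "AP", "AR" and the requested per-threshold entries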

__init__(post_prediction_callback, num_joints, max_objects_per_image=20, oks_sigmas=None, iou_thresholds=None, recall_thresholds=None, iou_thresholds_to_report=None)

Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values. If you need additional metrics (AP/AR at a specific threshold), pass these thresholds via the iou_thresholds_to_report argument.

Parameters:

Name Type Description Default
post_prediction_callback AbstractPoseEstimationPostPredictionCallback

A callback to decode model predictions to poses. This should be a callable that takes the model predictions as input and returns a tuple of (poses, scores)

required
num_joints int

Number of joints per pose

required
max_objects_per_image int

Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

20
oks_sigmas Optional[Iterable]

OKS sigma factors for a custom keypoint detection dataset. If None, the metric will use the default COCO OKS sigmas and expects num_joints to equal 17

None
recall_thresholds Optional[Iterable]

List of recall thresholds to compute AP. If None, the default 101 recall thresholds from COCO in the range [0..1] will be used

None
iou_thresholds Optional[Iterable]

List of IoU thresholds to use. If None, the COCO IoU thresholds (0.5 ... 0.95) will be used

None
iou_thresholds_to_report Optional[Iterable]

List of IoU thresholds to report in the metric. By default, only AP/AR metrics are returned, but one may also request AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting iou_thresholds_to_report=[0.5, 0.75]

None
Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
def __init__(
    self,
    post_prediction_callback: AbstractPoseEstimationPostPredictionCallback,
    num_joints: int,
    max_objects_per_image: int = 20,
    oks_sigmas: Optional[Iterable] = None,
    iou_thresholds: Optional[Iterable] = None,
    recall_thresholds: Optional[Iterable] = None,
    iou_thresholds_to_report: Optional[Iterable] = None,
):
    """
    Compute the AP & AR metrics for pose estimation. By default, this class returns only AP and AR values.
    If you need to get additional metrics (AP at specific threshold), pass these thresholds via `iou_thresholds_to_report` argument.

    :param post_prediction_callback:  A callback to decode model predictions to poses. This should be callable that takes input (model predictions)
                                      and returns a tuple of (poses, scores)

    :param num_joints:                Number of joints per pose

    :param max_objects_per_image:     Maximum number of predicted poses to include in evaluation (Top-K poses will be used).

    :param oks_sigmas:                OKS sigma factor for custom keypoint detection dataset.
                                      If None, then metric will use default OKS from COCO and expect num_joints to be equal 17

    :param recall_thresholds:         List of recall thresholds to compute AP.
                                      If None, then will use default 101 recall thresholds from COCO in range [0..1]

    :param iou_thresholds:            List of IoU thresholds to use. If None, then COCO version of IoU will be used (0.5 ... 0.95)

    :param iou_thresholds_to_report: List of IoU thresholds to report in the metric. By default, only AP/AR metrics are returned, but one
                                      may also request AP_0.5, AP_0.75, AR_0.5, AR_0.75 by setting `iou_thresholds_to_report=[0.5, 0.75]`

    """
    super().__init__(dist_sync_on_step=False)
    self.num_joints = num_joints
    self.max_objects_per_image = max_objects_per_image
    self.stats_names = ["AP", "AR"]

    if recall_thresholds is None:
        recall_thresholds = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True, dtype=np.float32)
    self.recall_thresholds = torch.tensor(recall_thresholds, dtype=torch.float32)

    if iou_thresholds is None:
        iou_thresholds = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True, dtype=np.float32)
    self.iou_thresholds = torch.tensor(iou_thresholds, dtype=torch.float32)

    if iou_thresholds_to_report is not None:
        self.iou_thresholds_to_report = np.array([float(t) for t in iou_thresholds_to_report], dtype=np.float32)

        if not np.isin(self.iou_thresholds_to_report, self.iou_thresholds).all():
            missing = ~np.isin(self.iou_thresholds_to_report, self.iou_thresholds)
            raise RuntimeError(
                f"One or many IoU thresholds to report are not present in IoU thresholds. Missing thresholds: {self.iou_thresholds_to_report[missing]}"
            )

        self.stats_names += [f"AP_{t:.2f}" for t in self.iou_thresholds_to_report]
        self.stats_names += [f"AR_{t:.2f}" for t in self.iou_thresholds_to_report]
    else:
        self.iou_thresholds_to_report = None

    self.greater_component_is_better = dict((k, True) for k in self.stats_names)

    if oks_sigmas is None:
        if num_joints == 17:
            oks_sigmas = np.array([0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089])
        else:
            oks_sigmas = np.array([0.1] * num_joints)
            logger.warning(
                f"Using default OKS sigmas of `0.1` for a custom dataset with {num_joints} joints. "
                f"To silence this warning, you may want to specify OKS sigmas explicitly as it has direct impact on the AP score."
            )

    if len(oks_sigmas) != num_joints:
        raise ValueError(f"Length of oks_sigmas ({len(oks_sigmas)}) should be equal to num_joints {num_joints}")

    self.oks_sigmas = torch.tensor(oks_sigmas).float()

    self.component_names = list(self.greater_component_is_better.keys())
    self.components = len(self.component_names)

    self.post_prediction_callback = post_prediction_callback
    self.is_distributed = is_distributed()
    self.world_size = None
    self.rank = None
    self.add_state("predictions", default=[], dist_reduce_fx=None)

compute()

Compute the metrics for all the accumulated results.

Returns:

Type Description
Dict[str, Union[float, torch.Tensor]]

Metrics of interest

Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
def compute(self) -> Dict[str, Union[float, torch.Tensor]]:
    """Compute the metrics for all the accumulated results.
    :return: Metrics of interest
    """
    T = len(self.iou_thresholds)
    K = 1  # num categories

    precision = -np.ones((T, K))
    recall = -np.ones((T, K))

    predictions = self.predictions  # All gathered by this time
    if len(predictions) > 0:
        preds_matched = torch.cat([x[0].cpu() for x in predictions], dim=0)
        preds_to_ignore = torch.cat([x[1].cpu() for x in predictions], dim=0)
        preds_scores = torch.cat([x[2].cpu() for x in predictions], dim=0)
        n_targets = sum([x[3] for x in predictions])

        cls_precision, _, cls_recall, _, _ = compute_detection_metrics_per_cls(
            preds_matched=preds_matched,
            preds_to_ignore=preds_to_ignore,
            preds_scores=preds_scores,
            n_targets=n_targets,
            recall_thresholds=self.recall_thresholds.cpu(),
            score_threshold=0,
            device="cpu",
        )

        precision[:, 0] = cls_precision.cpu().numpy()
        recall[:, 0] = cls_recall.cpu().numpy()

    def summarize(s):
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])

        return mean_s

    metrics = {"AP": summarize(precision), "AR": summarize(recall)}

    if self.iou_thresholds_to_report is not None and len(self.iou_thresholds_to_report):
        for t in self.iou_thresholds_to_report:
            mask = np.where(t == self.iou_thresholds)[0]
            metrics[f"AP_{t:.2f}"] = summarize(precision[mask])
            metrics[f"AR_{t:.2f}"] = summarize(recall[mask])

    return metrics

update(preds, target, gt_joints=None, gt_iscrowd=None, gt_bboxes=None, gt_areas=None, gt_samples=None)

Decode the predictions and update the metric.

The signature of this method is somewhat complicated because we want to support both the old-style form of passing groundtruth information (gt_joints, gt_iscrowd, gt_bboxes, gt_areas) and the new style of passing it as a list of PoseEstimationSample objects.

Passing PoseEstimationSample objects is the more convenient and default way to go with the sample-centric datasets introduced in SuperGradients 3.3. The two options are mutually exclusive: if you pass gt_samples, all other groundtruth arguments are ignored, and vice versa.

Parameters:

Name Type Description Default
preds

Raw output of the model

required
target Any

Targets for the model training (Not used for evaluation)

required
gt_joints List[np.ndarray]

List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3). Note that augmentation/preprocessing transformations (affine transforms specifically) must also be applied to gt_joints, to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation, where predictions are rescaled back to the original size of the image. However, that makes the code much more (unnecessarily) complicated, so we do it differently and evaluate joints in the coordinate system of the predicted image.

None
gt_iscrowd List[np.ndarray]

Optional argument indicating which instances are annotated with the iscrowd flag and are not used for evaluation; if not provided, all instances are considered non-crowd targets. For instance, in CrowdPose all instances are considered "non-crowd".

None
gt_bboxes List[np.ndarray]

Bounding boxes of the groundtruth instances (XYWH). This is COCO-specific and is used in OKS computation for instances w/o visible keypoints. If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

None
gt_areas List[np.ndarray]

Area of each groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box, so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) computation. If not provided, the area is computed as the product of the width and height of the bounding box. (This is used in the CrowdPose dataset, for instance.)

None
gt_samples List[PoseEstimationSample]

List of ground-truth samples

None
Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
@torch.no_grad()
def update(
    self,
    preds: Any,
    target: Any,
    gt_joints: List[np.ndarray] = None,
    gt_iscrowd: List[np.ndarray] = None,
    gt_bboxes: List[np.ndarray] = None,
    gt_areas: List[np.ndarray] = None,
    gt_samples: List[PoseEstimationSample] = None,
):
    """
    Decode the predictions and update the metric.

    The signature of this method is a bit complicated, because we want to support both old-style form of
    passing groundtruth information (gt_joints, gt_iscrowd, gt_bboxes, gt_areas) and a new style of passing
    groundtruth information as a list of PoseEstimationSample objects.

    Passing PoseEstimationSample objects is the more convenient and default way to go with the sample-centric datasets introduced in SuperGradients 3.3.
    The two options are mutually exclusive, so if you pass gt_samples, all other groundtruth arguments are ignored and vice versa.

    :param preds:       Raw output of the model
    :param target:      Targets for the model training (Not used for evaluation)
    :param gt_joints:   List of ground-truth joints for each image in the batch. Each element is a numpy array of shape (num_instances, num_joints, 3).
                        Note that augmentation/preprocessing transformations (affine transforms specifically) must also be applied to gt_joints.
                        This is to ensure joint coordinates are transformed identically to the image. This differs from COCO evaluation,
                        where predictions are rescaled back to the original size of the image.
                        However, that makes the code much more (unnecessarily) complicated, so we do it differently and evaluate joints in the coordinate
                        system of the predicted image.

    :param gt_iscrowd:  Optional argument indicating which instances are annotated with the `iscrowd` flag and are not used for evaluation;
                        If not provided, all instances are considered as non-crowd targets.
                        For instance, in CrowdPose all instances are considered as "non-crowd".

    :param gt_bboxes:   Bounding boxes of the groundtruth instances (XYWH).
                        This is COCO-specific and is used in OKS computation for instances w/o visible keypoints.
                        If not provided, the bounding box is computed as the minimum bounding box that contains all visible keypoints.

    :param gt_areas:    Area of each groundtruth instance. In COCO this is the area of the corresponding segmentation mask and not the bounding box,
                        so it cannot be computed programmatically. Its value is used in the object-keypoint similarity (OKS) computation.
                        If not provided, the area is computed as the product of the width and height of the bounding box.
                        (For instance this is used in CrowdPose dataset)
    :param gt_samples:  List of ground-truth samples

    """
    predictions: List[PoseEstimationPredictions] = self.post_prediction_callback(preds)  # Decode raw predictions into poses

    if gt_samples is not None:
        self._update_with_samples(predictions, gt_samples)
    else:
        self._update_with_old_style_args(predictions, gt_joints, gt_bboxes, gt_areas, gt_iscrowd)

update_single_image(predicted_poses, predicted_scores, gt_joints, gt_bboxes, gt_areas, gt_iscrowd)

Update the internal state of the metric with a single image's predictions and the corresponding groundtruth. The method computes OKS for the predicted poses, matches them to groundtruth poses, and updates the internal state of the metric.

Parameters:

Name Type Description Default
predicted_poses Union[Tensor, np.ndarray]

Predicted poses of shape (num_instances, num_joints, 3)

required
predicted_scores Union[Tensor, np.ndarray]

Predicted scores of shape (num_instances,)

required
gt_joints np.ndarray

Groundtruth joints of shape (num_instances, num_joints, 3)

required
gt_bboxes Optional[np.ndarray]

Groundtruth bounding boxes of shape (num_instances, 4) in XYWH format

required
gt_areas Optional[np.ndarray]

Groundtruth areas of shape (num_instances,)

required
gt_iscrowd Optional[np.ndarray]

Groundtruth is_crowd flag of shape (num_instances,)

required
Source code in src/super_gradients/training/metrics/pose_estimation_metrics.py
def update_single_image(
    self,
    predicted_poses: Union[Tensor, np.ndarray],
    predicted_scores: Union[Tensor, np.ndarray],
    gt_joints: np.ndarray,
    gt_bboxes: Optional[np.ndarray],
    gt_areas: Optional[np.ndarray],
    gt_iscrowd: Optional[np.ndarray],
) -> None:
    """
    Update the internal state of the metric with a single image's predictions & corresponding groundtruth.
    The method computes OKS for predicted poses, matches them to groundtruth poses and updates the internal state of the metric.
    :param predicted_poses:  Predicted poses of shape (num_instances, num_joints, 3)
    :param predicted_scores: Predicted scores of shape (num_instances,)
    :param gt_joints:        Groundtruth joints of shape (num_instances, num_joints, 3)
    :param gt_bboxes:        Groundtruth bounding boxes of shape (num_instances, 4) in XYWH format
    :param gt_areas:         Groundtruth areas of shape (num_instances,)
    :param gt_iscrowd:       Groundtruth is_crowd flag of shape (num_instances,)
    """
    if len(predicted_poses) == 0 and len(gt_joints) == 0:
        return
    if len(predicted_poses) != len(predicted_scores):
        raise ValueError("Length of predicted poses and scores should be equal. Got {} and {}".format(len(predicted_poses), len(predicted_scores)))

    predicted_poses = convert_to_tensor(predicted_poses, dtype=torch.float32, device="cpu")
    predicted_scores = convert_to_tensor(predicted_scores, dtype=torch.float32, device="cpu")

    if gt_bboxes is None:
        gt_bboxes = compute_visible_bbox_xywh(torch.tensor(gt_joints[:, :, 0:2]), torch.tensor(gt_joints[:, :, 2]))

    if gt_areas is None:
        gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

    if gt_iscrowd is None:
        gt_iscrowd = [False] * len(gt_joints)

    gt_keypoints = convert_to_tensor(gt_joints, dtype=torch.float32, device="cpu")
    gt_areas = convert_to_tensor(gt_areas, dtype=torch.float32, device="cpu")
    gt_bboxes = convert_to_tensor(gt_bboxes, dtype=torch.float32, device="cpu")
    gt_iscrowd = convert_to_tensor(gt_iscrowd, dtype=torch.bool, device="cpu")

    gt_keypoints_xy = gt_keypoints[:, :, 0:2]
    gt_keypoints_visibility = gt_keypoints[:, :, 2]
    gt_all_kpts_invisible = gt_keypoints_visibility.eq(0).all(dim=1)
    gt_is_ignore = gt_all_kpts_invisible | gt_iscrowd

    targets = gt_keypoints_xy[~gt_is_ignore] if len(gt_joints) else []
    targets_visibilities = gt_keypoints_visibility[~gt_is_ignore] if len(gt_joints) else []
    targets_areas = gt_areas[~gt_is_ignore] if len(gt_joints) else []
    targets_bboxes = gt_bboxes[~gt_is_ignore]
    targets_ignored = gt_is_ignore[~gt_is_ignore]

    crowd_targets = gt_keypoints_xy[gt_is_ignore] if len(gt_joints) else []
    crowd_visibilities = gt_keypoints_visibility[gt_is_ignore] if len(gt_joints) else []
    crowd_targets_areas = gt_areas[gt_is_ignore]
    crowd_targets_bboxes = gt_bboxes[gt_is_ignore]

    mr = compute_img_keypoint_matching(
        predicted_poses,
        predicted_scores,
        #
        targets=targets,
        targets_visibilities=targets_visibilities,
        targets_areas=targets_areas,
        targets_bboxes=targets_bboxes,
        targets_ignored=targets_ignored,
        #
        crowd_targets=crowd_targets,
        crowd_visibilities=crowd_visibilities,
        crowd_targets_areas=crowd_targets_areas,
        crowd_targets_bboxes=crowd_targets_bboxes,
        #
        iou_thresholds=self.iou_thresholds.to("cpu"),
        sigmas=self.oks_sigmas.to("cpu"),
        top_k=self.max_objects_per_image,
    )

    self.predictions.append((mr.preds_matched.cpu(), mr.preds_to_ignore.cpu(), mr.preds_scores.cpu(), int(mr.num_targets)))

compute_img_keypoint_matching(preds, pred_scores, targets, targets_visibilities, targets_areas, targets_bboxes, targets_ignored, crowd_targets, crowd_visibilities, crowd_targets_areas, crowd_targets_bboxes, iou_thresholds, sigmas, top_k)

Match predictions and the targets (ground truth) with respect to IoU and confidence score for a given image.

Parameters:

Name Type Description Default
preds Tensor

Tensor of shape (K, NumJoints, 3) - Array of predicted skeletons. The last dimension encodes the X, Y and confidence score of each joint

required
pred_scores Tensor

Tensor of shape (K) - Confidence scores for each pose

required
targets Tensor

Targets joints (M, NumJoints, 2) - Array of groundtruth skeletons

required
targets_visibilities Tensor

Visibility status for each keypoint (M, NumJoints). Values are 0 - invisible, 1 - occluded, 2 - fully visible

required
targets_areas Tensor

Tensor of shape (M) - Areas of target objects

required
targets_bboxes Tensor

Tensor of shape (M,4) - Bounding boxes (XYWH) of targets

required
targets_ignored Tensor

Tensor of shape (M) - Array of targets marked as ignored (e.g., all keypoints are invisible or the target does not fit the desired area range)

required
crowd_targets Tensor

Targets joints (Mc, NumJoints, 3) - Array of groundtruth skeletons. The last dimension encodes the X, Y and visibility score of each joint: (0 - invisible, 1 - occluded, 2 - fully visible)

required
crowd_visibilities Tensor

Visibility status for each keypoint of crowd targets (Mc, NumJoints). Values are 0 - invisible, 1 - occluded, 2 - fully visible

required
crowd_targets_areas Tensor

Tensor of shape (Mc) - Areas of target objects

required
crowd_targets_bboxes Tensor

Tensor of shape (Mc, 4) - Bounding boxes (XYWH) of crowd targets

required
iou_thresholds torch.Tensor

IoU thresholds used to compute the mAP

required
sigmas Tensor

Tensor of shape (NumJoints) with sigmas for each joint. Sigma values represent how 'hard' it is to locate the exact groundtruth position of each joint.

required
top_k int

Number of predictions to keep, ordered by confidence score

required

Returns:

Type Description
ImageKeypointMatchingResult

:preds_matched: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds) - True when prediction (i) is matched with a target with respect to the (j)th IoU threshold

:preds_to_ignore: Tensor of shape (min(top_k, len(preds)), n_iou_thresholds) - True when prediction (i) is matched with a crowd target with respect to the (j)th IoU threshold

:preds_scores: Tensor of shape (min(top_k, len(preds))) with scores of the top-k predictions

:num_targets: Number of groundtruth targets (total number of targets minus the number of ignored ones)

Source code in src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_img_keypoint_matching(
    preds: Tensor,
    pred_scores: Tensor,
    targets: Tensor,
    targets_visibilities: Tensor,
    targets_areas: Tensor,
    targets_bboxes: Tensor,
    targets_ignored: Tensor,
    crowd_targets: Tensor,
    crowd_visibilities: Tensor,
    crowd_targets_areas: Tensor,
    crowd_targets_bboxes: Tensor,
    iou_thresholds: torch.Tensor,
    sigmas: Tensor,
    top_k: int,
) -> ImageKeypointMatchingResult:
    """
    Match predictions and the targets (ground truth) with respect to IoU and confidence score for a given image.

    :param preds:            Tensor of shape (K, NumJoints, 3) - Array of predicted skeletons.
                             Last dimension encode X,Y and confidence score of each joint

    :param pred_scores:      Tensor of shape (K) - Confidence scores for each pose

    :param targets:          Targets joints (M, NumJoints, 2) - Array of groundtruth skeletons

    :param targets_visibilities: Visibility status for each keypoint (M, NumJoints).
                             Values are 0 - invisible, 1 - occluded, 2 - fully visible

    :param targets_areas:    Tensor of shape (M) - Areas of target objects

    :param targets_bboxes:   Tensor of shape (M,4) - Bounding boxes (XYWH) of targets

    :param targets_ignored:  Tensor of shape (M) - Array of targets marked as ignored
                             (e.g., all keypoints are invisible or the target does not fit the desired area range)

    :param crowd_targets:    Targets joints (Mc, NumJoints, 3) - Array of groundtruth skeletons
                             Last dimension encode X,Y and visibility score of each joint:
                             (0 - invisible, 1 - occluded, 2 - fully visible)

    :param crowd_visibilities: Visibility status for each keypoint of crowd targets (Mc, NumJoints).
                             Values are 0 - invisible, 1 - occluded, 2 - fully visible

    :param crowd_targets_areas: Tensor of shape (Mc) - Areas of target objects

    :param crowd_targets_bboxes: Tensor of shape (Mc, 4) - Bounding boxes (XYWH) of crowd targets

    :param iou_thresholds:  IoU Threshold to compute the mAP

    :param sigmas:          Tensor of shape (NumJoints) with sigmas for each joint. Sigma values represent how 'hard'
                            it is to locate the exact groundtruth position of the joint.

    :param top_k:           Number of predictions to keep, ordered by confidence score

    :return:
        :preds_matched:     Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
                                True when prediction (i) is matched with a target with respect to the (j)th IoU threshold

        :preds_to_ignore:   Tensor of shape (min(top_k, len(preds)), n_iou_thresholds)
                                True when prediction (i) is matched with a crowd target with respect to the (j)th IoU threshold

        :preds_scores:      Tensor of shape (min(top_k, len(preds))) with scores of top-k predictions

        :num_targets:       Number of groundtruth targets (total num targets minus number of ignored)

    """
    num_iou_thresholds = len(iou_thresholds)

    device = preds.device if torch.is_tensor(preds) else (targets.device if torch.is_tensor(targets) else "cpu")
    num_targets = len(targets) - torch.count_nonzero(targets_ignored)

    preds_matched = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)
    targets_matched = torch.zeros(len(targets), num_iou_thresholds, dtype=torch.bool, device=device)
    preds_to_ignore = torch.zeros(len(preds), num_iou_thresholds, dtype=torch.bool, device=device)

    if preds is None or len(preds) == 0:
        return ImageKeypointMatchingResult(
            preds_matched=preds_matched,
            preds_to_ignore=preds_to_ignore,
            preds_scores=pred_scores,
            num_targets=num_targets.item(),
        )

    # Ignore all but the predictions that were top_k
    k = min(top_k, len(pred_scores))
    preds_idx_to_use = torch.topk(pred_scores, k=k, sorted=True, largest=True).indices
    preds_to_ignore[:, :] = True
    preds_to_ignore[preds_idx_to_use] = False

    if len(targets) > 0:
        iou = compute_oks(preds[preds_idx_to_use], targets, targets_visibilities, sigmas, gt_areas=targets_areas, gt_bboxes=targets_bboxes)

        # The matching priority is first detection confidence and then IoU value.
        # The detection is already sorted by confidence in NMS, so here for each prediction we order the targets by iou.
        sorted_iou, target_sorted = iou.sort(descending=True, stable=True)

        # Only iterate over IoU values higher than min threshold to speed up the process
        for pred_selected_i, target_sorted_i in (sorted_iou > iou_thresholds[0]).nonzero(as_tuple=False):

            # pred_selected_i and target_sorted_i are relative to filters/sorting, so we extract their absolute indexes
            pred_i = preds_idx_to_use[pred_selected_i]
            target_i = target_sorted[pred_selected_i, target_sorted_i]

            # Vector[j], True when IoU(pred_i, target_i) is above the (j)th threshold
            is_iou_above_threshold = sorted_iou[pred_selected_i, target_sorted_i] > iou_thresholds

            # Vector[j], True when both pred_i and target_i are not matched yet for the (j)th threshold
            are_candidates_free = torch.logical_and(~preds_matched[pred_i, :], ~targets_matched[target_i, :])

            # Vector[j], True when (pred_i, target_i) can be matched for the (j)th threshold
            are_candidates_good = torch.logical_and(is_iou_above_threshold, are_candidates_free)

            is_matching_with_ignore = are_candidates_free & are_candidates_good & targets_ignored[target_i]

            if preds_matched[pred_i].any() and is_matching_with_ignore.any():
                continue

            # For every threshold (j) where target_i and pred_i can be matched together ( are_candidates_good[j]==True )
            # fill the matching placeholders with True
            targets_matched[target_i, are_candidates_good] = True
            preds_matched[pred_i, are_candidates_good] = True

            preds_to_ignore[pred_i] = torch.logical_or(preds_to_ignore[pred_i], is_matching_with_ignore)

            # When all the targets are matched with a prediction for every IoU Threshold, stop.
            if targets_matched.all():
                break

    # Crowd targets can be matched with many predictions.
    # Therefore, for every prediction we just need to check if it has IoA large enough with any crowd target.
    if len(crowd_targets) > 0:
        # shape = (n_preds_to_use x n_crowd_targets)
        ioa = compute_oks(
            preds[preds_idx_to_use],
            crowd_targets,
            crowd_visibilities,
            sigmas,
            gt_areas=crowd_targets_areas,
            gt_bboxes=crowd_targets_bboxes,
        )

        # For each prediction, we keep its highest score with any crowd target (of same class)
        # shape = (n_preds_to_use)
        best_ioa, _ = ioa.max(1)

        # If a prediction has IoA higher than threshold (with any target of same class), then there is a match
        # shape = (n_preds_to_use x iou_thresholds)
        is_matching_with_crowd = best_ioa.view(-1, 1) > iou_thresholds.view(1, -1)

        preds_to_ignore[preds_idx_to_use] = torch.logical_or(preds_to_ignore[preds_idx_to_use], is_matching_with_crowd)

    return ImageKeypointMatchingResult(
        preds_matched=preds_matched[preds_idx_to_use],
        preds_to_ignore=preds_to_ignore[preds_idx_to_use],
        preds_scores=pred_scores[preds_idx_to_use],
        num_targets=num_targets.item(),
    )

compute_oks(pred_joints, gt_joints, gt_keypoint_visibility, sigmas, gt_areas=None, gt_bboxes=None)

Parameters:

Name Type Description Default
pred_joints Tensor

[K, NumJoints, 2] or [K, NumJoints, 3]

required
gt_joints Tensor

[M, NumJoints, 2]

required
gt_keypoint_visibility Tensor

[M, NumJoints]

required
gt_areas Tensor

[M] Area of each ground truth instance. COCOEval uses the area of the instance mask to scale OKS, so it must be provided separately. If None, the area of the bounding box of each instance computed from gt_joints will be used.

None
gt_bboxes Tensor

[M, 4] Bounding box (X, Y, W, H) of each ground truth instance. If None, the bounding box of each instance computed from gt_joints will be used.

None
sigmas Tensor

[NumJoints]

required

Returns:

Type Description
np.ndarray

IoU matrix [K, M]

Source code in src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_oks(
    pred_joints: Tensor,
    gt_joints: Tensor,
    gt_keypoint_visibility: Tensor,
    sigmas: Tensor,
    gt_areas: Tensor = None,
    gt_bboxes: Tensor = None,
) -> np.ndarray:
    """

    :param pred_joints: [K, NumJoints, 2] or [K, NumJoints, 3]
    :param gt_joints:   [M, NumJoints, 2]
    :param gt_keypoint_visibility: [M, NumJoints]
    :param gt_areas: [M] Area of each ground truth instance. COCOEval uses area of the instance mask to scale OKs, so it must be provided separately.
        If None, we will use area of bounding box of each instance computed from gt_joints.

    :param gt_bboxes: [M, 4] Bounding box (X,Y,W,H) of each ground truth instance. If None, we will use bounding box of each instance computed from gt_joints.
    :param sigmas: [NumJoints]
    :return: IoU matrix [K, M]
    """

    ious = torch.zeros((len(pred_joints), len(gt_joints)), device=pred_joints.device)
    vars = (sigmas * 2) ** 2

    if gt_bboxes is None:
        gt_bboxes = compute_visible_bbox_xywh(gt_joints, gt_keypoint_visibility)

    if gt_areas is None:
        gt_areas = gt_bboxes[:, 2] * gt_bboxes[:, 3]

    # compute oks between each detection and ground truth object
    for gt_index, (gt_keypoints, gt_keypoint_visibility, gt_bbox, gt_area) in enumerate(zip(gt_joints, gt_keypoint_visibility, gt_bboxes, gt_areas)):
        # create bounds for ignore regions(double the gt bbox)
        xg = gt_keypoints[:, 0]
        yg = gt_keypoints[:, 1]
        k1 = torch.count_nonzero(gt_keypoint_visibility > 0)

        x0 = gt_bbox[0] - gt_bbox[2]
        x1 = gt_bbox[0] + gt_bbox[2] * 2
        y0 = gt_bbox[1] - gt_bbox[3]
        y1 = gt_bbox[1] + gt_bbox[3] * 2

        for pred_index, pred_keypoints in enumerate(pred_joints):
            xd = pred_keypoints[:, 0]
            yd = pred_keypoints[:, 1]
            if k1 > 0:
                # measure the per-keypoint distance if keypoints visible
                dx = xd - xg
                dy = yd - yg
            else:
                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                dx = (x0 - xd).clamp_min(0) + (xd - x1).clamp_min(0)
                dy = (y0 - yd).clamp_min(0) + (yd - y1).clamp_min(0)

            e = (dx**2 + dy**2) / vars / (gt_area + torch.finfo(torch.float64).eps) / 2

            if k1 > 0:
                e = e[gt_keypoint_visibility > 0]
            ious[pred_index, gt_index] = torch.sum(torch.exp(-e)) / e.shape[0]

    return ious
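
For reference, the per-instance quantity accumulated in the loop above corresponds to the standard COCO object-keypoint similarity when at least one keypoint is visible. With d_i the Euclidean distance of joint i, s^2 the instance area, k_i = 2 * sigma_i, and the sum running over visible joints:

$$\mathrm{OKS} = \frac{\sum_i \exp\!\left(-\frac{d_i^2}{2\, s^2 k_i^2}\right)\,\mathbb{1}[v_i > 0]}{\sum_i \mathbb{1}[v_i > 0]}$$

which matches `e = (dx**2 + dy**2) / vars / (gt_area + eps) / 2` with `vars = (sigmas * 2) ** 2` in the code.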

compute_visible_bbox_xywh(joints, visibility_mask)

Compute the bounding box (X,Y,W,H) of the visible joints for each instance.

Parameters:

Name Type Description Default
joints Tensor

[Num Instances, Num Joints, 2+]; the last channel must have a dimension of at least 2 and is considered to contain the (X, Y) coordinates of the keypoint

required
visibility_mask Tensor

[Num Instances, Num Joints]

required

Returns:

Type Description
np.ndarray

A numpy array [Num Instances, 4] where last dimension contains bbox in format XYWH

Source code in src/super_gradients/training/metrics/pose_estimation_utils.py
def compute_visible_bbox_xywh(joints: Tensor, visibility_mask: Tensor) -> np.ndarray:
    """
    Compute the bounding box (X,Y,W,H) of the visible joints for each instance.

    :param joints:  [Num Instances, Num Joints, 2+] last channel must have dimension of
                    at least 2 that is considered to contain (X,Y) coordinates of the keypoint
    :param visibility_mask: [Num Instances, Num Joints]
    :return: A numpy array [Num Instances, 4] where last dimension contains bbox in format XYWH
    """
    visibility_mask = visibility_mask > 0
    initial_value = 1_000_000

    x1 = torch.min(joints[:, :, 0], where=visibility_mask, initial=initial_value, dim=-1)
    y1 = torch.min(joints[:, :, 1], where=visibility_mask, initial=initial_value, dim=-1)

    x1[x1 == initial_value] = 0
    y1[y1 == initial_value] = 0

    x2 = torch.max(joints[:, :, 0], where=visibility_mask, initial=0, dim=-1)
    y2 = torch.max(joints[:, :, 1], where=visibility_mask, initial=0, dim=-1)

    w = x2 - x1
    h = y2 - y1

    return torch.stack([x1, y1, w, h], dim=-1)
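
As a toy illustration of the same idea (hypothetical values, plain NumPy rather than the function above), keypoints whose visibility is 0 are simply excluded from the box:

# Illustrative sketch only: compute the XYWH box over the visible keypoints of one instance.
import numpy as np

joints = np.array([[10.0, 20.0], [30.0, 60.0], [500.0, 500.0]])  # (NumJoints, 2)
visible = np.array([True, True, False])                          # last keypoint is invisible

x1, y1 = joints[visible].min(axis=0)
x2, y2 = joints[visible].max(axis=0)
print([x1, y1, x2 - x1, y2 - y1])  # [10.0, 20.0, 20.0, 40.0]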

AbstractMetricsArgsPrepFn

Bases: ABC

Abstract preprocess metrics arguments class.

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
class AbstractMetricsArgsPrepFn(ABC):
    """
    Abstract preprocess metrics arguments class.
    """

    @abstractmethod
    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        All derived classes must implement this function and return a tuple of torch tensors (predictions, target).
        """
        raise NotImplementedError()

__call__(preds, target) abstractmethod

All derived classes must implement this function and return a tuple of torch tensors (predictions, target).

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@abstractmethod
def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    All base classes must implement this function and return a tuple of torch tensors (predictions, target).
    """
    raise NotImplementedError()
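
For illustration only, a custom preprocessing callable can subclass AbstractMetricsArgsPrepFn; the class name, threshold value, and import path below are assumptions, not part of the library:

import torch
from typing import Tuple
# Assumed import path, mirroring the "Source code in ..." reference above.
from super_gradients.training.metrics.segmentation_metrics import AbstractMetricsArgsPrepFn


class SoftmaxThresholdPrepFn(AbstractMetricsArgsPrepFn):
    """Hypothetical prep fn: softmax the logits, then binarize with a threshold."""

    def __init__(self, threshold: float = 0.5):
        self.threshold = threshold

    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        if isinstance(preds, (tuple, list)):   # multi-output models: first output is the main map
            preds = preds[0]
        preds = torch.softmax(preds, dim=1)
        return (preds > self.threshold).long(), target.long()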

Dice

Bases: torchmetrics.JaccardIndex

Dice Coefficient Metric

Args:

    num_classes: Number of classes in the dataset.
    ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1]. By default, no index is ignored, and all classes are used. IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
    threshold: Threshold value for binary or multi-label probabilities.
    reduction: a method to reduce metric score over labels:

        - ``'elementwise_mean'``: takes the mean (default)
        - ``'sum'``: takes the sum
        - ``'none'``: no reduction will be applied

    metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
        By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@register_metric(Metrics.DICE)
class Dice(torchmetrics.JaccardIndex):
    """
    Dice Coefficient Metric

    Args:
        num_classes: Number of classes in the dataset.
        ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore.
            If given, this class index does not contribute to the returned score, regardless of reduction method.
            Has no effect if given an int that is not in the range [0, num_classes-1].
            By default, no index is ignored, and all classes are used.
            IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
        threshold: Threshold value for binary or multi-label probabilities.
        reduction: a method to reduce metric score over labels:

            - ``'elementwise_mean'``: takes the mean (default)
            - ``'sum'``: takes the sum
            - ``'none'``: no reduction will be applied

        metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
            By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
    """

    def __init__(
        self,
        num_classes: int,
        dist_sync_on_step: bool = False,
        ignore_index: Optional[int] = None,
        reduction: str = "elementwise_mean",
        threshold: float = 0.5,
        metrics_args_prep_fn: Optional[AbstractMetricsArgsPrepFn] = None,
    ):

        if num_classes <= 1:
            raise ValueError(f"Dice class only for multi-class usage! For binary usage, please call {BinaryDice.__name__}")

        ignore_index, ignore_index_list, num_classes, unfiltered_num_classes = _handle_multiple_ignored_inds(ignore_index, num_classes)

        super().__init__(num_classes=num_classes, dist_sync_on_step=dist_sync_on_step, ignore_index=ignore_index, reduction=reduction, threshold=threshold)

        self.ignore_index_list = ignore_index_list
        self.unfiltered_num_classes = unfiltered_num_classes
        self.metrics_args_prep_fn = metrics_args_prep_fn or PreprocessSegmentationMetricsArgs(apply_arg_max=True)
        self.greater_is_better = True

    def update(self, preds, target: torch.Tensor):
        preds, target = self.metrics_args_prep_fn(preds, target)
        if self.ignore_index_list is not None:
            target = _map_ignored_inds(target, self.ignore_index_list, self.unfiltered_num_classes)
            preds = _map_ignored_inds(preds, self.ignore_index_list, self.unfiltered_num_classes)
        super().update(preds=preds, target=target)

    def compute(self) -> torch.Tensor:
        """Computes Dice coefficient"""
        return _dice_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction)

compute()

Computes Dice coefficient

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def compute(self) -> torch.Tensor:
    """Computes Dice coefficient"""
    return _dice_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction)
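
A short usage sketch (import path and tensor shapes are assumptions; raw logits are passed because the default prep fn applies argmax):

import torch
from super_gradients.training.metrics.segmentation_metrics import Dice  # assumed import path

metric = Dice(num_classes=3, ignore_index=0)
preds = torch.randn(4, 3, 64, 64)                 # raw logits [B, C, H, W]
target = torch.randint(0, 3, (4, 64, 64))         # integer masks [B, H, W]
metric.update(preds, target)                      # default prep fn applies argmax over the channel dim
print(metric.compute())                           # scalar Dice coefficient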

IoU

Bases: torchmetrics.JaccardIndex

IoU Metric

Args:

    num_classes: Number of classes in the dataset.
    ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1]. By default, no index is ignored, and all classes are used. IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
    threshold: Threshold value for binary or multi-label probabilities.
    reduction: a method to reduce metric score over labels:

        - ``'elementwise_mean'``: takes the mean (default)
        - ``'sum'``: takes the sum
        - ``'none'``: no reduction will be applied

    metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
        By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@register_metric(Metrics.IOU)
class IoU(torchmetrics.JaccardIndex):
    """
    IoU Metric

    Args:
        num_classes: Number of classes in the dataset.
        ignore_index: Optional[Union[int, List[int]]], specifying a target class(es) to ignore.
            If given, this class index does not contribute to the returned score, regardless of reduction method.
            Has no effect if given an int that is not in the range [0, num_classes-1].
            By default, no index is ignored, and all classes are used.
            IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
        threshold: Threshold value for binary or multi-label probabilities.
        reduction: a method to reduce metric score over labels:

            - ``'elementwise_mean'``: takes the mean (default)
            - ``'sum'``: takes the sum
            - ``'none'``: no reduction will be applied

        metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
            By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
    """

    def __init__(
        self,
        num_classes: int,
        dist_sync_on_step: bool = False,
        ignore_index: Optional[Union[int, List[int]]] = None,
        reduction: str = "elementwise_mean",
        threshold: float = 0.5,
        metrics_args_prep_fn: Optional[AbstractMetricsArgsPrepFn] = None,
    ):

        if num_classes <= 1:
            raise ValueError(f"IoU class only for multi-class usage! For binary usage, please call {BinaryIOU.__name__}")
        if isinstance(ignore_index, typing.Iterable) and reduction == "none":
            raise ValueError("passing multiple ignore indices ")
        ignore_index, ignore_index_list, num_classes, unfiltered_num_classes = _handle_multiple_ignored_inds(ignore_index, num_classes)

        super().__init__(num_classes=num_classes, dist_sync_on_step=dist_sync_on_step, ignore_index=ignore_index, reduction=reduction, threshold=threshold)

        self.unfiltered_num_classes = unfiltered_num_classes
        self.ignore_index_list = ignore_index_list
        self.metrics_args_prep_fn = metrics_args_prep_fn or PreprocessSegmentationMetricsArgs(apply_arg_max=True)
        self.greater_is_better = True

    def update(self, preds, target: torch.Tensor):
        preds, target = self.metrics_args_prep_fn(preds, target)
        if self.ignore_index_list is not None:
            target = _map_ignored_inds(target, self.ignore_index_list, self.unfiltered_num_classes)
            preds = _map_ignored_inds(preds, self.ignore_index_list, self.unfiltered_num_classes)
        super().update(preds=preds, target=target)
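
The list form of ignore_index is the distinctive option here; a usage sketch (assumed import path and shapes), keeping in mind it cannot be combined with reduction="none":

import torch
from super_gradients.training.metrics.segmentation_metrics import IoU  # assumed import path

metric = IoU(num_classes=5, ignore_index=[0, 4])  # ignore two target classes at once
preds = torch.randn(2, 5, 32, 32)                 # raw logits [B, C, H, W]
target = torch.randint(0, 5, (2, 32, 32))         # integer masks [B, H, W]
metric.update(preds, target)
print(metric.compute())                           # mean IoU over the remaining classes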

PixelAccuracy

Bases: Metric

Pixel Accuracy

Args:

    ignore_label: Optional[Union[int, List[int]]], specifying a target class(es) to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. Has no effect if given an int that is not in the range [0, num_classes-1]. By default, no index is ignored, and all classes are used. IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
    reduction: a method to reduce metric score over labels:

        - ``'elementwise_mean'``: takes the mean (default)
        - ``'sum'``: takes the sum
        - ``'none'``: no reduction will be applied

    metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
        By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
@register_metric(Metrics.PIXEL_ACCURACY)
class PixelAccuracy(Metric):
    """
    Pixel Accuracy

    Args:
        ignore_label: Optional[Union[int, List[int]]], specifying a target class(es) to ignore.
            If given, this class index does not contribute to the returned score, regardless of reduction method.
            Has no effect if given an int that is not in the range [0, num_classes-1].
            By default, no index is ignored, and all classes are used.
            IMPORTANT: reduction="none" alongside with a list of ignored indices is not supported and will raise an error.
        reduction: a method to reduce metric score over labels:

            - ``'elementwise_mean'``: takes the mean (default)
            - ``'sum'``: takes the sum
            - ``'none'``: no reduction will be applied

        metrics_args_prep_fn: Callable, inputs preprocess function applied on preds, target before updating metrics.
            By default set to PreprocessSegmentationMetricsArgs(apply_arg_max=True)
    """

    def __init__(self, ignore_label: Union[int, List[int]] = -100, dist_sync_on_step=False, metrics_args_prep_fn: Optional[AbstractMetricsArgsPrepFn] = None):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.ignore_label = ignore_label
        self.greater_is_better = True
        self.add_state("total_correct", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.add_state("total_label", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.metrics_args_prep_fn = metrics_args_prep_fn or PreprocessSegmentationMetricsArgs(apply_arg_max=True)

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        predict, target = self.metrics_args_prep_fn(preds, target)
        labeled_mask = self._handle_multiple_ignored_inds(target)

        pixel_labeled = torch.sum(labeled_mask)
        pixel_correct = torch.sum((predict == target) * labeled_mask)
        self.total_correct += pixel_correct
        self.total_label += pixel_labeled

    def _handle_multiple_ignored_inds(self, target):
        if isinstance(self.ignore_label, typing.Iterable):
            evaluated_classes_mask = torch.ones_like(target)
            for ignored_label in self.ignore_label:
                evaluated_classes_mask = evaluated_classes_mask.masked_fill(target.eq(ignored_label), 0)
        else:
            evaluated_classes_mask = target.ne(self.ignore_label)

        return evaluated_classes_mask

    def compute(self):
        _total_correct = self.total_correct.cpu().detach().numpy().astype("int64")
        _total_label = self.total_label.cpu().detach().numpy().astype("int64")
        pix_acc = np.float64(1.0) * _total_correct / (np.spacing(1, dtype=np.float64) + _total_label)
        return pix_acc
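
A usage sketch with a list-valued ignore_label (import path, label values, and shapes are assumptions):

import torch
from super_gradients.training.metrics.segmentation_metrics import PixelAccuracy  # assumed import path

metric = PixelAccuracy(ignore_label=[0, 255])     # pixels carrying these labels are excluded
preds = torch.randn(2, 4, 32, 32)                 # raw logits; default prep fn applies argmax
target = torch.randint(0, 4, (2, 32, 32))
metric.update(preds, target)
print(metric.compute())                           # fraction of correctly classified, non-ignored pixels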

PreprocessSegmentationMetricsArgs

Bases: AbstractMetricsArgsPrepFn

Default segmentation inputs preprocess function applied before updating segmentation metrics; handles multiple model outputs and applies normalizations.

Source code in src/super_gradients/training/metrics/segmentation_metrics.py
class PreprocessSegmentationMetricsArgs(AbstractMetricsArgsPrepFn):
    """
    Default segmentation inputs preprocess function before updating segmentation metrics, handles multiple inputs and
    apply normalizations.
    """

    def __init__(self, apply_arg_max: bool = False, apply_sigmoid: bool = False):
        """
        :param apply_arg_max: Whether to apply argmax on predictions tensor.
        :param apply_sigmoid:  Whether to apply sigmoid on predictions tensor.
        """
        self.apply_arg_max = apply_arg_max
        self.apply_sigmoid = apply_sigmoid

    def __call__(self, preds, target: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        # WHEN DEALING WITH MULTIPLE OUTPUTS- OUTPUTS[0] IS THE MAIN SEGMENTATION MAP
        if isinstance(preds, (tuple, list)):
            preds = preds[0]
        if self.apply_arg_max:
            _, preds = torch.max(preds, 1)
        elif self.apply_sigmoid:
            preds = torch.sigmoid(preds)

        target = target.long()
        return preds, target

__init__(apply_arg_max=False, apply_sigmoid=False)

Parameters:

Name Type Description Default
apply_arg_max bool

Whether to apply argmax on predictions tensor.

False
apply_sigmoid bool

Whether to apply sigmoid on predictions tensor.

False
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def __init__(self, apply_arg_max: bool = False, apply_sigmoid: bool = False):
    """
    :param apply_arg_max: Whether to apply argmax on predictions tensor.
    :param apply_sigmoid:  Whether to apply sigmoid on predictions tensor.
    """
    self.apply_arg_max = apply_arg_max
    self.apply_sigmoid = apply_sigmoid
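
A quick sketch of what the default prep fn does with a multi-output model (tensor shapes and import path are assumptions):

import torch
from super_gradients.training.metrics.segmentation_metrics import PreprocessSegmentationMetricsArgs  # assumed import path

prep = PreprocessSegmentationMetricsArgs(apply_arg_max=True)
outputs = (torch.randn(2, 3, 16, 16), torch.randn(2, 3, 8, 8))  # main segmentation map first, then aux head
target = torch.randint(0, 3, (2, 16, 16)).float()

preds, target = prep(outputs, target)
print(preds.shape, preds.dtype, target.dtype)  # torch.Size([2, 16, 16]) torch.int64 torch.int64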

batch_intersection_union(predict, target, nclass)

Batch Intersection of Union

Parameters:

Name Type Description Default
predict torch.Tensor

input 4D tensor

required
target torch.Tensor

label 3D tensor

required
nclass int

number of categories (int)

required
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def batch_intersection_union(predict: torch.Tensor, target: torch.Tensor, nclass: int) -> Tuple[float, float]:
    """Batch Intersection of Union

    :param predict: input 4D tensor
    :param target: label 3D tensor
    :param nclass: number of categories (int)
    """
    _, predict = torch.max(predict, 1)
    mini = 1
    maxi = nclass
    nbins = nclass
    predict = predict.cpu().numpy() + 1
    target = target.cpu().numpy() + 1

    predict = predict * (target > 0).astype(predict.dtype)
    intersection = predict * (predict == target)
    # areas of intersection and union
    area_inter, _ = np.histogram(intersection, bins=nbins, range=(mini, maxi))
    area_pred, _ = np.histogram(predict, bins=nbins, range=(mini, maxi))
    area_lab, _ = np.histogram(target, bins=nbins, range=(mini, maxi))
    area_union = area_pred + area_lab - area_inter
    assert (area_inter <= area_union).all(), "Intersection area should be smaller than Union area"
    return area_inter, area_union
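
Per-class IoU can then be derived from the returned counts; a sketch with assumed shapes and import path:

import numpy as np
import torch
from super_gradients.training.metrics.segmentation_metrics import batch_intersection_union  # assumed import path

predict = torch.randn(2, 4, 32, 32)               # logits [B, C, H, W]
target = torch.randint(0, 4, (2, 32, 32))         # labels [B, H, W]

area_inter, area_union = batch_intersection_union(predict, target, nclass=4)
iou_per_class = area_inter / (area_union + np.spacing(1))  # eps guards against classes absent from the batch
print(iou_per_class, iou_per_class.mean())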

batch_pix_accuracy(predict, target)

Batch Pixel Accuracy

Parameters:

Name Type Description Default
predict torch.Tensor

input 4D tensor

required
target torch.Tensor

label 3D tensor

required
Source code in src/super_gradients/training/metrics/segmentation_metrics.py
def batch_pix_accuracy(predict: torch.Tensor, target: torch.Tensor) -> Tuple[float, float]:
    """Batch Pixel Accuracy

    :param predict: input 4D tensor
    :param target: label 3D tensor
    """
    _, predict = torch.max(predict, 1)
    predict = predict.cpu().numpy() + 1
    target = target.cpu().numpy() + 1
    pixel_labeled = np.sum(target > 0)
    pixel_correct = np.sum((predict == target) * (target > 0))
    assert pixel_correct <= pixel_labeled, "Correct area should be smaller than Labeled"
    return pixel_correct, pixel_labeled
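
And correspondingly for the pixel-accuracy counts (same assumed import path and shapes):

import torch
from super_gradients.training.metrics.segmentation_metrics import batch_pix_accuracy  # assumed import path

predict = torch.randn(2, 4, 32, 32)               # logits [B, C, H, W]
target = torch.randint(0, 4, (2, 32, 32))         # labels [B, H, W]

pixel_correct, pixel_labeled = batch_pix_accuracy(predict, target)
print(pixel_correct / (pixel_labeled + 1e-12))    # global pixel accuracy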