
Losses

BCEDiceLoss

Bases: torch.nn.Module

Binary Cross Entropy + Dice Loss

Weighted average of BCE and Dice loss

Parameters:

Name Type Description Default
loss_weights List[float]

List of size 2 such that loss_weights[0] and loss_weights[1] are the weights for BCE and Dice, respectively.

[0.5, 0.5]
logits bool

Whether the predictions are raw logits; if True, a sigmoid is applied to the predictions inside the Dice term.

True
Source code in latest/src/super_gradients/training/losses/bce_dice_loss.py
@register_loss(Losses.BCE_DICE_LOSS)
class BCEDiceLoss(torch.nn.Module):
    """
    Binary Cross Entropy + Dice Loss

    Weighted average of BCE and Dice loss

    :param loss_weights: List of size 2 such that loss_weights[0] and loss_weights[1] are the weights for BCE and Dice, respectively.
    :param logits:       Whether the predictions are raw logits; if True, a sigmoid is applied inside the Dice term.
    """

    def __init__(self, loss_weights: List[float] = [0.5, 0.5], logits: bool = True):
        super(BCEDiceLoss, self).__init__()
        self.loss_weights = loss_weights
        self.bce = BCE()
        self.dice = BinaryDiceLoss(apply_sigmoid=logits)

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """

        :param input: Network's raw output shaped (N,1,H,W)
        :param target: Ground truth shaped (N,H,W)
        """

        return self.loss_weights[0] * self.bce(input, target) + self.loss_weights[1] * self.dice(input, target)

forward(input, target)

Parameters:

Name Type Description Default
input torch.Tensor

Network's raw output shaped (N,1,H,W)

required
target torch.Tensor

Ground truth shaped (N,H,W)

required
Source code in latest/src/super_gradients/training/losses/bce_dice_loss.py
def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """

    :param input: Network's raw output shaped (N,1,H,W)
    :param target: Ground truth shaped (N,H,W)
    """

    return self.loss_weights[0] * self.bce(input, target) + self.loss_weights[1] * self.dice(input, target)
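
A minimal usage sketch (the import path follows the source location above; shapes follow the forward docstring):

import torch
from super_gradients.training.losses.bce_dice_loss import BCEDiceLoss

criterion = BCEDiceLoss(loss_weights=[0.7, 0.3], logits=True)
pred = torch.randn(4, 1, 64, 64)             # raw logits shaped (N, 1, H, W)
target = torch.randint(0, 2, (4, 64, 64))    # binary mask shaped (N, H, W)
loss = criterion(pred, target)               # scalar: 0.7 * BCE + 0.3 * Dice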

BCE

Bases: BCEWithLogitsLoss

Binary Cross Entropy Loss

Source code in latest/src/super_gradients/training/losses/bce_loss.py
class BCE(BCEWithLogitsLoss):
    """
    Binary Cross Entropy Loss
    """

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """

        :param input: Network's raw output shaped (N,1,*)
        :param target: Ground truth shaped (N,*)
        """
        return super(BCE, self).forward(input.squeeze(1), target.float())

forward(input, target)

Parameters:

Name Type Description Default
input torch.Tensor

Network's raw output shaped (N,1,*)

required
target torch.Tensor

Ground truth shaped (N,*)

required
Source code in latest/src/super_gradients/training/losses/bce_loss.py
def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """

    :param input: Network's raw output shaped (N,1,*)
    :param target: Ground truth shaped (N,*)
    """
    return super(BCE, self).forward(input.squeeze(1), target.float())
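
A short usage sketch (import path assumed from the source location). The singleton channel dimension of the input is squeezed to align with the (N, *) target, which is cast to float internally:

import torch
from super_gradients.training.losses.bce_loss import BCE

criterion = BCE()
pred = torch.randn(8, 1, 32, 32)            # logits shaped (N, 1, H, W)
target = torch.randint(0, 2, (8, 32, 32))   # integer 0/1 labels shaped (N, H, W)
loss = criterion(pred, target)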

ChannelWiseKnowledgeDistillationLoss

Bases: nn.Module

Implementation of Channel-wise Knowledge distillation loss.

paper: "Channel-wise Knowledge Distillation for Dense Prediction", https://arxiv.org/abs/2011.13256 Official implementation: https://github.com/irfanICMLL/TorchDistiller/tree/main/SemSeg-distill

Source code in latest/src/super_gradients/training/losses/cwd_loss.py
class ChannelWiseKnowledgeDistillationLoss(nn.Module):
    """
    Implementation of Channel-wise Knowledge distillation loss.

    paper: "Channel-wise Knowledge Distillation for Dense Prediction", https://arxiv.org/abs/2011.13256
    Official implementation: https://github.com/irfanICMLL/TorchDistiller/tree/main/SemSeg-distill
    """

    def __init__(self, normalization_mode: str = "channel_wise", temperature: float = 4.0, ignore_index: Optional[int] = None):
        """
        :param normalization_mode: default is `channel_wise` normalization, as implemented in the original paper,
         where softmax is applied over the spatial dimensions. For vanilla normalization, which applies softmax over
         the channel dimension, set this value to `spatial_wise`.
        :param temperature: temperature relaxation value applied to the logits before normalization. The default
         value of `4.0` follows the original implementation.
        """
        super().__init__()
        self.T = temperature
        self.ignore_index = ignore_index

        self.kl_div = nn.KLDivLoss(reduction="sum" if ignore_index is None else "none")

        if normalization_mode not in ["channel_wise", "spatial_wise"]:
            raise ValueError(f"Unsupported normalization mode: {normalization_mode}")

        self.normalization_mode = normalization_mode

    def forward(self, student_preds: torch.Tensor, teacher_preds: torch.Tensor, target: Optional[torch.Tensor] = None):
        B, C, H, W = student_preds.size()

        # set the normalization axis and the averaging scalar.
        norm_axis = -1 if self.normalization_mode == "channel_wise" else 1
        averaging_scalar = (B * C) if self.normalization_mode == "channel_wise" else (B * H * W)

        # Softmax normalization
        softmax_teacher = torch.softmax(teacher_preds.view(B, C, -1) / self.T, dim=norm_axis)
        log_softmax_student = torch.log_softmax(student_preds.view(B, C, -1) / self.T, dim=norm_axis)

        loss = self.kl_div(log_softmax_student, softmax_teacher)

        if self.ignore_index is not None:
            valid_mask = target.view(B, -1).ne(self.ignore_index).unsqueeze(1).expand_as(loss)
            loss = (loss * valid_mask).sum()

        loss = loss * (self.T**2) / averaging_scalar
        return loss

__init__(normalization_mode='channel_wise', temperature=4.0, ignore_index=None)

Parameters:

Name Type Description Default
normalization_mode str

default is channel_wise normalization, as implemented in the original paper, where softmax is applied over the spatial dimensions. For vanilla normalization, which applies softmax over the channel dimension, set this value to spatial_wise.

'channel_wise'
temperature float

temperature relaxation value applied to the logits before normalization. The default value of 4.0 follows the original implementation.

4.0
Source code in latest/src/super_gradients/training/losses/cwd_loss.py
def __init__(self, normalization_mode: str = "channel_wise", temperature: float = 4.0, ignore_index: Optional[int] = None):
    """
    :param normalization_mode: default is `channel_wise` normalization, as implemented in the original paper,
     where softmax is applied over the spatial dimensions. For vanilla normalization, which applies softmax over
     the channel dimension, set this value to `spatial_wise`.
    :param temperature: temperature relaxation value applied to the logits before normalization. The default
     value of `4.0` follows the original implementation.
    """
    super().__init__()
    self.T = temperature
    self.ignore_index = ignore_index

    self.kl_div = nn.KLDivLoss(reduction="sum" if ignore_index is None else "none")

    if normalization_mode not in ["channel_wise", "spatial_wise"]:
        raise ValueError(f"Unsupported normalization mode: {normalization_mode}")

    self.normalization_mode = normalization_mode
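
A minimal usage sketch (import path assumed from the source location; 19 classes chosen arbitrarily for illustration):

import torch
from super_gradients.training.losses.cwd_loss import ChannelWiseKnowledgeDistillationLoss

cwd = ChannelWiseKnowledgeDistillationLoss(temperature=4.0)
student = torch.randn(2, 19, 64, 64)   # (B, C, H, W) segmentation logits
teacher = torch.randn(2, 19, 64, 64)
loss = cwd(student, teacher)

# When ignore_index is set, the segmentation target is also required so that
# ignored pixels can be masked out of the KL divergence:
cwd_masked = ChannelWiseKnowledgeDistillationLoss(ignore_index=255)
target = torch.randint(0, 19, (2, 64, 64))
loss_masked = cwd_masked(student, teacher, target)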

DDRNetLoss

Bases: OhemCELoss

Source code in latest/src/super_gradients/training/losses/ddrnet_loss.py
class DDRNetLoss(OhemCELoss):
    def __init__(
        self,
        threshold: float = 0.7,
        ohem_percentage: float = 0.1,
        weights: List[float] = [1.0, 0.4],
        ignore_label: int = 255,
        num_pixels_exclude_ignored: bool = False,
    ):
        """
        This loss is an extension of the Ohem (Online Hard Example Mining Cross Entropy) Loss.

        as defined in the paper:
        "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes" (https://arxiv.org/pdf/2101.06085.pdf)

        :param threshold: threshold for the hard example mining algorithm
        :param ohem_percentage: minimum percentage of total pixels to keep for the hard example mining algorithm
        (taking only the largest losses)
        :param weights: weight for each output of the loss. This loss supports multiple outputs (as in DDRNet with
        an auxiliary head); the loss of each head can be weighted.
        :param ignore_label: target label to be ignored
        :param num_pixels_exclude_ignored: whether to exclude ignored pixels when calculating the mining percentage.
        See OhemCELoss docs for more details.
        """
        super().__init__(threshold=threshold, mining_percent=ohem_percentage, ignore_lb=ignore_label, num_pixels_exclude_ignored=num_pixels_exclude_ignored)
        self.weights = weights

    def forward(self, predictions_list: Union[list, tuple, torch.Tensor], targets: torch.Tensor):
        if isinstance(predictions_list, torch.Tensor):
            predictions_list = (predictions_list,)

        assert len(predictions_list) == len(self.weights), "num of prediction must be the same as num of loss weights"

        losses = []
        unweighted_losses = []
        for predictions, weight in zip(predictions_list, self.weights):
            unweighted_loss = super().forward(predictions, targets)
            unweighted_losses.append(unweighted_loss)
            losses.append(unweighted_loss * weight)
        total_loss = sum(losses)
        unweighted_losses.append(total_loss)

        return total_loss, torch.stack(unweighted_losses, dim=0).detach()

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        return ["main_loss", "aux_loss1", "loss"]

component_names property

Component names for logging during training. These correspond to 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

__init__(threshold=0.7, ohem_percentage=0.1, weights=[1.0, 0.4], ignore_label=255, num_pixels_exclude_ignored=False)

This loss is an extension of the Ohem (Online Hard Example Mining Cross Entropy) Loss.

as defined in the paper "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes" (https://arxiv.org/pdf/2101.06085.pdf)

Parameters:

Name Type Description Default
threshold float

threshold for the hard example mining algorithm

0.7
ohem_percentage float

minimum percentage of total pixels to keep for the hard example mining algorithm (taking only the largest losses)

0.1
weights List[float]

weight for each output of the loss. This loss supports multiple outputs (as in DDRNet with an auxiliary head); the loss of each head can be weighted.

[1.0, 0.4]
ignore_label int

target label to be ignored

255
num_pixels_exclude_ignored bool

whether to exclude ignored pixels when calculating the mining percentage. See OhemCELoss docs for more details.

False
Source code in latest/src/super_gradients/training/losses/ddrnet_loss.py
def __init__(
    self,
    threshold: float = 0.7,
    ohem_percentage: float = 0.1,
    weights: List[float] = [1.0, 0.4],
    ignore_label: int = 255,
    num_pixels_exclude_ignored: bool = False,
):
    """
    This loss is an extension of the Ohem (Online Hard Example Mining Cross Entropy) Loss.

    as defined in the paper:
    "Deep Dual-resolution Networks for Real-time and Accurate Semantic Segmentation of Road Scenes" (https://arxiv.org/pdf/2101.06085.pdf)

    :param threshold: threshold for the hard example mining algorithm
    :param ohem_percentage: minimum percentage of total pixels to keep for the hard example mining algorithm
    (taking only the largest losses)
    :param weights: weight for each output of the loss. This loss supports multiple outputs (as in DDRNet with
    an auxiliary head); the loss of each head can be weighted.
    :param ignore_label: target label to be ignored
    :param num_pixels_exclude_ignored: whether to exclude ignored pixels when calculating the mining percentage.
    See OhemCELoss docs for more details.
    """
    super().__init__(threshold=threshold, mining_percent=ohem_percentage, ignore_lb=ignore_label, num_pixels_exclude_ignored=num_pixels_exclude_ignored)
    self.weights = weights
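
A minimal usage sketch (import path assumed from the source location). With the default two weights, the loss expects a main output and one auxiliary output:

import torch
from super_gradients.training.losses.ddrnet_loss import DDRNetLoss

criterion = DDRNetLoss(weights=[1.0, 0.4])    # one weight per prediction head
main_out = torch.randn(2, 19, 128, 128)       # (B, num_classes, H, W) logits
aux_out = torch.randn(2, 19, 128, 128)
target = torch.randint(0, 19, (2, 128, 128))
total_loss, components = criterion((main_out, aux_out), target)
# components holds [main_loss, aux_loss1, loss], matching component_names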

DEKRLoss

Bases: nn.Module

Implementation of the loss function from the "Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression" paper (https://arxiv.org/abs/2104.02300)

This loss should be used in conjunction with DEKRTargetsGenerator.

Source code in latest/src/super_gradients/training/losses/dekr_loss.py
@register_loss(Losses.DEKR_LOSS)
class DEKRLoss(nn.Module):
    """
    Implementation of the loss function from the "Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression"
    paper (https://arxiv.org/abs/2104.02300)

    This loss should be used in conjunction with DEKRTargetsGenerator.
    """

    def __init__(self, heatmap_loss_factor: float = 1.0, offset_loss_factor: float = 0.1, heatmap_loss: str = "mse"):
        """
        Instantiate the DEKR loss function. It is a two-component loss function, consisting of a heatmap (MSE) loss and an offset (Smooth L1) loss.
        The total loss is the sum of the two individual losses, weighted by the corresponding factors.

        :param heatmap_loss_factor: Weighting factor for heatmap loss
        :param offset_loss_factor: Weighting factor for offset loss
        :param heatmap_loss: Type of heatmap loss to use. Can be "mse" (Used in DEKR paper) or "qfl" (Quality Focal Loss).
                             We use QFL in our recipe as it produces better results.
        """
        super().__init__()
        self.heatmap_loss_factor = float(heatmap_loss_factor)
        self.offset_loss_factor = float(offset_loss_factor)
        self.heatmap_loss = {"mse": self.heatmap_mse_loss, "qfl": self.heatmap_qfl_loss}[heatmap_loss]

    @property
    def component_names(self):
        """
        Names of individual loss components for logging during training.
        """
        return ["heatmap", "offset", "total"]

    def forward(self, predictions: Tuple[Tensor, Tensor], targets: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tuple[Tensor, Tensor]:
        """

        :param predictions: Tuple of (heatmap, offset) predictions.
            heatmap is of shape (B, NumJoints + 1, H, W)
            offset is of shape (B, NumJoints * 2, H, W)

        :param targets: Tuple of (heatmap, mask, offset, offset_weight).
            heatmap is of shape (B, NumJoints + 1, H, W)
            mask is of shape (B, NumJoints + 1, H, W)
            offset is of shape (B, NumJoints * 2, H, W)
            offset_weight is of shape (B, NumJoints * 2, H, W)

        :return: Tuple of (loss, loss_components)
            loss is a scalar tensor with the total loss
            loss_components is a tensor of shape (3,) containing the individual loss components for logging (detached from the graph)
        """
        pred_heatmap, pred_offset = predictions
        gt_heatmap, mask, gt_offset, offset_weight = targets

        heatmap_loss = self.heatmap_loss(pred_heatmap, gt_heatmap, mask) * self.heatmap_loss_factor
        offset_loss = self.offset_loss(pred_offset, gt_offset, offset_weight) * self.offset_loss_factor

        loss = heatmap_loss + offset_loss
        components = torch.cat(
            (
                heatmap_loss.unsqueeze(0),
                offset_loss.unsqueeze(0),
                loss.unsqueeze(0),
            )
        ).detach()

        return loss, components

    def heatmap_mse_loss(self, pred_heatmap, true_heatmap, mask):
        loss = torch.nn.functional.mse_loss(pred_heatmap, true_heatmap, reduction="none") * mask
        loss = loss.mean()
        return loss

    def heatmap_qfl_loss(self, pred_heatmap, true_heatmap, mask):
        scale_factor = (true_heatmap - pred_heatmap.sigmoid()).abs().pow(2)
        loss = torch.nn.functional.binary_cross_entropy_with_logits(pred_heatmap, true_heatmap, reduction="none") * scale_factor
        loss = loss.mean()
        return loss

    def offset_loss(self, pred_offsets, true_offsets, weights):
        num_pos = torch.nonzero(weights > 0).size()[0]
        loss = torch.nn.functional.smooth_l1_loss(pred_offsets, true_offsets, reduction="none", beta=1.0 / 9) * weights
        if num_pos == 0:
            num_pos = 1.0
        loss = loss.sum() / num_pos
        return loss

component_names property

Names of individual loss components for logging during training.

__init__(heatmap_loss_factor=1.0, offset_loss_factor=0.1, heatmap_loss='mse')

Instantiate the DEKR loss function. It is a two-component loss function, consisting of a heatmap (MSE) loss and an offset (Smooth L1) loss. The total loss is the sum of the two individual losses, weighted by the corresponding factors.

Parameters:

Name Type Description Default
heatmap_loss_factor float

Weighting factor for heatmap loss

1.0
offset_loss_factor float

Weighting factor for offset loss

0.1
heatmap_loss str

Type of heatmap loss to use. Can be "mse" (Used in DEKR paper) or "qfl" (Quality Focal Loss). We use QFL in our recipe as it produces better results.

'mse'
Source code in latest/src/super_gradients/training/losses/dekr_loss.py
def __init__(self, heatmap_loss_factor: float = 1.0, offset_loss_factor: float = 0.1, heatmap_loss: str = "mse"):
    """
    Instantiate the DEKR loss function. It is a two-component loss function, consisting of a heatmap (MSE) loss and an offset (Smooth L1) loss.
    The total loss is the sum of the two individual losses, weighted by the corresponding factors.

    :param heatmap_loss_factor: Weighting factor for heatmap loss
    :param offset_loss_factor: Weighting factor for offset loss
    :param heatmap_loss: Type of heatmap loss to use. Can be "mse" (Used in DEKR paper) or "qfl" (Quality Focal Loss).
                         We use QFL in our recipe as it produces better results.
    """
    super().__init__()
    self.heatmap_loss_factor = float(heatmap_loss_factor)
    self.offset_loss_factor = float(offset_loss_factor)
    self.heatmap_loss = {"mse": self.heatmap_mse_loss, "qfl": self.heatmap_qfl_loss}[heatmap_loss]

forward(predictions, targets)

Parameters:

Name Type Description Default
predictions Tuple[Tensor, Tensor]

Tuple of (heatmap, offset) predictions. heatmap is of shape (B, NumJoints + 1, H, W) offset is of shape (B, NumJoints * 2, H, W)

required
targets Tuple[Tensor, Tensor, Tensor, Tensor]

Tuple of (heatmap, mask, offset, offset_weight). heatmap is of shape (B, NumJoints + 1, H, W) mask is of shape (B, NumJoints + 1, H, W) offset is of shape (B, NumJoints * 2, H, W) offset_weight is of shape (B, NumJoints * 2, H, W)

required

Returns:

Type Description
Tuple[Tensor, Tensor]

Tuple of (loss, loss_components) loss is a scalar tensor with the total loss loss_components is a tensor of shape (3,) containing the individual loss components for logging (detached from the graph)

Source code in latest/src/super_gradients/training/losses/dekr_loss.py
def forward(self, predictions: Tuple[Tensor, Tensor], targets: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tuple[Tensor, Tensor]:
    """

    :param predictions: Tuple of (heatmap, offset) predictions.
        heatmap is of shape (B, NumJoints + 1, H, W)
        offset is of shape (B, NumJoints * 2, H, W)

    :param targets: Tuple of (heatmap, mask, offset, offset_weight).
        heatmap is of shape (B, NumJoints + 1, H, W)
        mask is of shape (B, NumJoints + 1, H, W)
        offset is of shape (B, NumJoints * 2, H, W)
        offset_weight is of shape (B, NumJoints * 2, H, W)

    :return: Tuple of (loss, loss_components)
        loss is a scalar tensor with the total loss
        loss_components is a tensor of shape (3,) containing the individual loss components for logging (detached from the graph)
    """
    pred_heatmap, pred_offset = predictions
    gt_heatmap, mask, gt_offset, offset_weight = targets

    heatmap_loss = self.heatmap_loss(pred_heatmap, gt_heatmap, mask) * self.heatmap_loss_factor
    offset_loss = self.offset_loss(pred_offset, gt_offset, offset_weight) * self.offset_loss_factor

    loss = heatmap_loss + offset_loss
    components = torch.cat(
        (
            heatmap_loss.unsqueeze(0),
            offset_loss.unsqueeze(0),
            loss.unsqueeze(0),
        )
    ).detach()

    return loss, components
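
A minimal usage sketch with random tensors (import path assumed from the source location; 17 joints chosen for illustration). Shapes follow the forward docstring:

import torch
from super_gradients.training.losses.dekr_loss import DEKRLoss

B, num_joints, H, W = 2, 17, 64, 64
criterion = DEKRLoss(heatmap_loss="qfl")

pred_heatmap = torch.randn(B, num_joints + 1, H, W)
pred_offset = torch.randn(B, num_joints * 2, H, W)

gt_heatmap = torch.rand(B, num_joints + 1, H, W)    # target heatmaps in [0, 1]
mask = torch.ones(B, num_joints + 1, H, W)
gt_offset = torch.randn(B, num_joints * 2, H, W)
offset_weight = torch.rand(B, num_joints * 2, H, W)

loss, components = criterion((pred_heatmap, pred_offset), (gt_heatmap, mask, gt_offset, offset_weight))
# components holds [heatmap, offset, total], matching component_names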

DiceCEEdgeLoss

Bases: _Loss

Source code in latest/src/super_gradients/training/losses/dice_ce_edge_loss.py
@register_loss(Losses.DICE_CE_EDGE_LOSS)
class DiceCEEdgeLoss(_Loss):
    def __init__(
        self,
        num_classes: int,
        num_aux_heads: int = 2,
        num_detail_heads: int = 1,
        weights: Union[tuple, list] = (1, 1, 1, 1),
        dice_ce_weights: Union[tuple, list] = (1, 1),
        ignore_index: int = -100,
        edge_kernel: int = 3,
        ce_edge_weights: Union[tuple, list] = (0.5, 0.5),
    ):
        """
        Total loss is computed as follows:

            Loss-cls-edge = λ1 * CE + λ2 * M * CE , where [λ1, λ2] are ce_edge_weights.

        For the main feature map and each auxiliary head, the loss is calculated as:

            Loss-main-aux = λ3 * Loss-cls-edge + λ4 * Loss-Dice, where [λ3, λ4] are dice_ce_weights.

        For feature maps defined as detail maps, which predict only the edge mask, the loss is computed as follows:

            Loss-detail = BinaryCE + BinaryDice

        Finally, the total loss over all feature maps is computed as:

            Loss = Σw[i] * Loss-main-aux[i] + Σw[j] * Loss-detail[j], where `w` is defined as the `weights` argument
                `i` in [0, num_aux_heads], where index 0 is the main feature map.
                `j` in [1 + num_aux_heads, num_aux_heads + num_detail_heads].


        :param num_aux_heads: num of auxiliary heads.
        :param num_detail_heads: num of detail heads.
        :param weights: Loss lambda weights.
        :param dice_ce_weights: weights lambdas between (Dice, CE) losses.
        :param edge_kernel: kernel size of dilation erosion convolutions for creating the edge feature map.
        :param ce_edge_weights: weights lambdas between regular CE and edge attention CE.
        """
        super().__init__()
        # Check that arguments are valid.
        assert len(weights) == num_aux_heads + num_detail_heads + 1, "Lambda loss weights must be in same size as loss items."
        assert len(dice_ce_weights) == 2, f"dice_ce_weights must be an iterable of size 2, found: {len(dice_ce_weights)}"
        assert len(ce_edge_weights) == 2, f"ce_edge_weights must be an iterable of size 2, found: {len(ce_edge_weights)}"

        self.edge_kernel = edge_kernel
        self.num_classes = num_classes
        self.ignore_index = ignore_index
        self.weights = weights
        self.dice_ce_weights = dice_ce_weights
        self.use_detail = num_detail_heads > 0

        self.num_aux_heads = num_aux_heads
        self.num_detail_heads = num_detail_heads

        if self.use_detail:
            self.bce = nn.BCEWithLogitsLoss()
            self.binary_dice = BinaryDiceLoss(apply_sigmoid=True)

        self.ce_edge = MaskAttentionLoss(criterion=nn.CrossEntropyLoss(reduction="none", ignore_index=ignore_index), loss_weights=ce_edge_weights)
        self.dice_loss = DiceLoss(apply_softmax=True, ignore_index=None if ignore_index < 0 else ignore_index)

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        names = ["main_loss"]
        # Append aux losses names
        names += [f"aux_loss{i}" for i in range(self.num_aux_heads)]
        # Append detail losses names
        names += [f"detail_loss{i}" for i in range(self.num_detail_heads)]
        names += ["loss"]
        return names

    def forward(self, preds: Tuple[torch.Tensor], target: torch.Tensor):
        """
        :param preds: Model output predictions, must be in the following format:
         [Main-feats, Aux-feats[0], ..., Aux-feats[num_auxs-1], Detail-feats[0], ..., Detail-feats[num_details-1]]
        """
        assert (
            len(preds) == self.num_aux_heads + self.num_detail_heads + 1
        ), f"Wrong num of predictions tensors, expected {self.num_aux_heads + self.num_detail_heads + 1} found {len(preds)}"

        edge_target = target_to_binary_edge(
            target, num_classes=self.num_classes, kernel_size=self.edge_kernel, ignore_index=self.ignore_index, flatten_channels=True
        )
        losses = []
        total_loss = 0
        # Main and auxiliaries feature maps losses
        for i in range(0, 1 + self.num_aux_heads):
            ce_loss = self.ce_edge(preds[i], target, edge_target)
            dice_loss = self.dice_loss(preds[i], target)

            loss = ce_loss * self.dice_ce_weights[0] + dice_loss * self.dice_ce_weights[1]
            total_loss += self.weights[i] * loss
            losses.append(loss)

        # Detail feature maps losses
        if self.use_detail:
            for i in range(1 + self.num_aux_heads, len(preds)):
                bce_loss = self.bce(preds[i], edge_target)
                dice_loss = self.binary_dice(preds[i], edge_target)

                loss = bce_loss * self.dice_ce_weights[0] + dice_loss * self.dice_ce_weights[1]
                total_loss += self.weights[i] * loss
                losses.append(loss)

        losses.append(total_loss)

        return total_loss, torch.stack(losses, dim=0).detach()

component_names property

Component names for logging during training. These correspond to 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

__init__(num_classes, num_aux_heads=2, num_detail_heads=1, weights=(1, 1, 1, 1), dice_ce_weights=(1, 1), ignore_index=-100, edge_kernel=3, ce_edge_weights=(0.5, 0.5))

Total loss is computed as follows:

Loss-cls-edge = λ1 * CE + λ2 * M * CE , where [λ1, λ2] are ce_edge_weights.

For the main feature map and each auxiliary head, the loss is calculated as:

Loss-main-aux = λ3 * Loss-cls-edge + λ4 * Loss-Dice, where [λ3, λ4] are dice_ce_weights.

For feature maps defined as detail maps, which predict only the edge mask, the loss is computed as follows:

Loss-detail = BinaryCE + BinaryDice

Finally, the total loss over all feature maps is computed as:

Loss = Σw[i] * Loss-main-aux[i] + Σw[j] * Loss-detail[j], where `w` is defined as the `weights` argument
    `i` in [0, num_aux_heads], where index 0 is the main feature map.
    `j` in [1 + num_aux_heads, num_aux_heads + num_detail_heads].

Parameters:

Name Type Description Default
num_aux_heads int

num of auxiliary heads.

2
num_detail_heads int

num of detail heads.

1
weights Union[tuple, list]

Loss lambda weights.

(1, 1, 1, 1)
dice_ce_weights Union[tuple, list]

weights lambdas between (Dice, CE) losses.

(1, 1)
edge_kernel int

kernel size of dilation erosion convolutions for creating the edge feature map.

3
ce_edge_weights Union[tuple, list]

weights lambdas between regular CE and edge attention CE.

(0.5, 0.5)
Source code in latest/src/super_gradients/training/losses/dice_ce_edge_loss.py
def __init__(
    self,
    num_classes: int,
    num_aux_heads: int = 2,
    num_detail_heads: int = 1,
    weights: Union[tuple, list] = (1, 1, 1, 1),
    dice_ce_weights: Union[tuple, list] = (1, 1),
    ignore_index: int = -100,
    edge_kernel: int = 3,
    ce_edge_weights: Union[tuple, list] = (0.5, 0.5),
):
    """
    Total loss is computed as follows:

        Loss-cls-edge = λ1 * CE + λ2 * M * CE , where [λ1, λ2] are ce_edge_weights.

    For the main feature map and each auxiliary head, the loss is calculated as:

        Loss-main-aux = λ3 * Loss-cls-edge + λ4 * Loss-Dice, where [λ3, λ4] are dice_ce_weights.

    For feature maps defined as detail maps, which predict only the edge mask, the loss is computed as follows:

        Loss-detail = BinaryCE + BinaryDice

    Finally, the total loss over all feature maps is computed as:

        Loss = Σw[i] * Loss-main-aux[i] + Σw[j] * Loss-detail[j], where `w` is defined as the `weights` argument
            `i` in [0, num_aux_heads], where index 0 is the main feature map.
            `j` in [1 + num_aux_heads, num_aux_heads + num_detail_heads].


    :param num_aux_heads: num of auxiliary heads.
    :param num_detail_heads: num of detail heads.
    :param weights: Loss lambda weights.
    :param dice_ce_weights: weights lambdas between (Dice, CE) losses.
    :param edge_kernel: kernel size of dilation erosion convolutions for creating the edge feature map.
    :param ce_edge_weights: weights lambdas between regular CE and edge attention CE.
    """
    super().__init__()
    # Check that arguments are valid.
    assert len(weights) == num_aux_heads + num_detail_heads + 1, "Lambda loss weights must be in same size as loss items."
    assert len(dice_ce_weights) == 2, f"dice_ce_weights must be an iterable of size 2, found: {len(dice_ce_weights)}"
    assert len(ce_edge_weights) == 2, f"ce_edge_weights must be an iterable of size 2, found: {len(ce_edge_weights)}"

    self.edge_kernel = edge_kernel
    self.num_classes = num_classes
    self.ignore_index = ignore_index
    self.weights = weights
    self.dice_ce_weights = dice_ce_weights
    self.use_detail = num_detail_heads > 0

    self.num_aux_heads = num_aux_heads
    self.num_detail_heads = num_detail_heads

    if self.use_detail:
        self.bce = nn.BCEWithLogitsLoss()
        self.binary_dice = BinaryDiceLoss(apply_sigmoid=True)

    self.ce_edge = MaskAttentionLoss(criterion=nn.CrossEntropyLoss(reduction="none", ignore_index=ignore_index), loss_weights=ce_edge_weights)
    self.dice_loss = DiceLoss(apply_softmax=True, ignore_index=None if ignore_index < 0 else ignore_index)

forward(preds, target)

Parameters:

Name Type Description Default
preds Tuple[torch.Tensor]

Model output predictions, must be in the following format: [Main-feats, Aux-feats[0], ..., Aux-feats[num_auxs-1], Detail-feats[0], ..., Detail-feats[num_details-1]]

required
Source code in latest/src/super_gradients/training/losses/dice_ce_edge_loss.py
def forward(self, preds: Tuple[torch.Tensor], target: torch.Tensor):
    """
    :param preds: Model output predictions, must be in the following format:
     [Main-feats, Aux-feats[0], ..., Aux-feats[num_auxs-1], Detail-feats[0], ..., Detail-feats[num_details-1]]
    """
    assert (
        len(preds) == self.num_aux_heads + self.num_detail_heads + 1
    ), f"Wrong num of predictions tensors, expected {self.num_aux_heads + self.num_detail_heads + 1} found {len(preds)}"

    edge_target = target_to_binary_edge(
        target, num_classes=self.num_classes, kernel_size=self.edge_kernel, ignore_index=self.ignore_index, flatten_channels=True
    )
    losses = []
    total_loss = 0
    # Main and auxiliaries feature maps losses
    for i in range(0, 1 + self.num_aux_heads):
        ce_loss = self.ce_edge(preds[i], target, edge_target)
        dice_loss = self.dice_loss(preds[i], target)

        loss = ce_loss * self.dice_ce_weights[0] + dice_loss * self.dice_ce_weights[1]
        total_loss += self.weights[i] * loss
        losses.append(loss)

    # Detail feature maps losses
    if self.use_detail:
        for i in range(1 + self.num_aux_heads, len(preds)):
            bce_loss = self.bce(preds[i], edge_target)
            dice_loss = self.binary_dice(preds[i], edge_target)

            loss = bce_loss * self.dice_ce_weights[0] + dice_loss * self.dice_ce_weights[1]
            total_loss += self.weights[i] * loss
            losses.append(loss)

    losses.append(total_loss)

    return total_loss, torch.stack(losses, dim=0).detach()
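
A minimal usage sketch (import path assumed from the source location; the single-channel shape of the detail head output is an assumption for illustration, matching the binary edge target):

import torch
from super_gradients.training.losses.dice_ce_edge_loss import DiceCEEdgeLoss

B, C, H, W = 2, 19, 64, 64
criterion = DiceCEEdgeLoss(num_classes=C, num_aux_heads=2, num_detail_heads=1)

main = torch.randn(B, C, H, W)
aux1 = torch.randn(B, C, H, W)
aux2 = torch.randn(B, C, H, W)
detail = torch.randn(B, 1, H, W)            # edge-mask logits from the detail head (assumed shape)
target = torch.randint(0, C, (B, H, W))     # integer class labels

loss, components = criterion((main, aux1, aux2, detail), target)
# components holds [main_loss, aux_loss0, aux_loss1, detail_loss0, loss]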

BinaryDiceLoss

Bases: DiceLoss

Compute Dice Loss for binary class tasks (1 class only). Expects target to be a binary map of 0 and 1 values.

Source code in latest/src/super_gradients/training/losses/dice_loss.py
class BinaryDiceLoss(DiceLoss):
    """
    Compute Dice Loss for binary class tasks (1 class only).
    Expects target to be a binary map of 0 and 1 values.
    """

    def __init__(self, apply_sigmoid: bool = True, smooth: float = 1.0, eps: float = 1e-5):
        """
        :param apply_sigmoid: Whether to apply sigmoid to the predictions.
        :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
            the dice coefficient is to 1, which can be used as a regularization effect.
            As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
        :param eps: epsilon value to avoid inf.
        """
        super().__init__(apply_softmax=False, ignore_index=None, smooth=smooth, eps=eps, reduce_over_batches=False)
        self.apply_sigmoid = apply_sigmoid

    def forward(self, predict: torch.tensor, target: torch.tensor) -> torch.tensor:
        if self.apply_sigmoid:
            predict = torch.sigmoid(predict)
        return super().forward(predict=predict, target=target)

__init__(apply_sigmoid=True, smooth=1.0, eps=1e-05)

Parameters:

Name Type Description Default
apply_sigmoid bool

Whether to apply sigmoid to the predictions.

True
smooth float

laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer the dice coefficient is to 1, which can be used as a regularization effect. As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895

1.0
eps float

epsilon value to avoid inf.

1e-05
Source code in latest/src/super_gradients/training/losses/dice_loss.py
def __init__(self, apply_sigmoid: bool = True, smooth: float = 1.0, eps: float = 1e-5):
    """
    :param apply_sigmoid: Whether to apply sigmoid to the predictions.
    :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
        the dice coefficient is to 1, which can be used as a regularization effect.
        As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
    :param eps: epsilon value to avoid inf.
    """
    super().__init__(apply_softmax=False, ignore_index=None, smooth=smooth, eps=eps, reduce_over_batches=False)
    self.apply_sigmoid = apply_sigmoid
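
A short usage sketch (import path assumed from the source location):

import torch
from super_gradients.training.losses.dice_loss import BinaryDiceLoss

criterion = BinaryDiceLoss(apply_sigmoid=True)   # sigmoid is applied to the raw logits internally
pred = torch.randn(2, 1, 32, 32)
target = torch.randint(0, 2, (2, 32, 32))        # binary map of 0s and 1s
loss = criterion(pred, target)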

DiceLoss

Bases: AbstarctSegmentationStructureLoss

Compute average Dice loss between two tensors. It supports both multi-class and binary tasks. Defined in the paper: "V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation"

Source code in latest/src/super_gradients/training/losses/dice_loss.py
class DiceLoss(AbstarctSegmentationStructureLoss):
    """
    Compute average Dice loss between two tensors. It supports both multi-class and binary tasks.
    Defined in the paper: "V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation"
    """

    def _calc_numerator_denominator(self, labels_one_hot: torch.tensor, predict: torch.tensor) -> Tuple[torch.tensor, torch.tensor]:
        """
        Calculate dice metric's numerator and denominator.

        :param labels_one_hot: target in one hot format.   shape: [BS, num_classes, img_width, img_height]
        :param predict: predictions tensor.                shape: [BS, num_classes, img_width, img_height]
        :return:
            numerator = intersection between predictions and target. shape: [BS, num_classes, img_width, img_height]
            denominator = sum of predictions and target areas.       shape: [BS, num_classes, img_width, img_height]
        """
        numerator = labels_one_hot * predict
        denominator = labels_one_hot + predict
        return numerator, denominator

    def _calc_loss(self, numerator: torch.tensor, denominator: torch.tensor) -> torch.tensor:
        """
        Calculate dice loss.
        All tensors are of shape [BS] if self.reduce_over_batches else [num_classes].

        :param numerator: intersection between predictions and target.
        :param denominator: total number of pixels of prediction and target.
        """
        loss = 1.0 - ((2.0 * numerator + self.smooth) / (denominator + self.eps + self.smooth))
        return loss
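
A short usage sketch (import path assumed from the source location). Per _calc_loss above, a perfect overlap drives the dice coefficient toward 1 and the loss toward 0:

import torch
from super_gradients.training.losses.dice_loss import DiceLoss

criterion = DiceLoss(apply_softmax=True)
pred = torch.randn(2, 3, 16, 16)           # (BS, num_classes, H, W) logits
target = torch.randint(0, 3, (2, 16, 16))  # integer class labels
loss = criterion(pred, target)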

GeneralizedDiceLoss

Bases: DiceLoss

Compute the Generalised Dice loss, where the contribution of each label is normalized by the inverse of its volume in order to deal with class imbalance. Defined in the paper: "Generalised Dice overlap as a deep learning loss function for highly unbalanced segmentations"

Parameters:

Name Type Description Default
smooth float

default value is 0; laplace smoothing is not recommended with GeneralizedDiceLoss because the weighted values to be added are very small.

0.0
eps float

default value is 1e-17; must be a very small value, because the weighted intersection and denominator are very small after multiplication with 1 / counts ** 2

1e-17
Source code in latest/src/super_gradients/training/losses/dice_loss.py
class GeneralizedDiceLoss(DiceLoss):
    """
    Compute the Generalised Dice loss, where the contribution of each label is normalized by the inverse of its
     volume in order to deal with class imbalance.
    Defined in the paper: "Generalised Dice overlap as a deep learning loss function for highly unbalanced
     segmentations"

    :param smooth:  default value is 0; laplace smoothing is not recommended with GeneralizedDiceLoss
         because the weighted values to be added are very small.
    :param eps:     default value is 1e-17; must be a very small value, because the weighted `intersection` and
        `denominator` are very small after multiplication with `1 / counts ** 2`
    """

    def __init__(
        self,
        apply_softmax: bool = True,
        ignore_index: int = None,
        smooth: float = 0.0,
        eps: float = 1e-17,
        reduce_over_batches: bool = False,
        reduction: Union[LossReduction, str] = "mean",
    ):
        """
        :param apply_softmax: Whether to apply softmax to the predictions.
        :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
            the dice coefficient is to 1, which can be used as a regularization effect.
            As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
        :param eps: epsilon value to avoid inf.
        :param reduce_over_batches: Whether to apply reduction over the batch axis if set True,
         default is `False` to average over the classes axis.
        :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
            `none`: no reduction will be applied.
            `mean`: the sum of the output will be divided by the number of elements in the output.
            `sum`: the output will be summed.
            Default: `mean`
        """
        super().__init__(
            apply_softmax=apply_softmax,
            ignore_index=ignore_index,
            smooth=smooth,
            eps=eps,
            reduce_over_batches=reduce_over_batches,
            generalized_metric=True,
            weight=None,
            reduction=reduction,
        )

__init__(apply_softmax=True, ignore_index=None, smooth=0.0, eps=1e-17, reduce_over_batches=False, reduction='mean')

Parameters:

Name Type Description Default
apply_softmax bool

Whether to apply softmax to the predictions.

True
smooth float

laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer the dice coefficient is to 1, which can be used as a regularization effect. As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895

0.0
eps float

epsilon value to avoid inf.

1e-17
reduce_over_batches bool

Whether to apply reduction over the batch axis if set True, default is False to average over the classes axis.

False
reduction Union[LossReduction, str]

Specifies the reduction to apply to the output: none | mean | sum. none: no reduction will be applied. mean: the sum of the output will be divided by the number of elements in the output. sum: the output will be summed. Default: mean

'mean'
Source code in latest/src/super_gradients/training/losses/dice_loss.py
def __init__(
    self,
    apply_softmax: bool = True,
    ignore_index: int = None,
    smooth: float = 0.0,
    eps: float = 1e-17,
    reduce_over_batches: bool = False,
    reduction: Union[LossReduction, str] = "mean",
):
    """
    :param apply_softmax: Whether to apply softmax to the predictions.
    :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
        the dice coefficient is to 1, which can be used as a regularization effect.
        As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
    :param eps: epsilon value to avoid inf.
    :param reduce_over_batches: Whether to apply reduction over the batch axis if set True,
     default is `False` to average over the classes axis.
    :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
        `none`: no reduction will be applied.
        `mean`: the sum of the output will be divided by the number of elements in the output.
        `sum`: the output will be summed.
        Default: `mean`
    """
    super().__init__(
        apply_softmax=apply_softmax,
        ignore_index=ignore_index,
        smooth=smooth,
        eps=eps,
        reduce_over_batches=reduce_over_batches,
        generalized_metric=True,
        weight=None,
        reduction=reduction,
    )
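
Usage mirrors DiceLoss; per-class terms are additionally weighted by 1 / counts ** 2 internally (import path assumed from the source location):

import torch
from super_gradients.training.losses.dice_loss import GeneralizedDiceLoss

criterion = GeneralizedDiceLoss()
pred = torch.randn(2, 3, 16, 16)           # (BS, num_classes, H, W) logits
target = torch.randint(0, 3, (2, 16, 16))  # integer class labels
loss = criterion(pred, target)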

FocalLoss

Bases: _Loss

Wraps focal loss around an existing loss_fcn(), e.g. criterion = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)

Source code in latest/src/super_gradients/training/losses/focal_loss.py
class FocalLoss(_Loss):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)"""

    def __init__(self, loss_fcn: nn.BCEWithLogitsLoss, gamma: float = 1.5, alpha: float = 0.25):
        super(FocalLoss, self).__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = "none"  # required to apply FocalLoss to each element

    def forward(self, pred: torch.tensor, true: torch.tensor) -> torch.tensor:
        loss = self.loss_fcn(pred, true)

        pred_prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        loss *= alpha_factor * modulating_factor

        if self.reduction == "mean":
            return loss.mean()
        elif self.reduction == "sum":
            return loss.sum()
        else:  # 'none'
            return loss
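
A short usage sketch following the class docstring (the shape is arbitrary; the wrapped loss must be nn.BCEWithLogitsLoss):

import torch
from torch import nn
from super_gradients.training.losses.focal_loss import FocalLoss

criterion = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5, alpha=0.25)
pred = torch.randn(4, 80)                      # raw logits
target = torch.randint(0, 2, (4, 80)).float()  # binary targets
loss = criterion(pred, target)                 # reduced with the wrapped loss's original reduction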

BinaryIoULoss

Bases: IoULoss

Compute IoU Loss for binary class tasks (1 class only). Expects target to be a binary map of 0 and 1 values.

Source code in latest/src/super_gradients/training/losses/iou_loss.py
class BinaryIoULoss(IoULoss):
    """
    Compute IoU Loss for binary class tasks (1 class only).
    Expects target to be a binary map of 0 and 1 values.
    """

    def __init__(self, apply_sigmoid: bool = True, smooth: float = 1.0, eps: float = 1e-5):
        """
        :param apply_sigmoid: Whether to apply sigmoid to the predictions.
        :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
            the IoU coefficient is to 1, which can be used as a regularization effect.
            As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
        :param eps: epsilon value to avoid inf.
        """
        super().__init__(apply_softmax=False, ignore_index=None, smooth=smooth, eps=eps, reduce_over_batches=False)
        self.apply_sigmoid = apply_sigmoid

    def forward(self, predict: torch.tensor, target: torch.tensor) -> torch.tensor:
        if self.apply_sigmoid:
            predict = torch.sigmoid(predict)
        return super().forward(predict=predict, target=target)

__init__(apply_sigmoid=True, smooth=1.0, eps=1e-05)

Parameters:

Name Type Description Default
apply_sigmoid bool

Whether to apply sigmoid to the predictions.

True
smooth float

laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer the IoU coefficient is to 1, which can be used as a regularization effect. As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895

1.0
eps float

epsilon value to avoid inf.

1e-05
Source code in latest/src/super_gradients/training/losses/iou_loss.py
def __init__(self, apply_sigmoid: bool = True, smooth: float = 1.0, eps: float = 1e-5):
    """
    :param apply_sigmoid: Whether to apply sigmoid to the predictions.
    :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
        the IoU coefficient is to 1, which can be used as a regularization effect.
        As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
    :param eps: epsilon value to avoid inf.
    """
    super().__init__(apply_softmax=False, ignore_index=None, smooth=smooth, eps=eps, reduce_over_batches=False)
    self.apply_sigmoid = apply_sigmoid

GeneralizedIoULoss

Bases: IoULoss

Compute the Generalised IoU loss, where the contribution of each label is normalized by the inverse of its volume in order to deal with class imbalance.

Parameters:

Name Type Description Default
smooth float

default value is 0; laplace smoothing is not recommended with GeneralizedIoULoss because the weighted values to be added are very small.

0.0
eps float

default value is 1e-17; must be a very small value, because the weighted intersection and denominator are very small after multiplication with 1 / counts ** 2

1e-17
Source code in latest/src/super_gradients/training/losses/iou_loss.py
class GeneralizedIoULoss(IoULoss):
    """
    Compute the Generalised IoU loss, where the contribution of each label is normalized by the inverse of its
     volume in order to deal with class imbalance.

    :param smooth (float): default value is 0; laplace smoothing is not recommended with GeneralizedIoULoss
         because the weighted values to be added are very small.
    :param eps (float): default value is 1e-17; must be a very small value, because the weighted `intersection` and
        `denominator` are very small after multiplication with `1 / counts ** 2`
    """

    def __init__(
        self,
        apply_softmax: bool = True,
        ignore_index: int = None,
        smooth: float = 0.0,
        eps: float = 1e-17,
        reduce_over_batches: bool = False,
        reduction: Union[LossReduction, str] = "mean",
    ):
        """
        :param apply_softmax: Whether to apply softmax to the predictions.
        :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
            the IoU coefficient is to 1, which can be used as a regularization effect.
            As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
        :param eps: epsilon value to avoid inf.
        :param reduce_over_batches: Whether to apply reduction over the batch axis if set True,
         default is `False` to average over the classes axis.
        :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
            `none`: no reduction will be applied.
            `mean`: the sum of the output will be divided by the number of elements in the output.
            `sum`: the output will be summed.
            Default: `mean`
        """
        super().__init__(
            apply_softmax=apply_softmax,
            ignore_index=ignore_index,
            smooth=smooth,
            eps=eps,
            reduce_over_batches=reduce_over_batches,
            generalized_metric=True,
            weight=None,
            reduction=reduction,
        )

__init__(apply_softmax=True, ignore_index=None, smooth=0.0, eps=1e-17, reduce_over_batches=False, reduction='mean')

Parameters:

Name Type Description Default
apply_softmax bool

Whether to apply softmax to the predictions.

True
smooth float

laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer the IoU coefficient is to 1, which can be used as a regularization effect. As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895

0.0
eps float

epsilon value to avoid inf.

1e-17
reduce_over_batches bool

Whether to apply reduction over the batch axis if set True, default is False to average over the classes axis.

False
reduction Union[LossReduction, str]

Specifies the reduction to apply to the output: none | mean | sum. none: no reduction will be applied. mean: the sum of the output will be divided by the number of elements in the output. sum: the output will be summed. Default: mean

'mean'
Source code in latest/src/super_gradients/training/losses/iou_loss.py
def __init__(
    self,
    apply_softmax: bool = True,
    ignore_index: int = None,
    smooth: float = 0.0,
    eps: float = 1e-17,
    reduce_over_batches: bool = False,
    reduction: Union[LossReduction, str] = "mean",
):
    """
    :param apply_softmax: Whether to apply softmax to the predictions.
    :param smooth: laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
        the IoU coefficient is to 1, which can be used as a regularization effect.
        As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
    :param eps: epsilon value to avoid inf.
    :param reduce_over_batches: Whether to apply reduction over the batch axis if set True,
     default is `False` to average over the classes axis.
    :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
        `none`: no reduction will be applied.
        `mean`: the sum of the output will be divided by the number of elements in the output.
        `sum`: the output will be summed.
        Default: `mean`
    """
    super().__init__(
        apply_softmax=apply_softmax,
        ignore_index=ignore_index,
        smooth=smooth,
        eps=eps,
        reduce_over_batches=reduce_over_batches,
        generalized_metric=True,
        weight=None,
        reduction=reduction,
    )

IoULoss

Bases: AbstarctSegmentationStructureLoss

Compute average IoU loss between two tensors. It supports both multi-class and binary tasks.

Source code in latest/src/super_gradients/training/losses/iou_loss.py
class IoULoss(AbstarctSegmentationStructureLoss):
    """
    Compute average IoU loss between two tensors. It supports both multi-class and binary tasks.
    """

    def _calc_numerator_denominator(self, labels_one_hot: torch.tensor, predict: torch.tensor) -> Tuple[torch.tensor, torch.tensor]:
        """
        Calculate iou metric's numerator and denominator.

        :param labels_one_hot: target in one hot format.   shape: [BS, num_classes, img_width, img_height]
        :param predict: predictions tensor.                shape: [BS, num_classes, img_width, img_height]
        :return:
            numerator = intersection between predictions and target.    shape: [BS, num_classes, img_width, img_height]
            denominator = area of union between predictions and target. shape: [BS, num_classes, img_width, img_height]
        """
        numerator = labels_one_hot * predict
        denominator = labels_one_hot + predict - numerator
        return numerator, denominator

    def _calc_loss(self, numerator, denominator):
        """
        Calculate iou loss.
        All tensors are of shape [BS] if self.reduce_over_batches else [num_classes]

        :param numerator: intersection between predictions and target.
        :param denominator: area of union between prediction pixels and target pixels.
        """
        loss = 1.0 - ((numerator + self.smooth) / (denominator + self.eps + self.smooth))
        return loss
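
A short usage sketch (import path assumed from the source location); the interface matches DiceLoss:

import torch
from super_gradients.training.losses.iou_loss import IoULoss

criterion = IoULoss(apply_softmax=True)
pred = torch.randn(2, 3, 16, 16)           # (BS, num_classes, H, W) logits
target = torch.randint(0, 3, (2, 16, 16))  # integer class labels
loss = criterion(pred, target)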

KDLogitsLoss

Bases: _Loss

Knowledge distillation loss, wraps the task loss and distillation loss

Source code in latest/src/super_gradients/training/losses/kd_losses.py
@register_loss(Losses.KD_LOSS)
class KDLogitsLoss(_Loss):
    """Knowledge distillation loss, wraps the task loss and distillation loss"""

    def __init__(self, task_loss_fn: _Loss, distillation_loss_fn: _Loss = KDklDivLoss(), distillation_loss_coeff: float = 0.5):
        """
        :param task_loss_fn: task loss. E.g., LabelSmoothingCrossEntropyLoss
        :param distillation_loss_fn: distillation loss. E.g., KLDivLoss
        :param distillation_loss_coeff: weight of the distillation loss in the total loss; the task loss is weighted by (1 - distillation_loss_coeff).
        """

        super(KDLogitsLoss, self).__init__()
        self.task_loss_fn = task_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.distillation_loss_coeff = distillation_loss_coeff

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        return ["Loss", "Task Loss", "Distillation Loss"]

    def forward(self, kd_module_output, target):
        task_loss = self.task_loss_fn(kd_module_output.student_output, target)
        if isinstance(task_loss, tuple):  # SOME LOSS FUNCTIONS RETURNS LOSS AND LOG_ITEMS
            task_loss = task_loss[0]
        distillation_loss = self.distillation_loss_fn(kd_module_output.student_output, kd_module_output.teacher_output)
        loss = task_loss * (1 - self.distillation_loss_coeff) + distillation_loss * self.distillation_loss_coeff

        return loss, torch.cat((loss.unsqueeze(0), task_loss.unsqueeze(0), distillation_loss.unsqueeze(0))).detach()
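
A minimal usage sketch. The KD module output is stubbed with a SimpleNamespace, since forward only reads its student_output and teacher_output attributes:

import torch
import torch.nn as nn
from types import SimpleNamespace

# Stub standing in for the KD module output consumed by KDLogitsLoss.forward.
kd_output = SimpleNamespace(
    student_output=torch.randn(8, 10, requires_grad=True),  # student logits
    teacher_output=torch.randn(8, 10),                      # teacher logits
)
target = torch.randint(0, 10, (8,))

criterion = KDLogitsLoss(task_loss_fn=nn.CrossEntropyLoss(), distillation_loss_coeff=0.5)
loss, log_items = criterion(kd_output, target)
# log_items holds the detached [total, task, distillation] losses for logging.
loss.backward()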

component_names property

Component names for logging during training. These correspond to the 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

__init__(task_loss_fn, distillation_loss_fn=KDklDivLoss(), distillation_loss_coeff=0.5)

Parameters:

Name Type Description Default
task_loss_fn _Loss

task loss. E.g., LabelSmoothingCrossEntropyLoss

required
distillation_loss_fn _Loss

distillation loss. E.g., KLDivLoss

KDklDivLoss()
distillation_loss_coeff float

weight of the distillation loss; the task loss is weighted by (1 - distillation_loss_coeff)

0.5
Source code in latest/src/super_gradients/training/losses/kd_losses.py
def __init__(self, task_loss_fn: _Loss, distillation_loss_fn: _Loss = KDklDivLoss(), distillation_loss_coeff: float = 0.5):
    """
    :param task_loss_fn: task loss. E.g., LabelSmoothingCrossEntropyLoss
    :param distillation_loss_fn: distillation loss. E.g., KLDivLoss
    :param distillation_loss_coeff: weight of the distillation loss; the task loss is weighted by (1 - distillation_loss_coeff)
    """

    super(KDLogitsLoss, self).__init__()
    self.task_loss_fn = task_loss_fn
    self.distillation_loss_fn = distillation_loss_fn
    self.distillation_loss_coeff = distillation_loss_coeff

KDklDivLoss

Bases: KLDivLoss

KL divergence wrapper for knowledge distillation

Source code in latest/src/super_gradients/training/losses/kd_losses.py
class KDklDivLoss(KLDivLoss):
    """KL divergence wrapper for knowledge distillation"""

    def __init__(self):
        super(KDklDivLoss, self).__init__(reduction="batchmean")

    def forward(self, student_output, teacher_output):
        return super(KDklDivLoss, self).forward(torch.log_softmax(student_output, dim=1), torch.softmax(teacher_output, dim=1))

LabelSmoothingCrossEntropyLoss

Bases: nn.CrossEntropyLoss

CrossEntropyLoss - with the ability to receive distributions as targets, and optional label smoothing

Source code in latest/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py
@register_loss(Losses.CROSS_ENTROPY)
class LabelSmoothingCrossEntropyLoss(nn.CrossEntropyLoss):
    """CrossEntropyLoss - with ability to recieve distrbution as targets, and optional label smoothing"""

    def __init__(self, weight=None, ignore_index=-100, reduction="mean", smooth_eps=None, smooth_dist=None, from_logits=True):
        super(LabelSmoothingCrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction)
        self.smooth_eps = smooth_eps
        self.smooth_dist = smooth_dist
        self.from_logits = from_logits

    def forward(self, input, target, smooth_dist=None):
        if smooth_dist is None:
            smooth_dist = self.smooth_dist
        loss = cross_entropy(
            input,
            target,
            weight=self.weight,
            ignore_index=self.ignore_index,
            reduction=self.reduction,
            smooth_eps=self.smooth_eps,
            smooth_dist=smooth_dist,
            from_logits=self.from_logits,
        )
        # CHANGED TO THE CURRENT FORMAT - OUR CRITERION FUNCTIONS SHOULD ALL NOW RETURN A TUPLE OF (LOSS_FOR_BACKPROP, ADDITIONAL_ITEMS)
        # WHERE ADDITIONAL ITEMS ARE TORCH TENSORS OF SIZE (N_ITEMS,...) DETACHED FROM THEIR GRADIENTS FOR LOGGING
        return loss, loss.unsqueeze(0).detach()
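
A short usage sketch of the tuple convention noted in the comment above: the first element is backpropagated, the second is detached for logging:

import torch

criterion = LabelSmoothingCrossEntropyLoss(smooth_eps=0.1)
logits = torch.randn(4, 10, requires_grad=True)
targets = torch.randint(0, 10, (4,))

loss, log_items = criterion(logits, targets)  # log_items: detached, shape (1,)
loss.backward()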

cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mean', smooth_eps=None, smooth_dist=None, from_logits=True)

cross entropy loss, with support for target distributions and label smoothing https://arxiv.org/abs/1512.00567

Source code in latest/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py
def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction="mean", smooth_eps=None, smooth_dist=None, from_logits=True):  # noqa: C901
    """cross entropy loss, with support for target distributions and label smoothing https://arxiv.org/abs/1512.00567"""
    smooth_eps = smooth_eps or 0

    # ordinary log-likelihood - use cross_entropy from nn
    if _is_long(target) and smooth_eps == 0:
        if from_logits:
            return F.cross_entropy(inputs, target, weight, ignore_index=ignore_index, reduction=reduction)
        else:
            return F.nll_loss(inputs, target, weight, ignore_index=ignore_index, reduction=reduction)

    if from_logits:
        # log-softmax of inputs
        lsm = F.log_softmax(inputs, dim=-1)
    else:
        lsm = inputs

    masked_indices = None
    num_classes = inputs.size(-1)

    if _is_long(target) and ignore_index >= 0:
        masked_indices = target.eq(ignore_index)

    if smooth_eps > 0 and smooth_dist is not None:
        if _is_long(target):
            target = onehot(target, num_classes).type_as(inputs)
        if smooth_dist.dim() < target.dim():
            smooth_dist = smooth_dist.unsqueeze(0)
        target.lerp_(smooth_dist, smooth_eps)

    if weight is not None:
        lsm = lsm * weight.unsqueeze(0)

    if _is_long(target):
        eps_nll = 1.0 - smooth_eps
        likelihood = lsm.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
        loss = -(eps_nll * likelihood + smooth_eps * lsm.mean(-1))
    else:
        loss = -(target * lsm).sum(-1)

    if masked_indices is not None:
        loss.masked_fill_(masked_indices, 0)

    if reduction == "sum":
        loss = loss.sum()
    elif reduction == "mean":
        if masked_indices is None:
            loss = loss.mean()
        else:
            loss = loss.sum() / float(loss.size(0) - masked_indices.sum())

    return loss
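
For hard (long) targets with smooth_eps > 0 and no smoothing distribution, the loss above reduces to a blend of the usual negative log-likelihood and a uniform penalty over all classes. A small sketch verifying that identity on random inputs:

import torch
import torch.nn.functional as F

inputs = torch.randn(4, 10)
target = torch.randint(0, 10, (4,))
eps = 0.1

lsm = F.log_softmax(inputs, dim=-1)
nll = -lsm.gather(-1, target.unsqueeze(-1)).squeeze(-1)  # per-sample NLL
uniform = -lsm.mean(-1)                                  # uniform-target term
manual = ((1 - eps) * nll + eps * uniform).mean()

assert torch.allclose(manual, cross_entropy(inputs, target, smooth_eps=eps))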

onehot(indexes, N=None, ignore_index=None)

Creates a one-hot representation of indexes with N possible entries. If N is not specified, it is chosen to fit the maximum index appearing. indexes is a long tensor of indexes; positions equal to ignore_index are zero in the one-hot representation.

Source code in latest/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py
def onehot(indexes, N=None, ignore_index=None):
    """
    Creates a one-hot representation of indexes with N possible entries.
    If N is not specified, it is chosen to fit the maximum index appearing.
    indexes is a long tensor of indexes;
    positions equal to ignore_index are zero in the one-hot representation.
    """
    if N is None:
        N = indexes.max() + 1
    sz = list(indexes.size())
    output = indexes.new().byte().resize_(*sz, N).zero_()
    output.scatter_(-1, indexes.unsqueeze(-1), 1)
    if ignore_index is not None and ignore_index >= 0:
        output.masked_fill_(indexes.eq(ignore_index).unsqueeze(-1), 0)
    return output
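
A quick sketch of the output, including the ignore_index zeroing:

import torch

print(onehot(torch.tensor([0, 2, 1]), N=3))
# tensor([[1, 0, 0],
#         [0, 0, 1],
#         [0, 1, 0]], dtype=torch.uint8)

print(onehot(torch.tensor([0, 2]), N=3, ignore_index=2))
# the row whose index equals ignore_index is all zeros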

MaskAttentionLoss

Bases: _Loss

Pixel mask attention loss, for semantic segmentation use with 4D tensors.

Source code in latest/src/super_gradients/training/losses/mask_loss.py
class MaskAttentionLoss(_Loss):
    """
    Pixel mask attention loss, for semantic segmentation use with 4D tensors.
    """

    def __init__(self, criterion: _Loss, loss_weights: Union[list, tuple] = (1.0, 1.0), reduction: Union[LossReduction, str] = "mean"):
        """
        :param criterion: _Loss object; loss functions that apply a per-pixel cost penalty are supported, e.g.
            CrossEntropyLoss, BCEWithLogitsLoss, MSELoss, SL1Loss.
            The criterion reduction must be `none`.
        :param loss_weights: Weight to apply for each part of the loss contributions,
            [regular loss, masked loss] respectively.
        :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
            `none`: no reduction will be applied.
            `mean`: the sum of the output will be divided by the number of elements in the output.
            `sum`: the output will be summed.
            Default: `mean`
        """
        super().__init__(reduction=reduction.value if isinstance(reduction, LossReduction) else reduction)
        # Check that the arguments are valid.
        if criterion.reduction != "none":
            raise ValueError(f"criterion reduction must be `none`, for computing the mask contribution loss values," f" found reduction: {criterion.reduction}")
        if len(loss_weights) != 2:
            raise ValueError(f"loss_weights must have 2 values, found: {len(loss_weights)}")
        if loss_weights[1] <= 0:
            raise ValueError("If no loss weight is applied on mask samples, consider using simply criterion")

        self.criterion = criterion
        self.loss_weights = loss_weights

    def forward(self, predict: torch.Tensor, target: torch.Tensor, mask: torch.Tensor):
        criterion_loss = self.criterion(predict, target)

        mask = self._broadcast_mask(mask, criterion_loss.size())
        mask_loss = criterion_loss * mask

        if self.reduction == LossReduction.NONE.value:
            return criterion_loss * self.loss_weights[0] + mask_loss * self.loss_weights[1]
        mask_loss = mask_loss[mask == 1]  # consider only mask samples for mask loss computing
        # If mask doesn't include foreground values, set mask_loss as 0.
        if mask_loss.numel() == 0:
            mask_loss = torch.tensor(0.0)

        mask_loss = apply_reduce(mask_loss, self.reduction)
        criterion_loss = apply_reduce(criterion_loss, self.reduction)

        loss = criterion_loss * self.loss_weights[0] + mask_loss * self.loss_weights[1]
        return loss

    def _broadcast_mask(self, mask: torch.Tensor, size: torch.Size):
        """
        Broadcast the mask tensor before elementwise multiplication.
        """
        # Assert that batch size and spatial size are the same.
        if mask.size()[-2:] != size[-2:] or mask.size(0) != size[0]:
            raise AssertionError(
                "Mask broadcast is allowed only in channels dimension, found shape mismatch between " f"mask shape: {mask.size()}, and target shape: {size}"
            )
        # when mask is [B, 1, H, W] | [B, H, W] and size is [B, H, W]
        # or when mask is [B, 1, H, W] | [B, H, W] and size is [B, 1, H, W]
        if len(size) == 3 or (len(size) == 4 and size[1] == 1):
            mask = mask.view(*size)

        # when mask is [B, C, H, W] | [B, 1, H, W] | [B, H, W] and size is [B, C, H, W]
        else:
            mask = mask if len(mask.size()) == 4 else mask.unsqueeze(1)
            if mask.size(1) not in [1, size[1]]:
                raise AssertionError(
                    f"Broadcast is not allowed, num mask channels must be 1 or same as target channels; " f"mask shape: {mask.size()}, and target shape: {size}"
                )
            mask = mask if mask.size() == size else mask.expand(*size)
        return mask
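
A minimal usage sketch for semantic segmentation, assuming a hypothetical binary attention mask (e.g. marking object edges); note the wrapped criterion must be constructed with reduction="none":

import torch
import torch.nn as nn

criterion = MaskAttentionLoss(
    criterion=nn.CrossEntropyLoss(reduction="none"),
    loss_weights=(1.0, 2.0),  # extra weight on masked pixels
)

predict = torch.randn(2, 5, 8, 8)              # logits: [B, C, H, W]
target = torch.randint(0, 5, (2, 8, 8))        # class indices: [B, H, W]
mask = torch.randint(0, 2, (2, 8, 8)).float()  # 1 where extra attention applies

loss = criterion(predict, target, mask)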

__init__(criterion, loss_weights=(1.0, 1.0), reduction='mean')

Parameters:

Name Type Description Default
criterion _Loss

_Loss object; loss functions that apply a per-pixel cost penalty are supported, e.g. CrossEntropyLoss, BCEWithLogitsLoss, MSELoss, SL1Loss. The criterion reduction must be none.

required
loss_weights Union[list, tuple]

Weight to apply for each part of the loss contributions, [regular loss, masked loss] respectively.

(1.0, 1.0)
reduction Union[LossReduction, str]

Specifies the reduction to apply to the output: none | mean | sum. none: no reduction will be applied. mean: the sum of the output will be divided by the number of elements in the output. sum: the output will be summed. Default: mean

'mean'
Source code in latest/src/super_gradients/training/losses/mask_loss.py
def __init__(self, criterion: _Loss, loss_weights: Union[list, tuple] = (1.0, 1.0), reduction: Union[LossReduction, str] = "mean"):
    """
    :param criterion: _Loss object; loss functions that apply a per-pixel cost penalty are supported, e.g.
        CrossEntropyLoss, BCEWithLogitsLoss, MSELoss, SL1Loss.
        The criterion reduction must be `none`.
    :param loss_weights: Weight to apply for each part of the loss contributions,
        [regular loss, masked loss] respectively.
    :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
        `none`: no reduction will be applied.
        `mean`: the sum of the output will be divided by the number of elements in the output.
        `sum`: the output will be summed.
        Default: `mean`
    """
    super().__init__(reduction=reduction.value if isinstance(reduction, LossReduction) else reduction)
    # Check that the arguments are valid.
    if criterion.reduction != "none":
        raise ValueError(f"criterion reduction must be `none`, for computing the mask contribution loss values," f" found reduction: {criterion.reduction}")
    if len(loss_weights) != 2:
        raise ValueError(f"loss_weights must have 2 values, found: {len(loss_weights)}")
    if loss_weights[1] <= 0:
        raise ValueError("If no loss weight is applied on mask samples, consider using simply criterion")

    self.criterion = criterion
    self.loss_weights = loss_weights

OhemBCELoss

Bases: OhemLoss

OhemBCELoss - Online Hard Example Mining Binary Cross Entropy Loss

Source code in latest/src/super_gradients/training/losses/ohem_ce_loss.py
class OhemBCELoss(OhemLoss):
    """
    OhemBCELoss - Online Hard Example Mining Binary Cross Entropy Loss
    """

    def __init__(
        self,
        threshold: float,
        mining_percent: float = 0.1,
        ignore_lb: int = -100,
        num_pixels_exclude_ignored: bool = True,
    ):
        super(OhemBCELoss, self).__init__(
            threshold=threshold,
            mining_percent=mining_percent,
            ignore_lb=ignore_lb,
            num_pixels_exclude_ignored=num_pixels_exclude_ignored,
            criteria=nn.BCEWithLogitsLoss(reduction="none"),
        )

    def forward(self, logits, labels):

        # REMOVE SINGLE CLASS CHANNEL WHEN DEALING WITH BINARY DATA
        if logits.shape[1] == 1:
            logits = logits.squeeze(1)
        return super(OhemBCELoss, self).forward(logits, labels.float())

OhemCELoss

Bases: OhemLoss

OhemCELoss - Online Hard Example Mining Cross Entropy Loss

Source code in latest/src/super_gradients/training/losses/ohem_ce_loss.py
class OhemCELoss(OhemLoss):
    """
    OhemCELoss - Online Hard Example Mining Cross Entropy Loss
    """

    def __init__(self, threshold: float, mining_percent: float = 0.1, ignore_lb: int = -100, num_pixels_exclude_ignored: bool = True):
        ignore_lb = -100 if ignore_lb is None or ignore_lb < 0 else ignore_lb
        criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction="none")
        super(OhemCELoss, self).__init__(
            threshold=threshold, mining_percent=mining_percent, ignore_lb=ignore_lb, num_pixels_exclude_ignored=num_pixels_exclude_ignored, criteria=criteria
        )

OhemLoss

Bases: _Loss

OhemLoss - Online Hard Example Mining Loss

Source code in latest/src/super_gradients/training/losses/ohem_ce_loss.py
class OhemLoss(_Loss):
    """
    OhemLoss - Online Hard Example Mining Loss
    """

    def __init__(self, threshold: float, mining_percent: float = 0.1, ignore_lb: int = -100, num_pixels_exclude_ignored: bool = True, criteria: _Loss = None):
        """
        :param threshold: A sample whose probability falls below this threshold is considered hard.
        :param num_pixels_exclude_ignored: Whether to exclude ignored pixels from the total pixel count from which
         the mining percent of the samples is extracted.
        :param ignore_lb: label index to be ignored in loss calculation.
        :param criteria: loss to mine the examples from.

         e.g. for num_pixels=100, ignore_pixels=30, mining_percent=0.1:
         num_pixels_exclude_ignored=False => num_mining = 100 * 0.1 = 10
         num_pixels_exclude_ignored=True  => num_mining = (100 - 30) * 0.1 = 7
        """
        super().__init__()

        if mining_percent < 0 or mining_percent > 1:
            raise IllegalRangeForLossAttributeException((0, 1), "mining percent")

        self.thresh = -torch.log(torch.tensor(threshold, dtype=torch.float))
        self.mining_percent = mining_percent
        self.ignore_lb = ignore_lb
        self.num_pixels_exclude_ignored = num_pixels_exclude_ignored

        if criteria.reduction != "none":
            raise RequiredLossComponentReductionException("criteria", criteria.reduction, "none")
        self.criteria = criteria

    def forward(self, logits, labels):
        loss = self.criteria(logits, labels).view(-1)
        if self.num_pixels_exclude_ignored:
            # remove ignore label elements
            loss = loss[labels.view(-1) != self.ignore_lb]
            # num pixels in a batch -> num_pixels = batch_size * width * height - ignore_pixels
            num_pixels = loss.numel()
        else:
            num_pixels = labels.numel()
        # if all pixels are ignore labels, return a zero loss tensor
        if num_pixels == 0:
            return torch.tensor([0.0]).requires_grad_(True)

        num_mining = int(self.mining_percent * num_pixels)
        # in case mining_percent=1, prevent out of bound exception
        num_mining = min(num_mining, num_pixels - 1)

        self.thresh = self.thresh.to(logits.device)
        loss, _ = torch.sort(loss, descending=True)
        if loss[num_mining] > self.thresh:
            loss = loss[loss > self.thresh]
        else:
            loss = loss[:num_mining]
        return torch.mean(loss)
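
A minimal usage sketch of the mining behaviour: after sorting the per-pixel losses in descending order, either all pixels whose loss exceeds -log(threshold) or the top mining_percent of non-ignored pixels are kept, whichever set is larger. The shapes below are hypothetical:

import torch

criterion = OhemCELoss(threshold=0.7, mining_percent=0.1, ignore_lb=255)

logits = torch.randn(2, 19, 64, 64)          # [B, C, H, W]
labels = torch.randint(0, 19, (2, 64, 64))   # [B, H, W]
labels[0, :8, :8] = 255                      # some ignored pixels

loss = criterion(logits, labels)
# 2 * 64 * 64 = 8192 pixels, 64 ignored -> num_mining = int(0.1 * 8128) = 812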

__init__(threshold, mining_percent=0.1, ignore_lb=-100, num_pixels_exclude_ignored=True, criteria=None)

Parameters:

Name Type Description Default
threshold float

A sample whose probability falls below this threshold is considered hard.

required
num_pixels_exclude_ignored bool

Whether to exclude ignored pixels from the total pixel count from which the mining percent of the samples is extracted.

True
ignore_lb int

label index to be ignored in loss calculation.

-100
criteria _Loss

loss to mine the examples from, e.g. for num_pixels=100, ignore_pixels=30, mining_percent=0.1:
num_pixels_exclude_ignored=False => num_mining = 100 * 0.1 = 10
num_pixels_exclude_ignored=True => num_mining = (100 - 30) * 0.1 = 7

None
Source code in latest/src/super_gradients/training/losses/ohem_ce_loss.py
def __init__(self, threshold: float, mining_percent: float = 0.1, ignore_lb: int = -100, num_pixels_exclude_ignored: bool = True, criteria: _Loss = None):
    """
    :param threshold: A sample whose probability falls below this threshold is considered hard.
    :param num_pixels_exclude_ignored: Whether to exclude ignored pixels from the total pixel count from which
     the mining percent of the samples is extracted.
    :param ignore_lb: label index to be ignored in loss calculation.
    :param criteria: loss to mine the examples from.

     e.g. for num_pixels=100, ignore_pixels=30, mining_percent=0.1:
     num_pixels_exclude_ignored=False => num_mining = 100 * 0.1 = 10
     num_pixels_exclude_ignored=True  => num_mining = (100 - 30) * 0.1 = 7
    """
    super().__init__()

    if mining_percent < 0 or mining_percent > 1:
        raise IllegalRangeForLossAttributeException((0, 1), "mining percent")

    self.thresh = -torch.log(torch.tensor(threshold, dtype=torch.float))
    self.mining_percent = mining_percent
    self.ignore_lb = ignore_lb
    self.num_pixels_exclude_ignored = num_pixels_exclude_ignored

    if criteria.reduction != "none":
        raise RequiredLossComponentReductionException("criteria", criteria.reduction, "none")
    self.criteria = criteria

ATSSAssigner

Bases: nn.Module

Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
class ATSSAssigner(nn.Module):
    """Bridging the Gap Between Anchor-based and Anchor-free Detection
    via Adaptive Training Sample Selection
    """

    __shared__ = ["num_classes"]

    def __init__(self, topk=9, num_classes=80, force_gt_matching=False, eps=1e-9):
        """

        :param topk: Maximum number of anchors selected for each gt box
        :param num_classes: Number of classes
        :param force_gt_matching: Guarantee that each gt box is matched to at least one anchor.
            If two gt boxes match to the same anchor, the one with the larger area will be selected,
            and the second-best anchor will be assigned to the other gt box.
        :param eps: Small constant for numerical stability
        """
        super(ATSSAssigner, self).__init__()
        self.topk = topk
        self.num_classes = num_classes
        self.force_gt_matching = force_gt_matching
        self.eps = eps

    def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list, pad_gt_mask):
        gt2anchor_distances_list = torch.split(gt2anchor_distances, num_anchors_list, dim=-1)
        num_anchors_index = np.cumsum(num_anchors_list).tolist()
        num_anchors_index = [
            0,
        ] + num_anchors_index[:-1]
        is_in_topk_list = []
        topk_idxs_list = []
        for distances, anchors_index in zip(gt2anchor_distances_list, num_anchors_index):
            num_anchors = distances.shape[-1]
            _, topk_idxs = torch.topk(distances, self.topk, dim=-1, largest=False)
            topk_idxs_list.append(topk_idxs + anchors_index)
            is_in_topk = torch.nn.functional.one_hot(topk_idxs, num_anchors).sum(dim=-2).type_as(gt2anchor_distances)
            is_in_topk_list.append(is_in_topk * pad_gt_mask)
        is_in_topk_list = torch.cat(is_in_topk_list, dim=-1)
        topk_idxs_list = torch.cat(topk_idxs_list, dim=-1)
        return is_in_topk_list, topk_idxs_list

    @torch.no_grad()
    def forward(
        self,
        anchor_bboxes: Tensor,
        num_anchors_list: list,
        gt_labels: Tensor,
        gt_bboxes: Tensor,
        pad_gt_mask: Tensor,
        bg_index: int,
        gt_scores: Optional[Tensor] = None,
        pred_bboxes: Optional[Tensor] = None,
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        This code is based on https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

        The assignment is done in the following steps
        1. compute iou between all bboxes (bboxes of all pyramid levels) and gt
        2. compute center distance between all bboxes and gt
        3. on each pyramid level, for each gt, select the k bboxes whose centers
           are closest to the gt center, so we select k*l bboxes in total as
           candidates for each gt
        4. get the corresponding iou for these candidates, and compute the
           mean and std; set mean + std as the iou threshold
        5. select those candidates whose iou is greater than or equal to
           the threshold as positive
        6. limit the positive samples' centers to lie inside the gt
        7. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.

        :param anchor_bboxes:       Tensor(float32) - pre-defined anchors, shape(L, 4), "xmin, xmax, ymin, ymax" format
        :param num_anchors_list:    Number of anchors in each level
        :param gt_labels:           Tensor (int64|int32) - Label of gt_bboxes, shape(B, n, 1)
        :param gt_bboxes:           Tensor (float32) - Ground truth bboxes, shape(B, n, 4)
        :param pad_gt_mask:         Tensor (float32) - 1 means bbox, 0 means no bbox, shape(B, n, 1)
        :param bg_index:            Background index
        :param gt_scores:           Tensor (float32) - Score of gt_bboxes, shape(B, n, 1), if None, then it will initialize with one_hot label
        :param pred_bboxes:         Tensor (float32) - predicted bounding boxes, shape(B, L, 4)
        :return:
            - assigned_labels: Tensor of shape (B, L)
            - assigned_bboxes: Tensor of shape (B, L, 4)
            - assigned_scores: Tensor of shape (B, L, C), if pred_bboxes is not None, then output ious
        """
        assert gt_labels.ndim == gt_bboxes.ndim and gt_bboxes.ndim == 3

        num_anchors, _ = anchor_bboxes.shape
        batch_size, num_max_boxes, _ = gt_bboxes.shape

        # negative batch
        if num_max_boxes == 0:
            assigned_labels = torch.full([batch_size, num_anchors], bg_index, dtype=torch.long, device=anchor_bboxes.device)
            assigned_bboxes = torch.zeros([batch_size, num_anchors, 4], device=anchor_bboxes.device)
            assigned_scores = torch.zeros([batch_size, num_anchors, self.num_classes], device=anchor_bboxes.device)
            return assigned_labels, assigned_bboxes, assigned_scores

        # 1. compute iou between gt and anchor bbox, [B, n, L]
        ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
        ious = ious.reshape([batch_size, -1, num_anchors])

        # 2. compute center distance between all anchors and gt, [B, n, L]
        gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
        anchor_centers = bbox_center(anchor_bboxes)
        # gt2anchor_distances = (
        #     (gt_centers - anchor_centers.unsqueeze(0)).norm(2, dim=-1).reshape([batch_size, -1, num_anchors])
        # )

        gt2anchor_distances = torch.norm(gt_centers - anchor_centers.unsqueeze(0), p=2, dim=-1).reshape([batch_size, -1, num_anchors])

        # 3. on each pyramid level, selecting top-k closest candidates
        # based on the center distance, [B, n, L]
        is_in_topk, topk_idxs = self._gather_topk_pyramid(gt2anchor_distances, num_anchors_list, pad_gt_mask)

        # 4. get corresponding iou for the these candidates, and compute the
        # mean and std, 5. set mean + std as the iou threshold
        iou_candidates = ious * is_in_topk

        iou_threshold = torch.gather(iou_candidates.flatten(end_dim=-2), dim=1, index=topk_idxs.flatten(end_dim=-2))

        iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
        iou_threshold = iou_threshold.mean(dim=-1, keepdim=True) + iou_threshold.std(dim=-1, keepdim=True)
        is_in_topk = torch.where(iou_candidates > iou_threshold, is_in_topk, torch.zeros_like(is_in_topk))

        # 6. check the positive sample's center in gt, [B, n, L]
        is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # 7. if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected.
        mask_positive_sum = mask_positive.sum(dim=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile([1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = torch.where(mask_multiple_gts, is_max_iou, mask_positive)
            mask_positive_sum = mask_positive.sum(dim=-2)
        # 8. make sure every gt_bbox matches the anchor
        if self.force_gt_matching:
            is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
            mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile([1, num_max_boxes, 1])
            mask_positive = torch.where(mask_max_iou, is_max_iou, mask_positive)
            mask_positive_sum = mask_positive.sum(dim=-2)
        assigned_gt_index = mask_positive.argmax(dim=-2)

        # assigned target
        batch_ind = torch.arange(end=batch_size, dtype=gt_labels.dtype, device=gt_labels.device).unsqueeze(-1)
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = torch.gather(gt_labels.flatten(), index=assigned_gt_index.flatten(), dim=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = torch.where(mask_positive_sum > 0, assigned_labels, torch.full_like(assigned_labels, bg_index))

        # assigned_bboxes = torch.gather(gt_bboxes.reshape([-1, 4]), index=assigned_gt_index.flatten(), dim=0)
        assigned_bboxes = gt_bboxes.reshape([-1, 4])[assigned_gt_index.flatten(), :]
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        assigned_scores = torch.nn.functional.one_hot(assigned_labels, self.num_classes + 1).float()
        ind = list(range(self.num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = torch.index_select(assigned_scores, index=torch.tensor(ind, device=assigned_scores.device), dim=-1)
        if pred_bboxes is not None:
            # assigned iou
            ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
            ious = ious.max(dim=-2).values.unsqueeze(-1)
            assigned_scores *= ious
        elif gt_scores is not None:
            gather_scores = torch.gather(gt_scores.flatten(), dim=0, index=assigned_gt_index.flatten())
            gather_scores = gather_scores.reshape([batch_size, num_anchors])
            gather_scores = torch.where(mask_positive_sum > 0, gather_scores, torch.zeros_like(gather_scores))
            assigned_scores *= gather_scores.unsqueeze(-1)

        return assigned_labels, assigned_bboxes, assigned_scores
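
The pivotal statistic sits in steps 4-5: per ground-truth box, the IoU cutoff is the mean plus the standard deviation of the candidate IoUs. A standalone sketch of that cutoff on hypothetical candidate IoUs for a single gt box:

import torch

# Hypothetical IoUs of the top-k distance candidates for one gt box.
candidate_ious = torch.tensor([0.10, 0.25, 0.40, 0.55, 0.60])

iou_threshold = candidate_ious.mean() + candidate_ious.std()
positive = candidate_ious > iou_threshold
print(iou_threshold, positive)
# only candidates above mean + std survive, subject to the
# center-inside-gt check of step 6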

__init__(topk=9, num_classes=80, force_gt_matching=False, eps=1e-09)

Parameters:

Name Type Description Default
topk

Maximum number of anchors selected for each gt box

9
num_classes

Number of classes

80
force_gt_matching

Guarantee that each gt box is matched to at least one anchor. If two gt boxes match to the same anchor, the one with the larger area will be selected, and the second-best anchor will be assigned to the other gt box.

False
eps

Small constant for numerical stability

1e-09
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def __init__(self, topk=9, num_classes=80, force_gt_matching=False, eps=1e-9):
    """

    :param topk: Maximum number of anchors selected for each gt box
    :param num_classes: Number of classes
    :param force_gt_matching: Guarantee that each gt box is matched to at least one anchor.
        If two gt boxes match to the same anchor, the one with the larger area will be selected,
        and the second-best anchor will be assigned to the other gt box.
    :param eps: Small constant for numerical stability
    """
    super(ATSSAssigner, self).__init__()
    self.topk = topk
    self.num_classes = num_classes
    self.force_gt_matching = force_gt_matching
    self.eps = eps

forward(anchor_bboxes, num_anchors_list, gt_labels, gt_bboxes, pad_gt_mask, bg_index, gt_scores=None, pred_bboxes=None)

This code is based on https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

The assignment is done in the following steps:

1. compute iou between all bboxes (bboxes of all pyramid levels) and gt
2. compute center distance between all bboxes and gt
3. on each pyramid level, for each gt, select the k bboxes whose centers are closest to the gt center, so we select k*l bboxes in total as candidates for each gt
4. get the corresponding iou for these candidates, and compute the mean and std; set mean + std as the iou threshold
5. select those candidates whose iou is greater than or equal to the threshold as positive
6. limit the positive samples' centers to lie inside the gt
7. if an anchor box is assigned to multiple gts, the one with the highest iou will be selected.

Parameters:

Name Type Description Default
anchor_bboxes Tensor

Tensor(float32) - pre-defined anchors, shape(L, 4), "xmin, xmax, ymin, ymax" format

required
num_anchors_list list

Number of anchors in each level

required
gt_labels Tensor

Tensor (int64|int32) - Label of gt_bboxes, shape(B, n, 1)

required
gt_bboxes Tensor

Tensor (float32) - Ground truth bboxes, shape(B, n, 4)

required
pad_gt_mask Tensor

Tensor (float32) - 1 means bbox, 0 means no bbox, shape(B, n, 1)

required
bg_index int

Background index

required
gt_scores Optional[Tensor]

Tensor (float32) - Score of gt_bboxes, shape(B, n, 1), if None, then it will initialize with one_hot label

None
pred_bboxes Optional[Tensor]

Tensor (float32) - predicted bounding boxes, shape(B, L, 4)

None

Returns:

Type Description
Tuple[Tensor, Tensor, Tensor]
  - assigned_labels: Tensor of shape (B, L)
  - assigned_bboxes: Tensor of shape (B, L, 4)
  - assigned_scores: Tensor of shape (B, L, C); if pred_bboxes is not None, then output ious
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
@torch.no_grad()
def forward(
    self,
    anchor_bboxes: Tensor,
    num_anchors_list: list,
    gt_labels: Tensor,
    gt_bboxes: Tensor,
    pad_gt_mask: Tensor,
    bg_index: int,
    gt_scores: Optional[Tensor] = None,
    pred_bboxes: Optional[Tensor] = None,
) -> Tuple[Tensor, Tensor, Tensor]:
    """
    This code is based on https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

    The assignment is done in the following steps
    1. compute iou between all bboxes (bboxes of all pyramid levels) and gt
    2. compute center distance between all bboxes and gt
    3. on each pyramid level, for each gt, select the k bboxes whose centers
       are closest to the gt center, so we select k*l bboxes in total as
       candidates for each gt
    4. get the corresponding iou for these candidates, and compute the
       mean and std; set mean + std as the iou threshold
    5. select those candidates whose iou is greater than or equal to
       the threshold as positive
    6. limit the positive samples' centers to lie inside the gt
    7. if an anchor box is assigned to multiple gts, the one with the
       highest iou will be selected.

    :param anchor_bboxes:       Tensor(float32) - pre-defined anchors, shape(L, 4), "xmin, xmax, ymin, ymax" format
    :param num_anchors_list:    Number of anchors in each level
    :param gt_labels:           Tensor (int64|int32) - Label of gt_bboxes, shape(B, n, 1)
    :param gt_bboxes:           Tensor (float32) - Ground truth bboxes, shape(B, n, 4)
    :param pad_gt_mask:         Tensor (float32) - 1 means bbox, 0 means no bbox, shape(B, n, 1)
    :param bg_index:            Background index
    :param gt_scores:           Tensor (float32) - Score of gt_bboxes, shape(B, n, 1), if None, then it will initialize with one_hot label
    :param pred_bboxes:         Tensor (float32) - predicted bounding boxes, shape(B, L, 4)
    :return:
        - assigned_labels: Tensor of shape (B, L)
        - assigned_bboxes: Tensor of shape (B, L, 4)
        - assigned_scores: Tensor of shape (B, L, C), if pred_bboxes is not None, then output ious
    """
    assert gt_labels.ndim == gt_bboxes.ndim and gt_bboxes.ndim == 3

    num_anchors, _ = anchor_bboxes.shape
    batch_size, num_max_boxes, _ = gt_bboxes.shape

    # negative batch
    if num_max_boxes == 0:
        assigned_labels = torch.full([batch_size, num_anchors], bg_index, dtype=torch.long, device=anchor_bboxes.device)
        assigned_bboxes = torch.zeros([batch_size, num_anchors, 4], device=anchor_bboxes.device)
        assigned_scores = torch.zeros([batch_size, num_anchors, self.num_classes], device=anchor_bboxes.device)
        return assigned_labels, assigned_bboxes, assigned_scores

    # 1. compute iou between gt and anchor bbox, [B, n, L]
    ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
    ious = ious.reshape([batch_size, -1, num_anchors])

    # 2. compute center distance between all anchors and gt, [B, n, L]
    gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
    anchor_centers = bbox_center(anchor_bboxes)
    # gt2anchor_distances = (
    #     (gt_centers - anchor_centers.unsqueeze(0)).norm(2, dim=-1).reshape([batch_size, -1, num_anchors])
    # )

    gt2anchor_distances = torch.norm(gt_centers - anchor_centers.unsqueeze(0), p=2, dim=-1).reshape([batch_size, -1, num_anchors])

    # 3. on each pyramid level, selecting top-k closest candidates
    # based on the center distance, [B, n, L]
    is_in_topk, topk_idxs = self._gather_topk_pyramid(gt2anchor_distances, num_anchors_list, pad_gt_mask)

    # 4. get corresponding iou for the these candidates, and compute the
    # mean and std, 5. set mean + std as the iou threshold
    iou_candidates = ious * is_in_topk

    iou_threshold = torch.gather(iou_candidates.flatten(end_dim=-2), dim=1, index=topk_idxs.flatten(end_dim=-2))

    iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
    iou_threshold = iou_threshold.mean(dim=-1, keepdim=True) + iou_threshold.std(dim=-1, keepdim=True)
    is_in_topk = torch.where(iou_candidates > iou_threshold, is_in_topk, torch.zeros_like(is_in_topk))

    # 6. check the positive sample's center in gt, [B, n, L]
    is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)

    # select positive sample, [B, n, L]
    mask_positive = is_in_topk * is_in_gts * pad_gt_mask

    # 7. if an anchor box is assigned to multiple gts,
    # the one with the highest iou will be selected.
    mask_positive_sum = mask_positive.sum(dim=-2)
    if mask_positive_sum.max() > 1:
        mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile([1, num_max_boxes, 1])
        is_max_iou = compute_max_iou_anchor(ious)
        mask_positive = torch.where(mask_multiple_gts, is_max_iou, mask_positive)
        mask_positive_sum = mask_positive.sum(dim=-2)
    # 8. make sure every gt_bbox matches the anchor
    if self.force_gt_matching:
        is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
        mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile([1, num_max_boxes, 1])
        mask_positive = torch.where(mask_max_iou, is_max_iou, mask_positive)
        mask_positive_sum = mask_positive.sum(dim=-2)
    assigned_gt_index = mask_positive.argmax(dim=-2)

    # assigned target
    batch_ind = torch.arange(end=batch_size, dtype=gt_labels.dtype, device=gt_labels.device).unsqueeze(-1)
    assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
    assigned_labels = torch.gather(gt_labels.flatten(), index=assigned_gt_index.flatten(), dim=0)
    assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
    assigned_labels = torch.where(mask_positive_sum > 0, assigned_labels, torch.full_like(assigned_labels, bg_index))

    # assigned_bboxes = torch.gather(gt_bboxes.reshape([-1, 4]), index=assigned_gt_index.flatten(), dim=0)
    assigned_bboxes = gt_bboxes.reshape([-1, 4])[assigned_gt_index.flatten(), :]
    assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

    assigned_scores = torch.nn.functional.one_hot(assigned_labels, self.num_classes + 1).float()
    ind = list(range(self.num_classes + 1))
    ind.remove(bg_index)
    assigned_scores = torch.index_select(assigned_scores, index=torch.tensor(ind, device=assigned_scores.device), dim=-1)
    if pred_bboxes is not None:
        # assigned iou
        ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
        ious = ious.max(dim=-2).values.unsqueeze(-1)
        assigned_scores *= ious
    elif gt_scores is not None:
        gather_scores = torch.gather(gt_scores.flatten(), dim=0, index=assigned_gt_index.flatten())
        gather_scores = gather_scores.reshape([batch_size, num_anchors])
        gather_scores = torch.where(mask_positive_sum > 0, gather_scores, torch.zeros_like(gather_scores))
        assigned_scores *= gather_scores.unsqueeze(-1)

    return assigned_labels, assigned_bboxes, assigned_scores

GIoULoss

Bases: object

Generalized Intersection over Union, see https://arxiv.org/abs/1902.09630

Parameters:

Name Type Description Default
loss_weight float

giou loss weight, default as 1

1.0
eps float

epsilon to avoid divide by zero, default as 1e-10

1e-10
reduction str

Options are "none", "mean" and "sum". default as none

'none'
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
class GIoULoss(object):
    """
    Generalized Intersection over Union, see https://arxiv.org/abs/1902.09630

    :param loss_weight: giou loss weight, default as 1
    :param eps:         epsilon to avoid divide by zero, default as 1e-10
    :param reduction:   Options are "none", "mean" and "sum". default as none
    """

    def __init__(self, loss_weight: float = 1.0, eps: float = 1e-10, reduction: str = "none"):
        self.loss_weight = loss_weight
        self.eps = eps
        assert reduction in ("none", "mean", "sum")
        self.reduction = reduction

    def bbox_overlap(self, box1: Tensor, box2: Tensor, eps: float = 1e-10) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Calculate the iou of box1 and box2.

        :param box1:    box1 with the shape (..., 4)
        :param box2:    box2 with the shape (..., 4)
        :param eps:     epsilon to avoid divide by zero
        :return:
            - iou:      iou of box1 and box2
            - overlap:  overlap of box1 and box2
            - union:    union of box1 and box2
        """
        x1, y1, x2, y2 = box1
        x1g, y1g, x2g, y2g = box2

        xkis1 = torch.maximum(x1, x1g)
        ykis1 = torch.maximum(y1, y1g)
        xkis2 = torch.minimum(x2, x2g)
        ykis2 = torch.minimum(y2, y2g)
        w_inter = (xkis2 - xkis1).clip(0)
        h_inter = (ykis2 - ykis1).clip(0)
        overlap = w_inter * h_inter

        area1 = (x2 - x1) * (y2 - y1)
        area2 = (x2g - x1g) * (y2g - y1g)
        union = area1 + area2 - overlap + eps
        iou = overlap / union

        return iou, overlap, union

    def __call__(self, pbox: Tensor, gbox: Tensor, iou_weight=1.0, loc_reweight=None):
        # x1, y1, x2, y2 = torch.split(pbox, split_size_or_sections=4, dim=-1)
        # x1g, y1g, x2g, y2g = torch.split(gbox, split_size_or_sections=4, dim=-1)

        x1, y1, x2, y2 = pbox.chunk(4, dim=-1)
        x1g, y1g, x2g, y2g = gbox.chunk(4, dim=-1)

        box1 = [x1, y1, x2, y2]
        box2 = [x1g, y1g, x2g, y2g]
        iou, overlap, union = self.bbox_overlap(box1, box2, self.eps)
        xc1 = torch.minimum(x1, x1g)
        yc1 = torch.minimum(y1, y1g)
        xc2 = torch.maximum(x2, x2g)
        yc2 = torch.maximum(y2, y2g)

        area_c = (xc2 - xc1) * (yc2 - yc1) + self.eps
        miou = iou - ((area_c - union) / area_c)
        if loc_reweight is not None:
            loc_reweight = torch.reshape(loc_reweight, shape=(-1, 1))
            loc_thresh = 0.9
            giou = 1 - (1 - loc_thresh) * miou - loc_thresh * miou * loc_reweight
        else:
            giou = 1 - miou
        if self.reduction == "none":
            loss = giou
        elif self.reduction == "sum":
            loss = torch.sum(giou * iou_weight)
        else:
            loss = torch.mean(giou * iou_weight)
        return loss * self.loss_weight
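
A worked sketch for two axis-aligned boxes, following bbox_overlap and __call__ above:

import torch

giou_loss = GIoULoss(reduction="mean")

pbox = torch.tensor([[0.0, 0.0, 2.0, 2.0]])  # predicted box, x1y1x2y2
gbox = torch.tensor([[1.0, 1.0, 3.0, 3.0]])  # ground-truth box

# Intersection 1*1 = 1, union 4 + 4 - 1 = 7 -> IoU ~ 1/7.
# Enclosing box 3*3 = 9 -> GIoU = IoU - (9 - 7)/9 ~ -0.079.
# Loss = 1 - GIoU ~ 1.079.
print(giou_loss(pbox, gbox))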

bbox_overlap(box1, box2, eps=1e-10)

Calculate the iou of box1 and box2.

Parameters:

Name Type Description Default
box1 Tensor

box1 with the shape (..., 4)

required
box2 Tensor

box2 with the shape (..., 4)

required
eps float

epsilon to avoid divide by zero

1e-10

Returns:

Type Description
Tuple[Tensor, Tensor, Tensor]
  - iou: iou of box1 and box2
  - overlap: overlap of box1 and box2
  - union: union of box1 and box2
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def bbox_overlap(self, box1: Tensor, box2: Tensor, eps: float = 1e-10) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Calculate the iou of box1 and box2.

    :param box1:    box1 with the shape (..., 4)
    :param box2:    box2 with the shape (..., 4)
    :param eps:     epsilon to avoid divide by zero
    :return:
        - iou:      iou of box1 and box2
        - overlap:  overlap of box1 and box2
        - union:    union of box1 and box2
    """
    x1, y1, x2, y2 = box1
    x1g, y1g, x2g, y2g = box2

    xkis1 = torch.maximum(x1, x1g)
    ykis1 = torch.maximum(y1, y1g)
    xkis2 = torch.minimum(x2, x2g)
    ykis2 = torch.minimum(y2, y2g)
    w_inter = (xkis2 - xkis1).clip(0)
    h_inter = (ykis2 - ykis1).clip(0)
    overlap = w_inter * h_inter

    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2g - x1g) * (y2g - y1g)
    union = area1 + area2 - overlap + eps
    iou = overlap / union

    return iou, overlap, union

PPYoloELoss

Bases: nn.Module

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
@register_loss(Losses.PPYOLOE_LOSS)
class PPYoloELoss(nn.Module):
    def __init__(
        self,
        num_classes: int,
        use_varifocal_loss: bool = True,
        use_static_assigner: bool = True,
        reg_max: int = 16,
        classification_loss_weight: float = 1.0,
        iou_loss_weight: float = 2.5,
        dfl_loss_weight: float = 0.5,
    ):
        """
        :param num_classes: Number of classes
        :param use_varifocal_loss: Whether to use Varifocal loss for classification loss; otherwise use Focal loss
        :param use_static_assigner: Whether to use the static (ATSS) assigner or the Task-Aligned assigner
        :param classification_loss_weight: Classification loss weight
        :param iou_loss_weight: IoU loss weight
        :param dfl_loss_weight: DFL loss weight
        :param reg_max: Number of regression bins (Must match the number of bins in the PPYoloE head)
        """
        super().__init__()
        self.use_varifocal_loss = use_varifocal_loss
        self.classification_loss_weight = classification_loss_weight
        self.dfl_loss_weight = dfl_loss_weight
        self.iou_loss_weight = iou_loss_weight

        self.iou_loss = GIoULoss()
        self.static_assigner = ATSSAssigner(topk=9, num_classes=num_classes)
        self.assigner = TaskAlignedAssigner(topk=13, alpha=1.0, beta=6.0)
        self.use_static_assigner = use_static_assigner
        self.reg_max = reg_max
        self.num_classes = num_classes

        # Same as in PPYoloE head
        proj = torch.linspace(0, self.reg_max, self.reg_max + 1).reshape([1, self.reg_max + 1, 1, 1])
        self.register_buffer("proj_conv", proj)

    @torch.no_grad()
    def _yolox_targets_to_ppyolo(self, targets: torch.Tensor, batch_size: int) -> Mapping[str, torch.Tensor]:
        """
        Convert targets from YoloX format to PPYolo since it's the easiest (not the cleanest) way to
        have PP Yolo training & metrics computed

        :param targets: (N, 6) format of bboxes is meant to be LABEL_CXCYWH (index, c, cx, cy, w, h)
        :return: (Dictionary [str,Tensor]) with keys:
         - gt_class: (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
         - gt_bbox: (Tensor, float32): Ground truth bboxes, shape(B, n, 4) in x1y1x2y2 format
         - pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
        """
        image_index = targets[:, 0]
        gt_class = targets[:, 1:2].long()
        gt_bbox = cxcywh_to_xyxy(targets[:, 2:6], image_shape=None)

        per_image_class = []
        per_image_bbox = []
        per_image_pad_mask = []

        max_boxes = 0
        for i in range(batch_size):
            mask = image_index == i

            image_labels = gt_class[mask]
            image_bboxes = gt_bbox[mask, :]
            valid_bboxes = image_bboxes.sum(dim=1, keepdims=True) > 0

            per_image_class.append(image_labels)
            per_image_bbox.append(image_bboxes)
            per_image_pad_mask.append(valid_bboxes)

            max_boxes = max(max_boxes, mask.sum().item())

        for i in range(batch_size):
            elements_to_pad = max_boxes - len(per_image_class[i])
            padding_left = 0
            padding_right = 0
            padding_top = 0
            padding_bottom = elements_to_pad
            pad = padding_left, padding_right, padding_top, padding_bottom
            per_image_class[i] = F.pad(per_image_class[i], pad, mode="constant", value=0)
            per_image_bbox[i] = F.pad(per_image_bbox[i], pad, mode="constant", value=0)
            per_image_pad_mask[i] = F.pad(per_image_pad_mask[i], pad, mode="constant", value=0)

        return {
            "gt_class": torch.stack(per_image_class, dim=0),
            "gt_bbox": torch.stack(per_image_bbox, dim=0),
            "pad_gt_mask": torch.stack(per_image_pad_mask, dim=0),
        }

    def forward(
        self,
        outputs: Union[
            Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor], Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]]
        ],
        targets: Tensor,
    ) -> Mapping[str, Tensor]:
        """
        :param outputs: Tuple of pred_scores, pred_distri, anchors, anchor_points, num_anchors_list, stride_tensor
        :param targets: Targets in YoloX format: a (N, 6) tensor of (image_index, class, cx, cy, w, h),
         converted internally to PPYolo format by _yolox_targets_to_ppyolo
        :return: Tuple of (loss, log_losses), where log_losses holds the detached
         [loss_cls, loss_iou, loss_dfl, loss] components
        """
        if isinstance(outputs, tuple) and len(outputs) == 2:
            # in test/eval mode the Yolo model outputs a tuple where the second item is the raw predictions
            _, predictions = outputs
        else:
            predictions = outputs

        (
            pred_scores,
            pred_distri,
            anchors,
            anchor_points,
            num_anchors_list,
            stride_tensor,
        ) = predictions

        targets = self._yolox_targets_to_ppyolo(targets, batch_size=pred_scores.size(0))  # yolox -> ppyolo

        anchor_points_s = anchor_points / stride_tensor
        pred_bboxes = self._bbox_decode(anchor_points_s, pred_distri)

        gt_labels = targets["gt_class"]
        gt_bboxes = targets["gt_bbox"]
        pad_gt_mask = targets["pad_gt_mask"]
        # label assignment
        if self.use_static_assigner:
            assigned_labels, assigned_bboxes, assigned_scores = self.static_assigner(
                anchor_bboxes=anchors,
                num_anchors_list=num_anchors_list,
                gt_labels=gt_labels,
                gt_bboxes=gt_bboxes,
                pad_gt_mask=pad_gt_mask,
                bg_index=self.num_classes,
                pred_bboxes=pred_bboxes.detach() * stride_tensor,
            )
            alpha_l = 0.25
        else:
            assigned_labels, assigned_bboxes, assigned_scores = self.assigner(
                pred_scores=pred_scores.detach().sigmoid(),  # Pred scores are logits on training for numerical stability
                pred_bboxes=pred_bboxes.detach() * stride_tensor,
                anchor_points=anchor_points,
                num_anchors_list=num_anchors_list,
                gt_labels=gt_labels,
                gt_bboxes=gt_bboxes,
                pad_gt_mask=pad_gt_mask,
                bg_index=self.num_classes,
            )
            alpha_l = -1
        # rescale bbox
        assigned_bboxes /= stride_tensor
        # cls loss
        if self.use_varifocal_loss:
            one_hot_label = torch.nn.functional.one_hot(assigned_labels, self.num_classes + 1)[..., :-1]
            loss_cls = self._varifocal_loss(pred_scores, assigned_scores, one_hot_label)
        else:
            loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha_l)

        assigned_scores_sum = assigned_scores.sum()
        if super_gradients.is_distributed():
            torch.distributed.all_reduce(assigned_scores_sum, op=torch.distributed.ReduceOp.SUM)
            assigned_scores_sum /= get_world_size()
        assigned_scores_sum = torch.clip(assigned_scores_sum, min=1.0)
        loss_cls /= assigned_scores_sum

        loss_iou, loss_dfl = self._bbox_loss(
            pred_distri,
            pred_bboxes,
            anchor_points_s,
            assigned_labels,
            assigned_bboxes,
            assigned_scores,
            assigned_scores_sum,
        )

        loss = self.classification_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + self.dfl_loss_weight * loss_dfl
        log_losses = torch.stack([loss_cls.detach(), loss_iou.detach(), loss_dfl.detach(), loss.detach()])

        return loss, log_losses

    @property
    def component_names(self):
        return ["loss_cls", "loss_iou", "loss_dfl", "loss"]

    def _df_loss(self, pred_dist: Tensor, target: Tensor) -> Tensor:
        target_left = target.long()
        target_right = target_left + 1
        weight_left = target_right.float() - target
        weight_right = 1 - weight_left

        # [B,L,C] -> [B,C,L] to make compatible with torch.nn.functional.cross_entropy
        # which expects channel dim to be at index 1
        pred_dist = torch.moveaxis(pred_dist, -1, 1)

        loss_left = torch.nn.functional.cross_entropy(pred_dist, target_left, reduction="none") * weight_left
        loss_right = torch.nn.functional.cross_entropy(pred_dist, target_right, reduction="none") * weight_right
        return (loss_left + loss_right).mean(dim=-1, keepdim=True)

    def _bbox_loss(
        self,
        pred_dist,
        pred_bboxes,
        anchor_points,
        assigned_labels,
        assigned_bboxes,
        assigned_scores,
        assigned_scores_sum,
    ):
        # select positive samples mask
        mask_positive = assigned_labels != self.num_classes
        num_pos = mask_positive.sum()
        # pos/neg loss
        if num_pos > 0:
            # l1 + iou
            bbox_mask = mask_positive.unsqueeze(-1).tile([1, 1, 4])
            pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape([-1, 4])
            assigned_bboxes_pos = torch.masked_select(assigned_bboxes, bbox_mask).reshape([-1, 4])
            bbox_weight = torch.masked_select(assigned_scores.sum(-1), mask_positive).unsqueeze(-1)

            loss_iou = self.iou_loss(pred_bboxes_pos, assigned_bboxes_pos) * bbox_weight
            loss_iou = loss_iou.sum() / assigned_scores_sum

            dist_mask = mask_positive.unsqueeze(-1).tile([1, 1, (self.reg_max + 1) * 4])
            pred_dist_pos = torch.masked_select(pred_dist, dist_mask).reshape([-1, 4, self.reg_max + 1])
            assigned_ltrb = self._bbox2distance(anchor_points, assigned_bboxes)
            assigned_ltrb_pos = torch.masked_select(assigned_ltrb, bbox_mask).reshape([-1, 4])
            loss_dfl = self._df_loss(pred_dist_pos, assigned_ltrb_pos) * bbox_weight
            loss_dfl = loss_dfl.sum() / assigned_scores_sum
        else:
            loss_iou = torch.zeros([], device=pred_bboxes.device)
            loss_dfl = pred_dist.sum() * 0.0
        return loss_iou, loss_dfl

    def _bbox_decode(self, anchor_points: Tensor, pred_dist: Tensor):
        b, l, *_ = pred_dist.size()
        pred_dist = torch.softmax(pred_dist.reshape([b, l, 4, self.reg_max + 1]), dim=-1)
        pred_dist = torch.nn.functional.conv2d(pred_dist.permute(0, 3, 1, 2), self.proj_conv).squeeze(1)
        return batch_distance2bbox(anchor_points, pred_dist)

    def _bbox2distance(self, points, bbox):
        x1y1, x2y2 = torch.split(bbox, 2, -1)
        lt = points - x1y1
        rb = x2y2 - points
        return torch.cat([lt, rb], dim=-1).clip(0, self.reg_max - 0.01)

    @staticmethod
    def _focal_loss(pred_logits: Tensor, label: Tensor, alpha=0.25, gamma=2.0) -> Tensor:
        pred_score = pred_logits.sigmoid()
        weight = (pred_score - label).pow(gamma)
        if alpha > 0:
            alpha_t = alpha * label + (1 - alpha) * (1 - label)
            weight *= alpha_t
        loss = -weight * (label * torch.nn.functional.logsigmoid(pred_logits) + (1 - label) * torch.nn.functional.logsigmoid(-pred_logits))
        return loss.sum()

    @staticmethod
    def _varifocal_loss(pred_logits: Tensor, gt_score: Tensor, label: Tensor, alpha=0.75, gamma=2.0) -> Tensor:
        pred_score = pred_logits.sigmoid()
        weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label
        loss = -weight * (gt_score * torch.nn.functional.logsigmoid(pred_logits) + (1 - gt_score) * torch.nn.functional.logsigmoid(-pred_logits))
        return loss.sum()
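
For intuition, here is a small illustrative sketch (toy values, not from the library) of the Varifocal weighting above; binary_cross_entropy_with_logits with target=gt_score is equivalent to the explicit logsigmoid form used in the source:

import torch

pred_logits = torch.tensor([[2.0, -1.0]])  # raw class logits for two anchors
gt_score = torch.tensor([[0.9, 0.0]])      # IoU-aware target score
label = torch.tensor([[1.0, 0.0]])         # one-hot positive mask
alpha, gamma = 0.75, 2.0

pred_score = pred_logits.sigmoid()
# Positives are weighted by their target score; negatives are down-weighted by
# alpha * p**gamma, so confident false positives dominate the negative term.
weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label
bce = torch.nn.functional.binary_cross_entropy_with_logits(pred_logits, gt_score, reduction="none")
loss = (weight * bce).sum()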

__init__(num_classes, use_varifocal_loss=True, use_static_assigner=True, reg_max=16, classification_loss_weight=1.0, iou_loss_weight=2.5, dfl_loss_weight=0.5)

Parameters:

Name Type Description Default
num_classes int

Number of classes

required
use_varifocal_loss bool

Whether to use Varifocal loss for classification loss; otherwise use Focal loss

True
use_static_assigner bool

Whether to use the static (ATSS) assigner or the Task-Aligned assigner

True
classification_loss_weight float

Classification loss weight

1.0
iou_loss_weight float

IoU loss weight

2.5
dfl_loss_weight float

DFL loss weight

0.5
reg_max int

Number of regression bins (Must match the number of bins in the PPYoloE head)

16
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def __init__(
    self,
    num_classes: int,
    use_varifocal_loss: bool = True,
    use_static_assigner: bool = True,
    reg_max: int = 16,
    classification_loss_weight: float = 1.0,
    iou_loss_weight: float = 2.5,
    dfl_loss_weight: float = 0.5,
):
    """
    :param num_classes: Number of classes
    :param use_varifocal_loss: Whether to use Varifocal loss for classification loss; otherwise use Focal loss
    :param use_static_assigner: Whether to use the static (ATSS) assigner or the Task-Aligned assigner
    :param classification_loss_weight: Classification loss weight
    :param iou_loss_weight: IoU loss weight
    :param dfl_loss_weight: DFL loss weight
    :param reg_max: Number of regression bins (Must match the number of bins in the PPYoloE head)
    """
    super().__init__()
    self.use_varifocal_loss = use_varifocal_loss
    self.classification_loss_weight = classification_loss_weight
    self.dfl_loss_weight = dfl_loss_weight
    self.iou_loss_weight = iou_loss_weight

    self.iou_loss = GIoULoss()
    self.static_assigner = ATSSAssigner(topk=9, num_classes=num_classes)
    self.assigner = TaskAlignedAssigner(topk=13, alpha=1.0, beta=6.0)
    self.use_static_assigner = use_static_assigner
    self.reg_max = reg_max
    self.num_classes = num_classes

    # Same as in PPYoloE head
    proj = torch.linspace(0, self.reg_max, self.reg_max + 1).reshape([1, self.reg_max + 1, 1, 1])
    self.register_buffer("proj_conv", proj)
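
A minimal construction sketch (hypothetical usage; assumes the class is exported as PPYoloELoss from super_gradients.training.losses):

from super_gradients.training.losses import PPYoloELoss

criterion = PPYoloELoss(
    num_classes=80,
    use_varifocal_loss=True,
    use_static_assigner=False,   # Task-Aligned assigner instead of the static ATSS assigner
    reg_max=16,                  # must match the number of bins in the PPYoloE head
    classification_loss_weight=1.0,
    iou_loss_weight=2.5,
    dfl_loss_weight=0.5,
)
# loss, log_losses = criterion(raw_model_outputs, targets)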

forward(outputs, targets)

Parameters:

Name Type Description Default
outputs Union[Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor], Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]]]

Tuple of pred_scores, pred_distri, anchors, anchor_points, num_anchors_list, stride_tensor

required
targets Tensor

(Dictionary [str, Tensor]) with keys:
- gt_class (Tensor, int64|int32): Label of gt_bboxes, shape (B, n, 1)
- gt_bbox (Tensor, float32): Ground truth bboxes, shape (B, n, 4) in x1y1x2y2 format
- pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape (B, n, 1)

required

Returns:

Type Description
Mapping[str, Tensor]

Tuple of (loss, log_losses), where log_losses is the detached stack of [loss_cls, loss_iou, loss_dfl, loss]
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def forward(
    self,
    outputs: Union[
        Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor], Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]]
    ],
    targets: Tensor,
) -> Mapping[str, Tensor]:
    """
    :param outputs: Tuple of pred_scores, pred_distri, anchors, anchor_points, num_anchors_list, stride_tensor
    :param targets: (Dictionary [str,Tensor]) with keys:
     - gt_class: (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
     - gt_bbox: (Tensor, float32): Ground truth bboxes, shape(B, n, 4) in x1y1x2y2 format
     - pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
    :return:
    """
    # in test/eval mode the model outputs a tuple where the second item is the raw predictions
    if isinstance(outputs, tuple) and len(outputs) == 2:
        _, predictions = outputs
    else:
        predictions = outputs

    (
        pred_scores,
        pred_distri,
        anchors,
        anchor_points,
        num_anchors_list,
        stride_tensor,
    ) = predictions

    targets = self._yolox_targets_to_ppyolo(targets, batch_size=pred_scores.size(0))  # yolox -> ppyolo

    anchor_points_s = anchor_points / stride_tensor
    pred_bboxes = self._bbox_decode(anchor_points_s, pred_distri)

    gt_labels = targets["gt_class"]
    gt_bboxes = targets["gt_bbox"]
    pad_gt_mask = targets["pad_gt_mask"]
    # label assignment
    if self.use_static_assigner:
        assigned_labels, assigned_bboxes, assigned_scores = self.static_assigner(
            anchor_bboxes=anchors,
            num_anchors_list=num_anchors_list,
            gt_labels=gt_labels,
            gt_bboxes=gt_bboxes,
            pad_gt_mask=pad_gt_mask,
            bg_index=self.num_classes,
            pred_bboxes=pred_bboxes.detach() * stride_tensor,
        )
        alpha_l = 0.25
    else:
        assigned_labels, assigned_bboxes, assigned_scores = self.assigner(
            pred_scores=pred_scores.detach().sigmoid(),  # Pred scores are logits during training, for numerical stability
            pred_bboxes=pred_bboxes.detach() * stride_tensor,
            anchor_points=anchor_points,
            num_anchors_list=num_anchors_list,
            gt_labels=gt_labels,
            gt_bboxes=gt_bboxes,
            pad_gt_mask=pad_gt_mask,
            bg_index=self.num_classes,
        )
        alpha_l = -1
    # rescale bbox
    assigned_bboxes /= stride_tensor
    # cls loss
    if self.use_varifocal_loss:
        one_hot_label = torch.nn.functional.one_hot(assigned_labels, self.num_classes + 1)[..., :-1]
        loss_cls = self._varifocal_loss(pred_scores, assigned_scores, one_hot_label)
    else:
        loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha_l)

    assigned_scores_sum = assigned_scores.sum()
    if super_gradients.is_distributed():
        torch.distributed.all_reduce(assigned_scores_sum, op=torch.distributed.ReduceOp.SUM)
        assigned_scores_sum /= get_world_size()
    assigned_scores_sum = torch.clip(assigned_scores_sum, min=1.0)
    loss_cls /= assigned_scores_sum

    loss_iou, loss_dfl = self._bbox_loss(
        pred_distri,
        pred_bboxes,
        anchor_points_s,
        assigned_labels,
        assigned_bboxes,
        assigned_scores,
        assigned_scores_sum,
    )

    loss = self.classification_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + self.dfl_loss_weight * loss_dfl
    log_losses = torch.stack([loss_cls.detach(), loss_iou.detach(), loss_dfl.detach(), loss.detach()])

    return loss, log_losses

TaskAlignedAssigner

Bases: nn.Module

TOOD: Task-aligned One-stage Object Detection

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
class TaskAlignedAssigner(nn.Module):
    """TOOD: Task-aligned One-stage Object Detection"""

    def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
        """

        :param topk: Maximum number of anchors selected for each gt box
        :param alpha: Power factor for class probabilities of predicted boxes (used to compute the alignment metric)
        :param beta: Power factor for IoU score of predicted boxes (used to compute the alignment metric)
        :param eps: Small constant for numerical stability
        """
        super(TaskAlignedAssigner, self).__init__()
        self.topk = topk
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @torch.no_grad()
    def forward(
        self,
        pred_scores: Tensor,
        pred_bboxes: Tensor,
        anchor_points: Tensor,
        num_anchors_list: list,
        gt_labels: Tensor,
        gt_bboxes: Tensor,
        pad_gt_mask: Tensor,
        bg_index: int,
        gt_scores: Optional[Tensor] = None,
    ):
        """
        This code is based on https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

        The assignment is done in the following steps
        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
        2. select top-k bbox as candidates for each gt
        3. limit the positive sample's center in gt (because the anchor-free detector
           can only predict positive distance)
        4. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.

        :param pred_scores: Tensor (float32): predicted class probability, shape(B, L, C)
        :param pred_bboxes: Tensor (float32): predicted bounding boxes, shape(B, L, 4)
        :param anchor_points: Tensor (float32): pre-defined anchors, shape(L, 2), "cxcy" format
        :param num_anchors_list: List with the number of anchors in each pyramid level, shape (L,)
        :param gt_labels: Tensor (int64|int32): Label of gt_bboxes, shape(B, n, 1)
        :param gt_bboxes: Tensor (float32): Ground truth bboxes, shape(B, n, 4)
        :param pad_gt_mask: Tensor (float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
        :param bg_index: Background class index (int)
        :param gt_scores: Tensor (optional, float32): Score of gt_bboxes, shape(B, n, 1)
        :return:
            - assigned_labels, Tensor of shape (B, L)
            - assigned_bboxes, Tensor of shape (B, L, 4)
            - assigned_scores, Tensor of shape (B, L, C)
        """
        assert pred_scores.ndim == pred_bboxes.ndim
        assert gt_labels.ndim == gt_bboxes.ndim and gt_bboxes.ndim == 3

        batch_size, num_anchors, num_classes = pred_scores.shape
        _, num_max_boxes, _ = gt_bboxes.shape

        # negative batch
        if num_max_boxes == 0:
            assigned_labels = torch.full([batch_size, num_anchors], bg_index, dtype=torch.long, device=gt_labels.device)
            assigned_bboxes = torch.zeros([batch_size, num_anchors, 4], device=gt_labels.device)
            assigned_scores = torch.zeros([batch_size, num_anchors, num_classes], device=gt_labels.device)
            return assigned_labels, assigned_bboxes, assigned_scores

        # compute iou between gt and pred bbox, [B, n, L]
        ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
        # gather pred bboxes class score
        pred_scores = torch.permute(pred_scores, [0, 2, 1])
        batch_ind = torch.arange(end=batch_size, dtype=gt_labels.dtype, device=gt_labels.device).unsqueeze(-1)
        gt_labels_ind = torch.stack([batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)], dim=-1)

        bbox_cls_scores = pred_scores[gt_labels_ind[..., 0], gt_labels_ind[..., 1]]

        # compute alignment metrics, [B, n, L]
        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(self.beta)

        # check the positive sample's center in gt, [B, n, L]
        is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)

        # select topk largest alignment metrics pred bbox as candidates
        # for each gt, [B, n, L]
        is_in_topk = gather_topk_anchors(alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected, [B, n, L]
        mask_positive_sum = mask_positive.sum(dim=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile([1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = torch.where(mask_multiple_gts, is_max_iou, mask_positive)
            mask_positive_sum = mask_positive.sum(dim=-2)
        assigned_gt_index = mask_positive.argmax(dim=-2)

        # assigned target
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = torch.gather(gt_labels.flatten(), index=assigned_gt_index.flatten(), dim=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = torch.where(mask_positive_sum > 0, assigned_labels, torch.full_like(assigned_labels, bg_index))

        assigned_bboxes = gt_bboxes.reshape([-1, 4])[assigned_gt_index.flatten(), :]
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        assigned_scores = torch.nn.functional.one_hot(assigned_labels, num_classes + 1)
        ind = list(range(num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = torch.index_select(assigned_scores, index=torch.tensor(ind, device=assigned_scores.device, dtype=torch.long), dim=-1)
        # rescale alignment metrics
        alignment_metrics *= mask_positive
        max_metrics_per_instance = alignment_metrics.max(dim=-1, keepdim=True).values
        max_ious_per_instance = (ious * mask_positive).max(dim=-1, keepdim=True).values
        alignment_metrics = alignment_metrics / (max_metrics_per_instance + self.eps) * max_ious_per_instance
        alignment_metrics = alignment_metrics.max(dim=-2).values.unsqueeze(-1)
        assigned_scores = assigned_scores * alignment_metrics

        return assigned_labels, assigned_bboxes, assigned_scores

__init__(topk=13, alpha=1.0, beta=6.0, eps=1e-09)

Parameters:

Name Type Description Default
topk

Maximum number of anchors selected for each gt box

13
alpha

Power factor for class probabilities of predicted boxes (used to compute the alignment metric)

1.0
beta

Power factor for IoU score of predicted boxes (used to compute the alignment metric)

6.0
eps

Small constant for numerical stability

1e-09
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
    """

    :param topk: Maximum number of anchors selected for each gt box
    :param alpha: Power factor for class probabilities of predicted boxes (used to compute the alignment metric)
    :param beta: Power factor for IoU score of predicted boxes (used to compute the alignment metric)
    :param eps: Small constant for numerical stability
    """
    super(TaskAlignedAssigner, self).__init__()
    self.topk = topk
    self.alpha = alpha
    self.beta = beta
    self.eps = eps
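
For intuition, the assigner ranks anchors by the alignment metric t = s**alpha * u**beta, where s is the predicted class probability and u the IoU of the predicted box with the gt box. A toy computation (made-up values):

import torch

s = torch.tensor([0.9, 0.2, 0.8])  # class score per candidate anchor
u = torch.tensor([0.5, 0.9, 0.8])  # IoU of the predicted box with the gt
alpha, beta = 1.0, 6.0
t = s.pow(alpha) * u.pow(beta)
# tensor([0.0141, 0.1063, 0.2097]): the anchor that is good at *both*
# classification and localization wins, not just the best classifier.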

forward(pred_scores, pred_bboxes, anchor_points, num_anchors_list, gt_labels, gt_bboxes, pad_gt_mask, bg_index, gt_scores=None)

This code is based on https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

The assignment is done in the following steps:

1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
2. select top-k bbox as candidates for each gt
3. limit the positive sample's center in gt (because the anchor-free detector can only predict positive distance)
4. if an anchor box is assigned to multiple gts, the one with the highest iou will be selected.

Parameters:

Name Type Description Default
pred_scores Tensor

Tensor (float32): predicted class probability, shape(B, L, C)

required
pred_bboxes Tensor

Tensor (float32): predicted bounding boxes, shape(B, L, 4)

required
anchor_points Tensor

Tensor (float32): pre-defined anchors, shape(L, 2), "cxcy" format

required
num_anchors_list list

List with the number of anchors in each pyramid level, shape (L,)

required
gt_labels Tensor

Tensor (int64|int32): Label of gt_bboxes, shape(B, n, 1)

required
gt_bboxes Tensor

Tensor (float32): Ground truth bboxes, shape(B, n, 4)

required
pad_gt_mask Tensor

Tensor (float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)

required
bg_index int

Background class index

required
gt_scores Optional[Tensor]

Tensor (optional, float32): Score of gt_bboxes, shape (B, n, 1)

None

Returns:

Type Description
- assigned_labels: Tensor of shape (B, L)
- assigned_bboxes: Tensor of shape (B, L, 4)
- assigned_scores: Tensor of shape (B, L, C)
Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
@torch.no_grad()
def forward(
    self,
    pred_scores: Tensor,
    pred_bboxes: Tensor,
    anchor_points: Tensor,
    num_anchors_list: list,
    gt_labels: Tensor,
    gt_bboxes: Tensor,
    pad_gt_mask: Tensor,
    bg_index: int,
    gt_scores: Optional[Tensor] = None,
):
    """
    This code is based on https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

    The assignment is done in the following steps
    1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
    2. select top-k bbox as candidates for each gt
    3. limit the positive sample's center in gt (because the anchor-free detector
       can only predict positive distance)
    4. if an anchor box is assigned to multiple gts, the one with the
       highest iou will be selected.

    :param pred_scores: Tensor (float32): predicted class probability, shape(B, L, C)
    :param pred_bboxes: Tensor (float32): predicted bounding boxes, shape(B, L, 4)
    :param anchor_points: Tensor (float32): pre-defined anchors, shape(L, 2), "cxcy" format
    :param num_anchors_list: List with the number of anchors in each pyramid level, shape (L,)
    :param gt_labels: Tensor (int64|int32): Label of gt_bboxes, shape(B, n, 1)
    :param gt_bboxes: Tensor (float32): Ground truth bboxes, shape(B, n, 4)
    :param pad_gt_mask: Tensor (float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
    :param bg_index: Background class index (int)
    :param gt_scores: Tensor (optional, float32): Score of gt_bboxes, shape(B, n, 1)
    :return:
        - assigned_labels, Tensor of shape (B, L)
        - assigned_bboxes, Tensor of shape (B, L, 4)
        - assigned_scores, Tensor of shape (B, L, C)
    """
    assert pred_scores.ndim == pred_bboxes.ndim
    assert gt_labels.ndim == gt_bboxes.ndim and gt_bboxes.ndim == 3

    batch_size, num_anchors, num_classes = pred_scores.shape
    _, num_max_boxes, _ = gt_bboxes.shape

    # negative batch
    if num_max_boxes == 0:
        assigned_labels = torch.full([batch_size, num_anchors], bg_index, dtype=torch.long, device=gt_labels.device)
        assigned_bboxes = torch.zeros([batch_size, num_anchors, 4], device=gt_labels.device)
        assigned_scores = torch.zeros([batch_size, num_anchors, num_classes], device=gt_labels.device)
        return assigned_labels, assigned_bboxes, assigned_scores

    # compute iou between gt and pred bbox, [B, n, L]
    ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
    # gather pred bboxes class score
    pred_scores = torch.permute(pred_scores, [0, 2, 1])
    batch_ind = torch.arange(end=batch_size, dtype=gt_labels.dtype, device=gt_labels.device).unsqueeze(-1)
    gt_labels_ind = torch.stack([batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)], dim=-1)

    bbox_cls_scores = pred_scores[gt_labels_ind[..., 0], gt_labels_ind[..., 1]]

    # compute alignment metrics, [B, n, L]
    alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(self.beta)

    # check the positive sample's center in gt, [B, n, L]
    is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)

    # select topk largest alignment metrics pred bbox as candidates
    # for each gt, [B, n, L]
    is_in_topk = gather_topk_anchors(alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)

    # select positive sample, [B, n, L]
    mask_positive = is_in_topk * is_in_gts * pad_gt_mask

    # if an anchor box is assigned to multiple gts,
    # the one with the highest iou will be selected, [B, n, L]
    mask_positive_sum = mask_positive.sum(dim=-2)
    if mask_positive_sum.max() > 1:
        mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile([1, num_max_boxes, 1])
        is_max_iou = compute_max_iou_anchor(ious)
        mask_positive = torch.where(mask_multiple_gts, is_max_iou, mask_positive)
        mask_positive_sum = mask_positive.sum(dim=-2)
    assigned_gt_index = mask_positive.argmax(dim=-2)

    # assigned target
    assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
    assigned_labels = torch.gather(gt_labels.flatten(), index=assigned_gt_index.flatten(), dim=0)
    assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
    assigned_labels = torch.where(mask_positive_sum > 0, assigned_labels, torch.full_like(assigned_labels, bg_index))

    assigned_bboxes = gt_bboxes.reshape([-1, 4])[assigned_gt_index.flatten(), :]
    assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

    assigned_scores = torch.nn.functional.one_hot(assigned_labels, num_classes + 1)
    ind = list(range(num_classes + 1))
    ind.remove(bg_index)
    assigned_scores = torch.index_select(assigned_scores, index=torch.tensor(ind, device=assigned_scores.device, dtype=torch.long), dim=-1)
    # rescale alignment metrics
    alignment_metrics *= mask_positive
    max_metrics_per_instance = alignment_metrics.max(dim=-1, keepdim=True).values
    max_ious_per_instance = (ious * mask_positive).max(dim=-1, keepdim=True).values
    alignment_metrics = alignment_metrics / (max_metrics_per_instance + self.eps) * max_ious_per_instance
    alignment_metrics = alignment_metrics.max(dim=-2).values.unsqueeze(-1)
    assigned_scores = assigned_scores * alignment_metrics

    return assigned_labels, assigned_bboxes, assigned_scores
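
A shape-level usage sketch (random placeholder tensors, illustrative only):

import torch

B, L, C, n = 2, 100, 80, 5  # batch, anchors, classes, max gt boxes per image
assigner = TaskAlignedAssigner(topk=13, alpha=1.0, beta=6.0)
labels, bboxes, scores = assigner(
    pred_scores=torch.rand(B, L, C),
    pred_bboxes=torch.rand(B, L, 4) * 100,
    anchor_points=torch.rand(L, 2) * 100,
    num_anchors_list=[L],
    gt_labels=torch.randint(0, C, (B, n, 1)),
    gt_bboxes=torch.rand(B, n, 4) * 100,
    pad_gt_mask=torch.ones(B, n, 1),
    bg_index=C,
)
# labels: (B, L), bboxes: (B, L, 4), scores: (B, L, C)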

batch_iou_similarity(box1, box2, eps=1e-09)

Calculate iou of box1 and box2 in batch. Bboxes are expected to be in x1y1x2y2 format.

Parameters:

Name Type Description Default
box1 torch.Tensor

box with the shape [N, M1, 4]

required
box2 torch.Tensor

box with the shape [N, M2, 4]

required

Returns:

Type Description
float

iou between box1 and box2 with the shape [N, M1, M2]

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def batch_iou_similarity(box1: torch.Tensor, box2: torch.Tensor, eps: float = 1e-9) -> float:
    """Calculate iou of box1 and box2 in batch. Bboxes are expected to be in x1y1x2y2 format.

    :param box1: box with the shape [N, M1, 4]
    :param box2: box with the shape [N, M2, 4]
    :return iou: iou between box1 and box2 with the shape [N, M1, M2]

    """
    box1 = box1.unsqueeze(2)  # [N, M1, 4] -> [N, M1, 1, 4]
    box2 = box2.unsqueeze(1)  # [N, M2, 4] -> [N, 1, M2, 4]
    px1y1, px2y2 = box1[:, :, :, 0:2], box1[:, :, :, 2:4]
    gx1y1, gx2y2 = box2[:, :, :, 0:2], box2[:, :, :, 2:4]
    x1y1 = torch.maximum(px1y1, gx1y1)
    x2y2 = torch.minimum(px2y2, gx2y2)
    overlap = (x2y2 - x1y1).clip(0).prod(-1)
    area1 = (px2y2 - px1y1).clip(0).prod(-1)
    area2 = (gx2y2 - gx1y1).clip(0).prod(-1)
    union = area1 + area2 - overlap + eps
    return overlap / union
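
A quick sanity check (illustrative): an identical box pair gives IoU of ~1, a disjoint pair gives 0:

import torch

box1 = torch.tensor([[[0.0, 0.0, 10.0, 10.0]]])    # [N=1, M1=1, 4]
box2 = torch.tensor([[[0.0, 0.0, 10.0, 10.0],
                      [20.0, 20.0, 30.0, 30.0]]])  # [N=1, M2=2, 4]
print(batch_iou_similarity(box1, box2))            # ~[[[1.0, 0.0]]], shape [1, 1, 2]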

bbox_center(boxes)

Get bbox centers from boxes.

Parameters:

Name Type Description Default
boxes Tensor

Boxes with shape (..., 4), "xmin, ymin, xmax, ymax" format.

required

Returns:

Type Description
Tensor

Boxes centers with shape (..., 2), "cx, cy" format.

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def bbox_center(boxes: Tensor) -> Tensor:
    """
    Get bbox centers from boxes.

    :param boxes:   Boxes with shape (..., 4), "xmin, ymin, xmax, ymax" format.
    :return:        Boxes centers with shape (..., 2), "cx, cy" format.
    """
    boxes_cx = (boxes[..., 0] + boxes[..., 2]) / 2
    boxes_cy = (boxes[..., 1] + boxes[..., 3]) / 2
    return torch.stack([boxes_cx, boxes_cy], dim=-1)

bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-06)

Calculate overlap between two sets of bboxes.

If is_aligned is False, then calculate the overlaps between each bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned pair of bboxes1 and bboxes2.

Parameters:

Name Type Description Default
bboxes1 torch.Tensor

shape (B, m, 4) in <x1, y1, x2, y2> format or empty.

required
bboxes2 torch.Tensor

shape (B, n, 4) in <x1, y1, x2, y2> format or empty. B indicates the batch dim, in shape (B1, B2, ..., Bn). If is_aligned is True, then m and n must be equal.

required
mode str

Either "iou" (intersection over union) or "iof" (intersection over foreground).

'iou'
is_aligned bool

If True, then m and n must be equal. Default False.

False
eps float

A value added to the denominator for numerical stability. Default 1e-6.

1e-06

Returns:

Type Description
torch.Tensor

Tensor of shape (m, n) if is_aligned is False else shape (m,)

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def bbox_overlaps(bboxes1: torch.Tensor, bboxes2: torch.Tensor, mode: str = "iou", is_aligned: bool = False, eps: float = 1e-6) -> torch.Tensor:
    """
    Calculate overlap between two sets of bboxes.

    If ``is_aligned`` is ``False``, then calculate the overlaps between each
    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
    pair of bboxes1 and bboxes2.

    :param bboxes1:     shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
    :param bboxes2:     shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
                                B indicates the batch dim, in shape (B1, B2, ..., Bn).
                                If ``is_aligned`` is ``True``, then m and n must be equal.
    :param mode:        Either "iou" (intersection over union), "iof" (intersection over foreground) or "giou" (generalized IoU).
    :param is_aligned:  If True, then m and n must be equal. Default False.
    :param eps:         A value added to the denominator for numerical stability. Default 1e-6.
    :return:            Tensor of shape (m, n) if ``is_aligned`` is False else shape (m,)
    """
    assert mode in ["iou", "iof", "giou"], "Unsupported mode {}".format(mode)
    # Either the boxes are empty or the length of the boxes' last dimension is 4
    assert bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0
    assert bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0

    # Batch dim must be the same
    # Batch dim: (B1, B2, ... Bn)
    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
    batch_shape = bboxes1.shape[:-2]

    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        if is_aligned:
            return np.random.random(batch_shape + (rows,))
        else:
            return np.random.random(batch_shape + (rows, cols))

    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])

    if is_aligned:
        lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
        rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ["iou", "giou"]:
            union = area1 + area2 - overlap
        else:
            union = area1
        if mode == "giou":
            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
    else:
        lt = np.maximum(bboxes1[..., :, None, :2], bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
        rb = np.minimum(bboxes1[..., :, None, 2:], bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, cols, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ["iou", "giou"]:
            union = area1[..., None] + area2[..., None, :] - overlap
        else:
            union = area1[..., None]
        if mode == "giou":
            enclosed_lt = np.minimum(bboxes1[..., :, None, :2], bboxes2[..., None, :, :2])
            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:], bboxes2[..., None, :, 2:])

    eps = np.array([eps])
    union = np.maximum(union, eps)
    ious = overlap / union
    if mode in ["iou", "iof"]:
        return ious
    # calculate gious
    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
    enclose_area = np.maximum(enclose_area, eps)
    gious = ious - (enclose_area - union) / enclose_area
    return gious
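
Note that the body above mixes np.* calls with torch tensors (a leftover of the original port), so treat it with care. For reference, the plain IoU it computes reduces, for a single aligned pair, to the familiar formula; a pure-torch sketch with illustrative values:

import torch

a = torch.tensor([0.0, 0.0, 10.0, 10.0])
b = torch.tensor([5.0, 5.0, 15.0, 15.0])
lt = torch.maximum(a[:2], b[:2])  # top-left corner of the intersection
rb = torch.minimum(a[2:], b[2:])  # bottom-right corner of the intersection
wh = (rb - lt).clamp(min=0)
inter = wh[0] * wh[1]             # 25.0
union = 100.0 + 100.0 - inter     # 175.0
print(inter / union)              # tensor(0.1429)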

check_points_inside_bboxes(points, bboxes, center_radius_tensor=None, eps=1e-09)

Parameters:

Name Type Description Default
points Tensor

Tensor (float32) of shape[L, 2], "xy" format, L: num_anchors

required
bboxes Tensor

Tensor (float32) of shape[B, n, 4], "xmin, ymin, xmax, ymax" format

required
center_radius_tensor Optional[Tensor]

Tensor (float32) of shape [L, 1]. Default: None.

None
eps float

Default: 1e-9

1e-09

Returns:

Type Description
Tensor

Tensor (float32) of shape[B, n, L], value=1. means selected

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def check_points_inside_bboxes(points: Tensor, bboxes: Tensor, center_radius_tensor: Optional[Tensor] = None, eps: float = 1e-9) -> Tensor:
    """

    :param points:                  Tensor (float32) of shape[L, 2], "xy" format, L: num_anchors
    :param bboxes:                  Tensor (float32) of shape[B, n, 4], "xmin, ymin, xmax, ymax" format
    :param center_radius_tensor:    Tensor (float32) of shape [L, 1]. Default: None.
    :param eps:                     Default: 1e-9

    :return is_in_bboxes: Tensor (float32) of shape[B, n, L], value=1. means selected
    """
    points = points.unsqueeze(0).unsqueeze(0)
    x, y = points.chunk(2, dim=-1)
    xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, dim=-1)
    # check whether `points` is in `bboxes`
    left = x - xmin
    top = y - ymin
    right = xmax - x
    bottom = ymax - y
    delta_ltrb = torch.cat([left, top, right, bottom], dim=-1)
    is_in_bboxes = delta_ltrb.min(dim=-1).values > eps
    if center_radius_tensor is not None:
        # check whether `points` is in `center_radius`
        center_radius_tensor = center_radius_tensor.unsqueeze(0).unsqueeze(0)
        cx = (xmin + xmax) * 0.5
        cy = (ymin + ymax) * 0.5
        left = x - (cx - center_radius_tensor)
        top = y - (cy - center_radius_tensor)
        right = (cx + center_radius_tensor) - x
        bottom = (cy + center_radius_tensor) - y
        delta_ltrb_c = torch.cat([left, top, right, bottom], dim=-1)
        is_in_center = delta_ltrb_c.min(dim=-1).values > eps  # .values: min(dim=...) returns (values, indices)
        return (torch.logical_and(is_in_bboxes, is_in_center), torch.logical_or(is_in_bboxes, is_in_center))

    return is_in_bboxes.type_as(bboxes)
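
An illustrative call with one anchor point inside and one outside a single gt box:

import torch

points = torch.tensor([[5.0, 5.0], [50.0, 50.0]])  # [L=2, 2]
bboxes = torch.tensor([[[0.0, 0.0, 10.0, 10.0]]])  # [B=1, n=1, 4]
print(check_points_inside_bboxes(points, bboxes))  # tensor([[[1., 0.]]])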

compute_max_iou_anchor(ious)

For each anchor, find the GT with the largest IOU.

Parameters:

Name Type Description Default
ious Tensor

Tensor (float32) of shape[B, n, L], n: num_gts, L: num_anchors

required

Returns:

Type Description
Tensor

is_max_iou is Tensor (float32) of shape[B, n, L], value=1. means selected

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def compute_max_iou_anchor(ious: Tensor) -> Tensor:
    r"""
    For each anchor, find the GT with the largest IOU.

    :param ious: Tensor (float32) of shape[B, n, L], n: num_gts, L: num_anchors
    :return: is_max_iou is Tensor (float32) of shape[B, n, L], value=1. means selected
    """
    num_max_boxes = ious.shape[-2]
    max_iou_index = ious.argmax(dim=-2)
    is_max_iou: Tensor = torch.nn.functional.one_hot(max_iou_index, num_max_boxes).permute([0, 2, 1])
    return is_max_iou.type_as(ious)

compute_max_iou_gt(ious)

For each GT, find the anchor with the largest IOU.

Parameters:

Name Type Description Default
ious Tensor

Tensor (float32) of shape[B, n, L], n: num_gts, L: num_anchors

required

Returns:

Type Description
Tensor

is_max_iou, Tensor (float32) of shape[B, n, L], value=1. means selected

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def compute_max_iou_gt(ious: Tensor) -> Tensor:
    """
    For each GT, find the anchor with the largest IOU.

    :param ious: Tensor (float32) of shape[B, n, L], n: num_gts, L: num_anchors
    :return:    is_max_iou, Tensor (float32) of shape[B, n, L], value=1. means selected
    """
    num_anchors = ious.shape[-1]
    max_iou_index = ious.argmax(dim=-1)
    is_max_iou = torch.nn.functional.one_hot(max_iou_index, num_anchors)
    return is_max_iou.type_as(ious)  # torch equivalent of the numpy-style astype

gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-09)

Parameters:

Name Type Description Default
metrics Tensor

Tensor(float32) of shape[B, n, L], n: num_gts, L: num_anchors

required
topk int

The number of top elements to look for along the axis.

required
largest bool

If set to true, algorithm will sort by descending order, otherwise sort by ascending order.

True
topk_mask Optional[Tensor]

Tensor (float32) of shape [B, n, 1], ignore-bbox mask

None
eps float

Default: 1e-9

1e-09

Returns:

Type Description
Tensor

is_in_topk, Tensor (float32) of shape[B, n, L], value=1. means selected

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def gather_topk_anchors(metrics: Tensor, topk: int, largest: bool = True, topk_mask: Optional[Tensor] = None, eps: float = 1e-9) -> Tensor:
    """

    :param metrics:     Tensor(float32) of shape[B, n, L], n: num_gts, L: num_anchors
    :param topk:        The number of top elements to look for along the axis.
    :param largest:     If set to true, algorithm will sort by descending order, otherwise sort by ascending order.
    :param topk_mask:   Tensor(float32) of shape[B, n, 1], ignore-bbox mask
    :param eps:         Default: 1e-9

    :return: is_in_topk, Tensor (float32) of shape[B, n, L], value=1. means selected
    """
    num_anchors = metrics.shape[-1]
    topk_metrics, topk_idxs = torch.topk(metrics, topk, dim=-1, largest=largest)
    if topk_mask is None:
        topk_mask = (topk_metrics.max(dim=-1, keepdim=True).values > eps).type_as(metrics)
    is_in_topk = torch.nn.functional.one_hot(topk_idxs, num_anchors).sum(dim=-2).type_as(metrics)
    return is_in_topk * topk_mask
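
An illustrative call selecting the top-2 anchors per gt from a toy metric map:

import torch

metrics = torch.tensor([[[0.1, 0.9, 0.3, 0.7]]])  # [B=1, n=1, L=4]
print(gather_topk_anchors(metrics, topk=2))       # tensor([[[0., 1., 0., 1.]]])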

iou_similarity(box1, box2, eps=1e-10)

Calculate iou of box1 and box2. Bboxes are expected to be in x1y1x2y2 format.

Parameters:

Name Type Description Default
box1 torch.Tensor

box with the shape [M1, 4]

required
box2 torch.Tensor

box with the shape [M2, 4]

required

Returns:

Type Description
float

iou between box1 and box2 with the shape [M1, M2]

Source code in latest/src/super_gradients/training/losses/ppyolo_loss.py
def iou_similarity(box1: torch.Tensor, box2: torch.Tensor, eps: float = 1e-10) -> float:
    """
    Calculate iou of box1 and box2. Bboxes are expected to be in x1y1x2y2 format.

    :param box1: box with the shape [M1, 4]
    :param box2: box with the shape [M2, 4]

    :return iou: iou between box1 and box2 with the shape [M1, M2]
    """
    box1 = box1.unsqueeze(1)  # [M1, 4] -> [M1, 1, 4]
    box2 = box2.unsqueeze(0)  # [M2, 4] -> [1, M2, 4]
    px1y1, px2y2 = box1[:, :, 0:2], box1[:, :, 2:4]
    gx1y1, gx2y2 = box2[:, :, 0:2], box2[:, :, 2:4]
    x1y1 = torch.maximum(px1y1, gx1y1)
    x2y2 = torch.minimum(px2y2, gx2y2)
    overlap = (x2y2 - x1y1).clip(0).prod(-1)
    area1 = (px2y2 - px1y1).clip(0).prod(-1)
    area2 = (gx2y2 - gx1y1).clip(0).prod(-1)
    union = area1 + area2 - overlap + eps
    return overlap / union

RSquaredLoss

Bases: _Loss

Source code in latest/src/super_gradients/training/losses/r_squared_loss.py
@register_loss(Losses.R_SQUARED_LOSS)
class RSquaredLoss(_Loss):
    def forward(self, output, target):
        # FIXME - THIS NEEDS TO BE CHANGED SUCH THAT THIS CLASS INHERETS FROM _Loss (TAKE A LOOK AT YoLoV3DetectionLoss)
        """Computes the R-squared for the output and target values
        :param output: Tensor / Numpy / List
            The prediction
        :param target: Tensor / Numpy / List
            The corresponding labels
        """
        # Convert to tensor
        output = convert_to_tensor(output)
        target = convert_to_tensor(target)

        criterion_mse = nn.MSELoss()
        return 1 - criterion_mse(output, target).item() / torch.var(target).item()

forward(output, target)

Computes the R-squared for the output and target values

Parameters:

Name Type Description Default
output

Tensor / Numpy / List The prediction

required
target

Tensor / Numpy / List The corresponding labels

required
Source code in latest/src/super_gradients/training/losses/r_squared_loss.py
def forward(self, output, target):
    # FIXME - THIS NEEDS TO BE CHANGED SUCH THAT THIS CLASS INHERETS FROM _Loss (TAKE A LOOK AT YoLoV3DetectionLoss)
    """Computes the R-squared for the output and target values
    :param output: Tensor / Numpy / List
        The prediction
    :param target: Tensor / Numpy / List
        The corresponding labels
    """
    # Convert to tensor
    output = convert_to_tensor(output)
    target = convert_to_tensor(target)

    criterion_mse = nn.MSELoss()
    return 1 - criterion_mse(output, target).item() / torch.var(target).item()
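
A numeric sanity check (illustrative; assumes convert_to_tensor passes torch tensors through unchanged): a perfect prediction gives R^2 = 1. Predicting the target mean gives 0.25 here rather than exactly 0, because torch.var is unbiased (divides by n - 1) while MSELoss divides by n:

import torch

target = torch.tensor([1.0, 2.0, 3.0, 4.0])
loss_fn = RSquaredLoss()
print(loss_fn(target.clone(), target))                # 1.0
print(loss_fn(torch.full_like(target, 2.5), target))  # 0.25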

RescoringLoss

Bases: nn.Module

Source code in latest/src/super_gradients/training/losses/rescoring_loss.py
@register_loss(Losses.RESCORING_LOSS)
class RescoringLoss(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, predictions: Tuple[Tensor, Tensor], targets):
        """

        :param predictions: Tuple of (poses, scores)
        :param targets: Target scores
        :return: KD loss between predicted scores and target scores
        """
        return torch.nn.functional.binary_cross_entropy_with_logits(predictions[1], targets)

forward(predictions, targets)

Parameters:

Name Type Description Default
predictions Tuple[Tensor, Tensor]

Tuple of (poses, scores)

required
targets

Target scores

required

Returns:

Type Description

KD loss between predicted scores and target scores

Source code in latest/src/super_gradients/training/losses/rescoring_loss.py
def forward(self, predictions: Tuple[Tensor, Tensor], targets):
    """

    :param predictions: Tuple of (poses, scores)
    :param targets: Target scores
    :return: KD loss between predicted scores and target scores
    """
    return torch.nn.functional.binary_cross_entropy_with_logits(predictions[1], targets)

SegKDLoss

Bases: nn.Module

Wrapper loss for semantic segmentation KD. This loss includes two loss components: ce_loss, i.e. CrossEntropyLoss, and kd_loss, i.e. ChannelWiseKnowledgeDistillationLoss.

Source code in latest/src/super_gradients/training/losses/seg_kd_loss.py
class SegKDLoss(nn.Module):
    """
    Wrapper loss for semantic segmentation KD.
    This loss includes two loss components: `ce_loss`, i.e. CrossEntropyLoss, and `kd_loss`, i.e.
    `ChannelWiseKnowledgeDistillationLoss`.
    """

    def __init__(self, kd_loss: nn.Module, ce_loss: nn.Module, weights: Union[tuple, list], kd_loss_weights: Union[tuple, list]):
        """
        :param kd_loss: knowledge distillation criterion, such as ChannelWiseKnowledgeDistillationLoss.
         This loss should expect as input a triplet of the predictions from the model with shape [B, C, H, W],
         the teacher model predictions with shape [B, C, H, W] and the target labels with shape [B, H, W].
        :param ce_loss: classification criterion, such as CE, OHEM, MaskAttention, SL1, etc.
         This loss should expect as input the predictions from the model with shape [B, C, H, W], and the target labels
         with shape [B, H, W].
        :param weights: lambda weights to apply to each prediction map head.
        :param kd_loss_weights: lambda weights to apply to each criterion. 2 values are expected, as follows:
         [ce_loss_weight, kd_loss_weight].
        """
        super().__init__()
        self.kd_loss_weights = kd_loss_weights
        self.weights = weights

        self.kd_loss = kd_loss
        self.ce_loss = ce_loss

        self._validate_arguments()

    def _validate_arguments(self):
        # Check num of loss weights
        if len(self.kd_loss_weights) != 2:
            raise ValueError(f"kd_loss_weights is expected to be an iterable with size 2," f" found: {len(self.kd_loss_weights)}")

    def forward(self, preds: KDOutput, target: torch.Tensor):
        if not isinstance(preds, KDOutput):
            raise RuntimeError(
                "Predictions argument for `SegKDLoss` forward method is expected to be a `KDOutput` to"
                " include the predictions from both the student and the teacher models."
            )
        teacher_preds = preds.teacher_output
        student_preds = preds.student_output

        if isinstance(teacher_preds, torch.Tensor):
            teacher_preds = (teacher_preds,)
        if isinstance(student_preds, torch.Tensor):
            student_preds = (student_preds,)

        losses = []
        total_loss = 0
        # Main and auxiliaries feature maps losses
        for i in range(len(self.weights)):
            ce_loss = self.ce_loss(student_preds[i], target)
            cwd_loss = self.kd_loss(student_preds[i], teacher_preds[i], target)

            loss = self.kd_loss_weights[0] * ce_loss + self.kd_loss_weights[1] * cwd_loss
            total_loss += self.weights[i] * loss
            losses += [ce_loss, cwd_loss]

        losses.append(total_loss)

        return total_loss, torch.stack(losses, dim=0).detach()

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        component_names = []
        for i in range(len(self.weights)):
            component_names += [f"Head-{i}_CE_Loss", f"Head-{i}_KD_Loss"]
        component_names.append("Total_Loss")
        return component_names
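
A hypothetical wiring sketch (nn.CrossEntropyLoss stands in for any pixel-wise criterion; the kd_loss constructor arguments are assumed, see the ChannelWiseKnowledgeDistillationLoss docs earlier on this page):

import torch.nn as nn

seg_kd = SegKDLoss(
    kd_loss=ChannelWiseKnowledgeDistillationLoss(),  # assumed default constructor
    ce_loss=nn.CrossEntropyLoss(ignore_index=255),
    weights=[1.0, 0.4],          # main head + one auxiliary head
    kd_loss_weights=[1.0, 3.0],  # [ce_loss_weight, kd_loss_weight]
)
# total_loss, components = seg_kd(kd_output, target)  # kd_output is a KDOutput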

component_names property

Component names for logging during training. These correspond to 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

__init__(kd_loss, ce_loss, weights, kd_loss_weights)

Parameters:

Name Type Description Default
kd_loss nn.Module

knowledge distillation criterion, such as ChannelWiseKnowledgeDistillationLoss. This loss should expect as input a triplet of the predictions from the model with shape [B, C, H, W], the teacher model predictions with shape [B, C, H, W] and the target labels with shape [B, H, W].

required
ce_loss nn.Module

classification criterion, such as CE, OHEM, MaskAttention, SL1, etc. This loss should expect as input the predictions from the model with shape [B, C, H, W], and the target labels with shape [B, H, W].

required
weights Union[tuple, list]

lambda weights to apply to each prediction map head.

required
kd_loss_weights Union[tuple, list]

lambda weights to apply to each criterion. 2 values are expected, as follows: [ce_loss_weight, kd_loss_weight].

required
Source code in latest/src/super_gradients/training/losses/seg_kd_loss.py
def __init__(self, kd_loss: nn.Module, ce_loss: nn.Module, weights: Union[tuple, list], kd_loss_weights: Union[tuple, list]):
    """
    :param kd_loss: knowledge distillation criterion, such as ChannelWiseKnowledgeDistillationLoss.
     This loss should expect as input a triplet of the predictions from the model with shape [B, C, H, W],
     the teacher model predictions with shape [B, C, H, W] and the target labels with shape [B, H, W].
    :param ce_loss: classification criterion, such as CE, OHEM, MaskAttention, SL1, etc.
     This loss should expect as input the predictions from the model with shape [B, C, H, W], and the target labels
     with shape [B, H, W].
    :param weights: lambda weights to apply to each prediction map head.
    :param kd_loss_weights: lambda weights to apply to each criterion. 2 values are expected, as follows:
     [ce_loss_weight, kd_loss_weight].
    """
    super().__init__()
    self.kd_loss_weights = kd_loss_weights
    self.weights = weights

    self.kd_loss = kd_loss
    self.ce_loss = ce_loss

    self._validate_arguments()

ShelfNetOHEMLoss

Bases: OhemCELoss

Source code in latest/src/super_gradients/training/losses/shelfnet_ohem_loss.py
@register_loss(Losses.SHELFNET_OHEM_LOSS)
class ShelfNetOHEMLoss(OhemCELoss):
    def __init__(self, threshold: float = 0.7, mining_percent: float = 1e-4, ignore_lb: int = 255):
        """
        This loss is an extension of the Ohem (Online Hard Example Mining Cross Entropy) Loss.
        :param threshold: threshold for the hard example mining algorithm
        :param mining_percent: minimum percentage of total pixels for the hard example mining algorithm
        (taking only the largest losses).
        Default is 1e-4 which, per the legacy settings, corresponds to 400 pixels for a typical input of (512x512)
         and a batch of 16.
        :param ignore_lb: targets label to be ignored
        """
        super().__init__(threshold=threshold, mining_percent=mining_percent, ignore_lb=ignore_lb)

    def forward(self, predictions_list: list, targets):
        losses = []
        for predictions in predictions_list:
            losses.append(super().forward(predictions, targets))
        total_loss = sum(losses)
        losses.append(total_loss)

        return total_loss, torch.stack(losses, dim=0).detach()

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        return ["Loss1/4", "Loss1/8", "Loss1/16", "Loss"]

component_names property

Component names for logging during training. These correspond to 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

__init__(threshold=0.7, mining_percent=0.0001, ignore_lb=255)

This loss is an extension of the Ohem (Online Hard Example Mining Cross Entropy) Loss.

Parameters:

Name Type Description Default
threshold float

threshold for the hard example mining algorithm

0.7
mining_percent float

minimum percentage of total pixels for the hard example mining algorithm (taking only the largest losses). Default is 1e-4 which, per the legacy settings, corresponds to 400 pixels for a typical input of (512x512) and a batch of 16.

0.0001
ignore_lb int

targets label to be ignored

255
Source code in latest/src/super_gradients/training/losses/shelfnet_ohem_loss.py
def __init__(self, threshold: float = 0.7, mining_percent: float = 1e-4, ignore_lb: int = 255):
    """
    This loss is an extension of the Ohem (Online Hard Example Mining Cross Entropy) Loss.
    :param threshold: threshold for the hard example mining algorithm
    :param mining_percent: minimum percentage of total pixels for the hard example mining algorithm
    (taking only the largest losses).
    Default is 1e-4 which, per the legacy settings, corresponds to 400 pixels for a typical input of (512x512)
     and a batch of 16.
    :param ignore_lb: targets label to be ignored
    """
    super().__init__(threshold=threshold, mining_percent=mining_percent, ignore_lb=ignore_lb)

ShelfNetSemanticEncodingLoss

Bases: nn.CrossEntropyLoss

2D Cross Entropy Loss with Auxiliary Loss

Source code in latest/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py
@register_loss(Losses.SHELFNET_SE_LOSS)
class ShelfNetSemanticEncodingLoss(nn.CrossEntropyLoss):
    """2D Cross Entropy Loss with Auxilary Loss"""

    # FIXME - THIS LOSS SHOULD BE CHANGED TO SUPPORT APEX
    def __init__(self, se_weight=0.2, nclass=21, aux_weight=0.4, weight=None, ignore_index=-1):
        super().__init__(weight, None, ignore_index)
        self.nclass = nclass
        self.se_weight = se_weight
        self.aux_weight = aux_weight

        # FIXME - TEST CODE LOTEM, CHANGED IN ORDER TO WORK WITH apex.amp
        self.bcewithlogitsloss = nn.BCELoss(weight)

    def forward(self, logits, labels):
        pred1, se_pred, pred2 = logits

        batch = labels.size(0)
        se_target = Variable(torch.zeros(batch, self.nclass))
        # FIXME - THIS IS WHAT apex MIGHT BE FAILING TO WORK WITH
        for i in range(batch):
            hist = torch.histc(labels[i].cpu().data.float(), bins=self.nclass, min=0, max=self.nclass - 1)
            vect = hist > 0
            se_target[i] = vect

        loss1 = super().forward(pred1, labels)
        loss2 = super().forward(pred2, labels)
        loss3 = self.bcewithlogitsloss(torch.sigmoid(se_pred), se_target.data.cuda())  # FIXME - MAYBE CHANGE TO SIGMOID
        total_loss = loss1 + self.aux_weight * loss2 + self.se_weight * loss3
        losses = [loss1, loss2, loss3, total_loss]
        return total_loss, torch.stack(losses, dim=0).detach()

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        return ["loss1", "loss2", "loss3", "total_loss"]

component_names property

Component names for logging during training. These correspond to 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

HardMiningCrossEntropyLoss

Bases: _Loss

L_cls = [CE of all positives] + [CE of the hardest backgrounds] where the second term is built from [neg_pos_ratio * positive pairs] background cells with the highest CE (the hardest background cells)

Source code in latest/src/super_gradients/training/losses/ssd_loss.py
class HardMiningCrossEntropyLoss(_Loss):
    """
    L_cls = [CE of all positives] + [CE of the hardest backgrounds]
    where the second term is built from [neg_pos_ratio * positive pairs] background cells with the highest CE
    (the hardest background cells)
    """

    def __init__(self, neg_pos_ratio: float):
        """
        :param neg_pos_ratio:   a ratio of negative samples to positive samples in the loss
                                (unlike positives, not all negatives will be used:
                                for each positive the [neg_pos_ratio] hardest negatives will be selected)
        """
        super().__init__()
        self.neg_pos_ratio = neg_pos_ratio
        self.ce = nn.CrossEntropyLoss(reduce=False)

    def forward(self, pred_labels, target_labels):
        mask = target_labels > 0  # not background
        pos_num = mask.sum(dim=1)

        # HARD NEGATIVE MINING
        con = self.ce(pred_labels, target_labels)

        # POSITIVE MASK WILL NOT BE SELECTED
        # set 0. loss for all positive objects, leave the loss where the object is background
        con_neg = con.clone()
        con_neg[mask] = 0
        # sort background cells by CE loss value (bigger_first)
        _, con_idx = con_neg.sort(dim=1, descending=True)
        # restore cells order, get each cell's order (rank) in CE loss sorting
        _, con_rank = con_idx.sort(dim=1)

        # NUMBER OF NEGATIVE THREE TIMES POSITIVE
        neg_num = torch.clamp(self.neg_pos_ratio * pos_num, max=mask.size(1)).unsqueeze(-1)
        # for each image into neg mask we'll take (3 * positive pairs) background objects with the highest CE
        neg_mask = con_rank < neg_num

        closs = (con * (mask.float() + neg_mask.float())).sum(dim=1)
        return closs
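
The double sort above is the standard rank trick: sorting the losses once gives the order, and sorting the resulting indices gives each cell's rank in that order. A toy demonstration:

import torch

con_neg = torch.tensor([[0.2, 0.9, 0.0, 0.5]])
_, con_idx = con_neg.sort(dim=1, descending=True)  # order: [1, 3, 0, 2]
_, con_rank = con_idx.sort(dim=1)                  # rank:  [2, 0, 3, 1]
print(con_rank)  # cell 1 has the highest loss (rank 0), so `con_rank < neg_num`
                 # keeps exactly the neg_num hardest negatives per image.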

__init__(neg_pos_ratio)

Parameters:

Name Type Description Default
neg_pos_ratio float

a ratio of negative samples to positive samples in the loss (unlike positives, not all negatives will be used: for each positive the [neg_pos_ratio] hardest negatives will be selected)

required
Source code in latest/src/super_gradients/training/losses/ssd_loss.py
def __init__(self, neg_pos_ratio: float):
    """
    :param neg_pos_ratio:   a ratio of negative samples to positive samples in the loss
                            (unlike positives, not all negatives will be used:
                            for each positive the [neg_pos_ratio] hardest negatives will be selected)
    """
    super().__init__()
    self.neg_pos_ratio = neg_pos_ratio
    self.ce = nn.CrossEntropyLoss(reduction="none")  # per-element CE; the deprecated reduce=False is equivalent

SSDLoss

Bases: _Loss

Implements the loss as the sum of the following:
1. Confidence Loss: All labels, with hard negative mining
2. Localization Loss: Only on positive labels

L = (2 - alpha) * L_l1 + alpha * L_cls, where
  * L_cls is HardMiningCrossEntropyLoss
  * L_l1 = [SmoothL1Loss for all positives]

Source code in latest/src/super_gradients/training/losses/ssd_loss.py
@register_loss(Losses.SSD_LOSS)
class SSDLoss(_Loss):
    """
        Implements the loss as the sum of the following:
        1. Confidence Loss: All labels, with hard negative mining
        2. Localization Loss: Only on positive labels

    L = (2 - alpha) * L_l1 + alpha * L_cls, where
        * L_cls is HardMiningCrossEntropyLoss
        * L_l1 = [SmoothL1Loss for all positives]
    """

    def __init__(self, dboxes: DefaultBoxes, alpha: float = 1.0, iou_thresh: float = 0.5, neg_pos_ratio: float = 3.0):
        """
        :param dboxes:          model anchors, shape [Num Grid Cells * Num anchors x 4]
        :param alpha:           a weighting factor between classification and regression loss
        :param iou_thresh:      a threshold for matching of anchors in each grid cell to GTs
                                (a match should have IoU > iou_thresh)
        :param neg_pos_ratio:   a ratio for HardMiningCrossEntropyLoss
        """
        super(SSDLoss, self).__init__()
        self.scale_xy = dboxes.scale_xy
        self.scale_wh = dboxes.scale_wh
        self.alpha = alpha
        self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0), requires_grad=False)
        self.sl1_loss = nn.SmoothL1Loss(reduction="none")  # per-element loss; the deprecated reduce=False is equivalent

        self.con_loss = HardMiningCrossEntropyLoss(neg_pos_ratio)
        self.iou_thresh = iou_thresh

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        return ["smooth_l1", "closs", "Loss"]

    def _norm_relative_bbox(self, loc):
        """
        convert bbox locations into relative locations (relative to the dboxes)
        :param loc a tensor of shape [batch, 4, num_boxes]
        """
        gxy = (
            (loc[:, :2, :] - self.dboxes[:, :2, :])
            / self.dboxes[
                :,
                2:,
            ]
        ) / self.scale_xy
        gwh = (loc[:, 2:, :] / self.dboxes[:, 2:, :]).log() / self.scale_wh
        return torch.cat((gxy, gwh), dim=1).contiguous()

    def match_dboxes(self, targets):
        """
        Creates tensors with a target box and label for each dbox, i.e., with the same length as dboxes.

        * Each GT is assigned the grid cell with the highest IoU; this creates a pair for each GT and some cells;
        * The remaining grid cells are assigned to the GT with the highest IoU, provided it's > self.iou_thresh;
          if this condition is not met, the grid cell is marked as background

        GT-wise: one to many
        Grid-cell-wise: one to one

        :param targets: a tensor containing the boxes for a single image;
                        shape [num_boxes, 6] (image_id, label, x, y, w, h)
        :return:        two tensors
                        boxes - shape of dboxes [4, num_dboxes] (x,y,w,h)
                        labels - shape [num_dboxes]
        """
        device = targets.device
        each_cell_target_locations = self.dboxes.data.clone().squeeze()
        each_cell_target_labels = torch.zeros((self.dboxes.data.shape[2])).to(device)

        if len(targets) > 0:
            target_boxes = targets[:, 2:]
            target_labels = targets[:, 1]
            ious = calculate_bbox_iou_matrix(target_boxes, self.dboxes.data.squeeze().T, x1y1x2y2=False)

            # one best GT for EACH cell (does not guarantee that all GTs will be used)
            best_target_per_cell, best_target_per_cell_index = ious.max(0)

            # one best grid cell (anchor in it) for EACH target
            best_cell_per_target, best_cell_per_target_index = ious.max(1)
            # make sure EACH target has a grid cell assigned
            best_target_per_cell_index[best_cell_per_target_index] = torch.arange(len(targets)).to(device)
            # 2. is higher than any IoU, so it is guaranteed to pass any IoU threshold
            # which ensures that the pairs selected for each target will be included in the mask below
            # while the threshold will only affect other grid cell anchors that aren't pre-assigned to any target
            best_target_per_cell[best_cell_per_target_index] = 2.0

            mask = best_target_per_cell > self.iou_thresh
            each_cell_target_locations[:, mask] = target_boxes[best_target_per_cell_index[mask]].T
            each_cell_target_labels[mask] = target_labels[best_target_per_cell_index[mask]] + 1

        return each_cell_target_locations, each_cell_target_labels

    def forward(self, predictions: Tuple, targets):
        """
        Compute the loss
            :param predictions - predictions tensor coming from the network,
            tuple with shapes ([Batch Size, 4, num_dboxes], [Batch Size, num_classes + 1, num_dboxes])
            where predictions have logprobs for background and other classes
            :param targets - targets for the batch. [num targets, 6] (index in batch, label, x,y,w,h)
        """
        if isinstance(predictions, tuple) and isinstance(predictions[1], tuple):
            # Calculate loss in a validation mode
            predictions = predictions[1]
        batch_target_locations = []
        batch_target_labels = []
        (ploc, plabel) = predictions
        targets = targets.to(self.dboxes.device)
        for i in range(ploc.shape[0]):
            target_locations, target_labels = self.match_dboxes(targets[targets[:, 0] == i])
            batch_target_locations.append(target_locations)
            batch_target_labels.append(target_labels)
        batch_target_locations = torch.stack(batch_target_locations)
        batch_target_labels = torch.stack(batch_target_labels).type(torch.long)

        mask = batch_target_labels > 0  # not background
        pos_num = mask.sum(dim=1)

        vec_gd = self._norm_relative_bbox(batch_target_locations)

        # SUM ON FOUR COORDINATES, AND MASK
        sl1 = self.sl1_loss(ploc, vec_gd).sum(dim=1)
        sl1 = (mask.float() * sl1).sum(dim=1)

        closs = self.con_loss(plabel, batch_target_labels)

        # GUARD AGAINST IMAGES WITH NO OBJECTS DETECTED
        total_loss = (2 - self.alpha) * sl1 + self.alpha * closs
        num_mask = (pos_num > 0).float()  # a mask with 0 for images that have no positive pairs at all
        pos_num = pos_num.float().clamp(min=1e-6)
        ret = (total_loss * num_mask / pos_num).mean(dim=0)  # normalize by the number of positive pairs

        return ret, torch.cat((sl1.mean().unsqueeze(0), closs.mean().unsqueeze(0), ret.unsqueeze(0))).detach()
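
The final reduction normalizes each image's loss by its number of positive anchors and zeroes out images that have no positives at all. A standalone sketch of just that step (toy numbers, not tied to SSDLoss):

import torch

alpha = 1.0
sl1 = torch.tensor([4.0, 0.0])    # per-image localization loss; second image has no objects
closs = torch.tensor([6.0, 3.0])  # per-image confidence loss
pos_num = torch.tensor([2, 0])    # positive anchors per image

total_loss = (2 - alpha) * sl1 + alpha * closs
num_mask = (pos_num > 0).float()            # 0 for images without positives
pos_num = pos_num.float().clamp(min=1e-6)   # avoid division by zero
ret = (total_loss * num_mask / pos_num).mean(dim=0)
print(ret)  # tensor(2.5000) = ((4 + 6) / 2 + 0) / 2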

component_names property

Component names for logging during training. These correspond to 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

__init__(dboxes, alpha=1.0, iou_thresh=0.5, neg_pos_ratio=3.0)

Parameters:

Name Type Description Default
dboxes DefaultBoxes

model anchors, shape [Num Grid Cells * Num anchors x 4]

required
alpha float

a weighting factor between classification and regression loss

1.0
iou_thresh float

a threshold for matching of anchors in each grid cell to GTs (a match should have IoU > iou_thresh)

0.5
neg_pos_ratio float

a ratio for HardMiningCrossEntropyLoss

3.0
Source code in latest/src/super_gradients/training/losses/ssd_loss.py
def __init__(self, dboxes: DefaultBoxes, alpha: float = 1.0, iou_thresh: float = 0.5, neg_pos_ratio: float = 3.0):
    """
    :param dboxes:          model anchors, shape [Num Grid Cells * Num anchors x 4]
    :param alpha:           a weighting factor between classification and regression loss
    :param iou_thresh:      a threshold for matching of anchors in each grid cell to GTs
                            (a match should have IoU > iou_thresh)
    :param neg_pos_ratio:   a ratio for HardMiningCrossEntropyLoss
    """
    super(SSDLoss, self).__init__()
    self.scale_xy = dboxes.scale_xy
    self.scale_wh = dboxes.scale_wh
    self.alpha = alpha
    self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0), requires_grad=False)
    self.sl1_loss = nn.SmoothL1Loss(reduction="none")  # per-element loss; the deprecated reduce=False is equivalent

    self.con_loss = HardMiningCrossEntropyLoss(neg_pos_ratio)
    self.iou_thresh = iou_thresh

forward(predictions, targets)

Compute the loss :param predictions - predictions tensor coming from the network, tuple with shapes ([Batch Size, 4, num_dboxes], [Batch Size, num_classes + 1, num_dboxes]) where predictions have logprobs for background and other classes :param targets - targets for the batch. [num targets, 6] (index in batch, label, x,y,w,h)

Source code in latest/src/super_gradients/training/losses/ssd_loss.py
def forward(self, predictions: Tuple, targets):
    """
    Compute the loss
        :param predictions - predictions tensor coming from the network,
        tuple with shapes ([Batch Size, 4, num_dboxes], [Batch Size, num_classes + 1, num_dboxes])
        where predictions have logprobs for background and other classes
        :param targets - targets for the batch. [num targets, 6] (index in batch, label, x,y,w,h)
    """
    if isinstance(predictions, tuple) and isinstance(predictions[1], tuple):
        # Calculate loss in a validation mode
        predictions = predictions[1]
    batch_target_locations = []
    batch_target_labels = []
    (ploc, plabel) = predictions
    targets = targets.to(self.dboxes.device)
    for i in range(ploc.shape[0]):
        target_locations, target_labels = self.match_dboxes(targets[targets[:, 0] == i])
        batch_target_locations.append(target_locations)
        batch_target_labels.append(target_labels)
    batch_target_locations = torch.stack(batch_target_locations)
    batch_target_labels = torch.stack(batch_target_labels).type(torch.long)

    mask = batch_target_labels > 0  # not background
    pos_num = mask.sum(dim=1)

    vec_gd = self._norm_relative_bbox(batch_target_locations)

    # SUM ON FOUR COORDINATES, AND MASK
    sl1 = self.sl1_loss(ploc, vec_gd).sum(dim=1)
    sl1 = (mask.float() * sl1).sum(dim=1)

    closs = self.con_loss(plabel, batch_target_labels)

    # GUARD AGAINST IMAGES WITH NO OBJECTS DETECTED
    total_loss = (2 - self.alpha) * sl1 + self.alpha * closs
    num_mask = (pos_num > 0).float()  # a mask with 0 for images that have no positive pairs at all
    pos_num = pos_num.float().clamp(min=1e-6)
    ret = (total_loss * num_mask / pos_num).mean(dim=0)  # normalize by the number of positive pairs

    return ret, torch.cat((sl1.mean().unsqueeze(0), closs.mean().unsqueeze(0), ret.unsqueeze(0))).detach()

match_dboxes(targets)

Creates tensors with a target box and label for each dbox, i.e., with the same length as dboxes.

  • Each GT is assigned the grid cell with the highest IoU; this creates a pair for each GT and some cells;
  • The remaining grid cells are assigned to the GT with the highest IoU, provided it's > self.iou_thresh; if this condition is not met, the grid cell is marked as background

GT-wise: one to many
Grid-cell-wise: one to one

Parameters:

Name Type Description Default
targets

a tensor containing the boxes for a single image; shape [num_boxes, 6] (image_id, label, x, y, w, h)

required

Returns:

Type Description

two tensors: boxes - shape of dboxes [4, num_dboxes] (x,y,w,h); labels - shape [num_dboxes]

Source code in latest/src/super_gradients/training/losses/ssd_loss.py
def match_dboxes(self, targets):
    """
    Creates tensors with a target box and label for each dbox, i.e., with the same length as dboxes.

    * Each GT is assigned the grid cell with the highest IoU; this creates a pair for each GT and some cells;
    * The remaining grid cells are assigned to the GT with the highest IoU, provided it's > self.iou_thresh;
      if this condition is not met, the grid cell is marked as background

    GT-wise: one to many
    Grid-cell-wise: one to one

    :param targets: a tensor containing the boxes for a single image;
                    shape [num_boxes, 6] (image_id, label, x, y, w, h)
    :return:        two tensors
                    boxes - shape of dboxes [4, num_dboxes] (x,y,w,h)
                    labels - shape [num_dboxes]
    """
    device = targets.device
    each_cell_target_locations = self.dboxes.data.clone().squeeze()
    each_cell_target_labels = torch.zeros((self.dboxes.data.shape[2])).to(device)

    if len(targets) > 0:
        target_boxes = targets[:, 2:]
        target_labels = targets[:, 1]
        ious = calculate_bbox_iou_matrix(target_boxes, self.dboxes.data.squeeze().T, x1y1x2y2=False)

        # one best GT for EACH cell (does not guarantee that all GTs will be used)
        best_target_per_cell, best_target_per_cell_index = ious.max(0)

        # one best grid cell (anchor in it) for EACH target
        best_cell_per_target, best_cell_per_target_index = ious.max(1)
        # make sure EACH target has a grid cell assigned
        best_target_per_cell_index[best_cell_per_target_index] = torch.arange(len(targets)).to(device)
        # 2. is higher than any IoU, so it is guaranteed to pass any IoU threshold
        # which ensures that the pairs selected for each target will be included in the mask below
        # while the threshold will only affect other grid cell anchors that aren't pre-assigned to any target
        best_target_per_cell[best_cell_per_target_index] = 2.0

        mask = best_target_per_cell > self.iou_thresh
        each_cell_target_locations[:, mask] = target_boxes[best_target_per_cell_index[mask]].T
        each_cell_target_labels[mask] = target_labels[best_target_per_cell_index[mask]] + 1

    return each_cell_target_locations, each_cell_target_labels
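
The two max calls implement the bidirectional matching described above: a cell-wise best GT, plus a forced assignment of every GT to its best cell (marked with IoU 2.0 so it always passes the threshold). A minimal sketch on an invented IoU matrix:

import torch

iou_thresh = 0.5
# rows = 2 GTs, cols = 5 anchor cells; IoU values invented for illustration
ious = torch.tensor([[0.1, 0.6, 0.3, 0.2, 0.0],
                     [0.2, 0.1, 0.4, 0.45, 0.1]])

best_target_per_cell, best_target_per_cell_index = ious.max(0)  # best GT for each cell
_, best_cell_per_target_index = ious.max(1)                     # best cell for each GT

# force-assign each GT to its best cell and make the pair pass any threshold
best_target_per_cell_index[best_cell_per_target_index] = torch.arange(ious.size(0))
best_target_per_cell[best_cell_per_target_index] = 2.0

mask = best_target_per_cell > iou_thresh
print(mask)                        # tensor([False,  True, False,  True, False])
print(best_target_per_cell_index)  # cells 1 and 3 end up matched to GTs 0 and 1

Note that GT 1's best IoU (0.45) is below the threshold, yet it still gets cell 3: the forced assignment guarantees every GT at least one match.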

DetailAggregateModule

Bases: nn.Module

DetailAggregateModule creates the ground-truth spatial detail map. Given ground-truth segmentation masks, this module uses Laplacian kernels to create feature maps with special attention to class edges, a.k.a. details.

Source code in latest/src/super_gradients/training/losses/stdc_loss.py
class DetailAggregateModule(nn.Module):
    """
    DetailAggregateModule creates the ground-truth spatial detail map. Given ground-truth segmentation masks,
    this module uses Laplacian kernels to create feature maps with special attention to class edges, a.k.a. details.
    """

    _LAPLACIAN_KERNEL = [-1, -1, -1, -1, 8, -1, -1, -1, -1]
    _INITIAL_FUSE_KERNEL = [[6.0 / 10], [3.0 / 10], [1.0 / 10]]

    def __init__(self, num_classes: int, ignore_label: int, detail_threshold: float = 1.0, learnable_fusing_kernel: bool = True):
        """
        :param detail_threshold: threshold to define a pixel as an edge after the Laplacian. Must be a value between
            1 and 8; a lower value yields smoother edges, a higher value finer edges.
        :param learnable_fusing_kernel: whether the 1x1 conv map of strided maps is learnable or not.
        """
        super().__init__()
        assert 1 <= detail_threshold <= 8, f"Detail threshold must be a value between 1 and 8, found: {detail_threshold}"

        self.device = None
        self.detail_threshold = detail_threshold
        self.num_classes = num_classes
        self.ignore_label = ignore_label

        # laplacian dw-convolution, each channel is a class label. apply laplacian filter once for each channel.
        self.laplacian_kernel = torch.tensor(self._LAPLACIAN_KERNEL, dtype=torch.float32).reshape(1, 1, 3, 3).expand(num_classes, 1, 3, 3).requires_grad_(False)
        # init param for 1x1 conv of strided gaussian feature maps.
        self.fuse_kernel = torch.tensor(self._INITIAL_FUSE_KERNEL, dtype=torch.float32).reshape(1, 3, 1, 1).requires_grad_(learnable_fusing_kernel)
        if learnable_fusing_kernel:
            self.fuse_kernel = torch.nn.Parameter(self.fuse_kernel)

    def forward(self, gt_masks: torch.Tensor):
        if self.device is None:
            self._set_kernels_to_device(gt_masks.device)
        if self.num_classes > 1:
            one_hot = to_one_hot(gt_masks, self.num_classes, self.ignore_label).float()
        else:
            one_hot = gt_masks.unsqueeze(1).float()
        # create binary detail maps using filters with strides of 1, 2 and 4.
        boundary_targets = F.conv2d(one_hot, self.laplacian_kernel, stride=1, padding=1, groups=self.num_classes)
        boundary_targets_x2 = F.conv2d(one_hot, self.laplacian_kernel, stride=2, padding=1, groups=self.num_classes)
        boundary_targets_x4 = F.conv2d(one_hot, self.laplacian_kernel, stride=4, padding=1, groups=self.num_classes)

        boundary_targets = self._to_one_channel_binary(boundary_targets, self.detail_threshold)
        boundary_targets_x2 = self._to_one_channel_binary(boundary_targets_x2, self.detail_threshold)
        boundary_targets_x4 = self._to_one_channel_binary(boundary_targets_x4, self.detail_threshold)

        boundary_targets_x4 = F.interpolate(boundary_targets_x4, boundary_targets.shape[2:], mode="nearest")
        boundary_targets_x2 = F.interpolate(boundary_targets_x2, boundary_targets.shape[2:], mode="nearest")

        boundary_targets = torch.cat((boundary_targets, boundary_targets_x2, boundary_targets_x4), dim=1)

        boundary_targets = F.conv2d(boundary_targets, self.fuse_kernel)
        boundary_targets = self._to_one_channel_binary(boundary_targets, 0.3)

        return boundary_targets

    def _set_kernels_to_device(self, device: str):
        self.device = device
        self.laplacian_kernel = self.laplacian_kernel.to(device)
        self.fuse_kernel = self.fuse_kernel.to(device)

    @staticmethod
    def _to_one_channel_binary(x: torch.Tensor, threshold: float):
        """
        Flatten channels and binarize: the flattened value is 1 if at least one class channel is above the
        threshold (a per-pixel 'or' over channels).
        """
        x = x.max(dim=1, keepdim=True)[0]
        x[x < threshold] = 0
        x[x >= threshold] = 1
        return x
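
The Laplacian kernel responds only where a mask value differs from its neighbours, which is what turns a filled segmentation mask into a thin edge map. A standalone illustration on an invented binary mask:

import torch
import torch.nn.functional as F

laplacian = torch.tensor([-1, -1, -1, -1, 8, -1, -1, -1, -1], dtype=torch.float32).reshape(1, 1, 3, 3)

# 6x6 binary mask with a filled 3x3 square in the middle
mask = torch.zeros(1, 1, 6, 6)
mask[:, :, 2:5, 2:5] = 1.0

edges = F.conv2d(mask, laplacian, stride=1, padding=1)
edges = (edges >= 1.0).float()  # binarize with detail_threshold = 1.0, as in the module above
print(edges[0, 0])  # ones on the square's border pixels, zero at its center and outside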

__init__(num_classes, ignore_label, detail_threshold=1.0, learnable_fusing_kernel=True)

Parameters:

Name Type Description Default
detail_threshold float

threshold to define a pixel as an edge after the Laplacian. Must be a value between 1 and 8; a lower value yields smoother edges, a higher value finer edges.

1.0
learnable_fusing_kernel bool

whether the 1x1 conv map of strided maps is learnable or not.

True
Source code in latest/src/super_gradients/training/losses/stdc_loss.py
def __init__(self, num_classes: int, ignore_label: int, detail_threshold: float = 1.0, learnable_fusing_kernel: bool = True):
    """
    :param detail_threshold: threshold to define a pixel as an edge after the Laplacian. Must be a value between
        1 and 8; a lower value yields smoother edges, a higher value finer edges.
    :param learnable_fusing_kernel: whether the 1x1 conv map of strided maps is learnable or not.
    """
    super().__init__()
    assert 1 <= detail_threshold <= 8, f"Detail threshold must be a value between 1 and 8, found: {detail_threshold}"

    self.device = None
    self.detail_threshold = detail_threshold
    self.num_classes = num_classes
    self.ignore_label = ignore_label

    # laplacian dw-convolution, each channel is a class label. apply laplacian filter once for each channel.
    self.laplacian_kernel = torch.tensor(self._LAPLACIAN_KERNEL, dtype=torch.float32).reshape(1, 1, 3, 3).expand(num_classes, 1, 3, 3).requires_grad_(False)
    # init param for 1x1 conv of strided gaussian feature maps.
    self.fuse_kernel = torch.tensor(self._INITIAL_FUSE_KERNEL, dtype=torch.float32).reshape(1, 3, 1, 1).requires_grad_(learnable_fusing_kernel)
    if learnable_fusing_kernel:
        self.fuse_kernel = torch.nn.Parameter(self.fuse_kernel)

DetailLoss

Bases: _Loss

STDC DetailLoss, applied to detail features from the higher-resolution path against the ground-truth detail map. A weighted combination of BCE loss and BinaryDice loss.

Source code in latest/src/super_gradients/training/losses/stdc_loss.py
class DetailLoss(_Loss):
    """
    STDC DetailLoss, applied to detail features from the higher-resolution path against the ground-truth detail map.
    A weighted combination of BCE loss and BinaryDice loss.
    """

    def __init__(self, weights: list = [1.0, 1.0]):
        """
        :param weights: weights to apply to each part of the loss contribution, [BCE, Dice] respectively.
        """
        super().__init__()
        assert len(weights) == 2, f"Only 2 weight elements are required for BCE-Dice loss combo, found: {len(weights)}"
        self.weights = weights
        self.bce_with_logits = nn.BCEWithLogitsLoss()
        self.dice_loss = BinaryDiceLoss(apply_sigmoid=True)

    def forward(self, detail_out: torch.Tensor, detail_target: torch.Tensor):
        """
        :param detail_out: predicted detail map.
        :param detail_target: ground-truth detail map, output of DetailAggregateModule.
        """
        bce_loss = self.bce_with_logits(detail_out, detail_target)
        dice_loss = self.dice_loss(detail_out, detail_target)
        return self.weights[0] * bce_loss + self.weights[1] * dice_loss

__init__(weights=[1.0, 1.0])

Parameters:

Name Type Description Default
weights list

weights to apply to each part of the loss contribution, [BCE, Dice] respectively.

[1.0, 1.0]
Source code in latest/src/super_gradients/training/losses/stdc_loss.py
def __init__(self, weights: list = [1.0, 1.0]):
    """
    :param weights: weights to apply to each part of the loss contribution, [BCE, Dice] respectively.
    """
    super().__init__()
    assert len(weights) == 2, f"Only 2 weight elements are required for BCE-Dice loss combo, found: {len(weights)}"
    self.weights = weights
    self.bce_with_logits = nn.BCEWithLogitsLoss()
    self.dice_loss = BinaryDiceLoss(apply_sigmoid=True)

forward(detail_out, detail_target)

Parameters:

Name Type Description Default
detail_out torch.Tensor

predicted detail map.

required
detail_target torch.Tensor

ground-truth detail map, output of DetailAggregateModule.

required
Source code in latest/src/super_gradients/training/losses/stdc_loss.py
def forward(self, detail_out: torch.Tensor, detail_target: torch.Tensor):
    """
    :param detail_out: predicted detail map.
    :param detail_target: ground-truth detail map, output of DetailAggregateModule.
    """
    bce_loss = self.bce_with_logits(detail_out, detail_target)
    dice_loss = self.dice_loss(detail_out, detail_target)
    return self.weights[0] * bce_loss + self.weights[1] * dice_loss
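
A minimal usage sketch, assuming DetailLoss is importable from super_gradients.training.losses.stdc_loss (the file documented above) and that the shapes below are representative:

import torch
from super_gradients.training.losses.stdc_loss import DetailLoss

criterion = DetailLoss(weights=[1.0, 1.0])

detail_out = torch.randn(2, 1, 64, 64)                       # predicted detail logits
detail_target = torch.randint(0, 2, (2, 1, 64, 64)).float()  # binary ground-truth detail map
loss = criterion(detail_out, detail_target)
print(loss.item())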

STDCLoss

Bases: _Loss

Loss class of STDC-Seg training.

Source code in latest/src/super_gradients/training/losses/stdc_loss.py
@register_loss(Losses.STDC_LOSS)
class STDCLoss(_Loss):
    """
    Loss class of STDC-Seg training.
    """

    def __init__(
        self,
        num_classes: int,
        threshold: float = 0.7,
        num_aux_heads: int = 2,
        num_detail_heads: int = 1,
        weights: Union[tuple, list] = (1, 1, 1, 1),
        detail_weights: Union[tuple, list] = (1, 1),
        mining_percent: float = 0.1,
        detail_threshold: float = 1.0,
        learnable_fusing_kernel: bool = True,
        ignore_index: int = None,
        ohem_criteria: OhemLoss = None,
    ):
        """
        :param threshold: Online hard-mining probability threshold.
        :param num_aux_heads: num of auxiliary heads.
        :param num_detail_heads: num of detail heads.
        :param weights: Loss lambda weights.
        :param detail_weights: weights for the (BCE, Dice) loss parts in DetailLoss.
        :param mining_percent: mining percentage.
        :param detail_threshold: detail threshold to create binary details features in DetailLoss.
        :param learnable_fusing_kernel: whether DetailAggregateModule params are learnable or not.
        :param ohem_criteria: OhemLoss criterion component of STDC. When none is given, it will be derived according
         to num_classes (i.e. OhemCELoss if num_classes > 1, OhemBCELoss otherwise).
        """
        super().__init__()

        assert len(weights) == num_aux_heads + num_detail_heads + 1, "Lambda loss weights must match the number of loss items."

        self.weights = weights
        self.use_detail = num_detail_heads > 0

        self.num_aux_heads = num_aux_heads
        self.num_detail_heads = num_detail_heads

        if self.use_detail:
            self.detail_module = DetailAggregateModule(
                num_classes=num_classes, detail_threshold=detail_threshold, ignore_label=ignore_index, learnable_fusing_kernel=learnable_fusing_kernel
            )
            self.detail_loss = DetailLoss(weights=detail_weights)

        if ohem_criteria is None:
            if num_classes > 1:
                ohem_criteria = OhemCELoss(threshold=threshold, mining_percent=mining_percent, ignore_lb=ignore_index)
            else:
                ohem_criteria = OhemBCELoss(threshold=threshold, mining_percent=mining_percent)

        self.ce_ohem = ohem_criteria
        self.num_classes = num_classes

    @property
    def component_names(self):
        """
        Component names for logging during training.
        These correspond to 2nd item in the tuple returned in self.forward(...).
        See super_gradients.Trainer.train() docs for more info.
        """
        return ["main_loss", "aux_loss1", "aux_loss2", "detail_loss", "loss"]

    def forward(self, preds: Tuple[torch.Tensor], target: torch.Tensor):
        """
        :param preds: Model output predictions, must be in the following format:
         [Main-feats, Aux-feats[0], ..., Aux-feats[num_auxs-1], Detail-feats[0], ..., Detail-feats[num_details-1]]
        """
        assert (
            len(preds) == self.num_aux_heads + self.num_detail_heads + 1
        ), f"Wrong num of predictions tensors for STDC loss, expected {self.num_aux_heads + self.num_detail_heads + 1} found {len(preds)}"
        losses = []
        total_loss = 0

        # classification and auxiliary loss
        for i in range(0, 1 + self.num_aux_heads):
            ce_loss = self.ce_ohem(preds[i], target)
            total_loss += ce_loss * self.weights[i]
            losses.append(ce_loss)

        # detail heads loss
        if self.use_detail:
            gt_binary_mask = self.detail_module(target)
            for i in range(1 + self.num_aux_heads, len(preds)):
                detail_loss = self.detail_loss(preds[i], gt_binary_mask)
                total_loss += self.weights[i] * detail_loss
                losses.append(detail_loss)

        losses.append(total_loss)

        return total_loss, torch.stack(losses, dim=0).detach()

    def get_train_named_params(self):
        """
        Expose DetailAggregateModule learnable parameters to be passed to the optimizer.
        """
        if self.use_detail:
            return list(self.detail_module.named_parameters())
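
A construction sketch illustrating the preds layout that forward expects; it assumes STDCLoss is importable from super_gradients.training.losses.stdc_loss (the file documented above) and uses invented shapes for a 19-class task:

import torch
from super_gradients.training.losses.stdc_loss import STDCLoss

criterion = STDCLoss(num_classes=19, ignore_index=255)

# [Main-feats, Aux-feats[0], Aux-feats[1], Detail-feats[0]]
main = torch.randn(2, 19, 64, 64)
aux1 = torch.randn(2, 19, 64, 64)
aux2 = torch.randn(2, 19, 64, 64)
detail = torch.randn(2, 1, 64, 64)  # the detail head predicts a one-channel edge map
preds = (main, aux1, aux2, detail)
assert len(preds) == criterion.num_aux_heads + criterion.num_detail_heads + 1

target = torch.randint(0, 19, (2, 64, 64))         # class-index masks
total_loss, loss_items = criterion(preds, target)  # loss_items follows component_names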

component_names property

Component names for logging during training. These correspond to 2nd item in the tuple returned in self.forward(...). See super_gradients.Trainer.train() docs for more info.

__init__(num_classes, threshold=0.7, num_aux_heads=2, num_detail_heads=1, weights=(1, 1, 1, 1), detail_weights=(1, 1), mining_percent=0.1, detail_threshold=1.0, learnable_fusing_kernel=True, ignore_index=None, ohem_criteria=None)

Parameters:

Name Type Description Default
threshold float

Online hard-mining probability threshold.

0.7
num_aux_heads int

num of auxiliary heads.

2
num_detail_heads int

num of detail heads.

1
weights Union[tuple, list]

Loss lambda weights.

(1, 1, 1, 1)
detail_weights Union[tuple, list]

weights for the (BCE, Dice) loss parts in DetailLoss.

(1, 1)
mining_percent float

mining percentage.

0.1
detail_threshold float

detail threshold to create binary details features in DetailLoss.

1.0
learnable_fusing_kernel bool

whether DetailAggregateModule params are learnable or not.

True
ohem_criteria OhemLoss

OhemLoss criterion component of STDC. When none is given, it will be derived according to num_classes (i.e. OhemCELoss if num_classes > 1, OhemBCELoss otherwise).

None
Source code in latest/src/super_gradients/training/losses/stdc_loss.py
def __init__(
    self,
    num_classes: int,
    threshold: float = 0.7,
    num_aux_heads: int = 2,
    num_detail_heads: int = 1,
    weights: Union[tuple, list] = (1, 1, 1, 1),
    detail_weights: Union[tuple, list] = (1, 1),
    mining_percent: float = 0.1,
    detail_threshold: float = 1.0,
    learnable_fusing_kernel: bool = True,
    ignore_index: int = None,
    ohem_criteria: OhemLoss = None,
):
    """
    :param threshold: Online hard-mining probability threshold.
    :param num_aux_heads: num of auxiliary heads.
    :param num_detail_heads: num of detail heads.
    :param weights: Loss lambda weights.
    :param detail_weights: weights for the (BCE, Dice) loss parts in DetailLoss.
    :param mining_percent: mining percentage.
    :param detail_threshold: detail threshold to create binary details features in DetailLoss.
    :param learnable_fusing_kernel: whether DetailAggregateModule params are learnable or not.
    :param ohem_criteria: OhemLoss criterion component of STDC. When none is given, it will be derived according
     to num_classes (i.e. OhemCELoss if num_classes > 1, OhemBCELoss otherwise).
    """
    super().__init__()

    assert len(weights) == num_aux_heads + num_detail_heads + 1, "Lambda loss weights must match the number of loss items."

    self.weights = weights
    self.use_detail = num_detail_heads > 0

    self.num_aux_heads = num_aux_heads
    self.num_detail_heads = num_detail_heads

    if self.use_detail:
        self.detail_module = DetailAggregateModule(
            num_classes=num_classes, detail_threshold=detail_threshold, ignore_label=ignore_index, learnable_fusing_kernel=learnable_fusing_kernel
        )
        self.detail_loss = DetailLoss(weights=detail_weights)

    if ohem_criteria is None:
        if num_classes > 1:
            ohem_criteria = OhemCELoss(threshold=threshold, mining_percent=mining_percent, ignore_lb=ignore_index)
        else:
            ohem_criteria = OhemBCELoss(threshold=threshold, mining_percent=mining_percent)

    self.ce_ohem = ohem_criteria
    self.num_classes = num_classes

forward(preds, target)

Parameters:

Name Type Description Default
preds Tuple[torch.Tensor]

Model output predictions, must be in the following format: [Main-feats, Aux-feats[0], ..., Aux-feats[num_auxs-1], Detail-feats[0], ..., Detail-feats[num_details-1]]

required
Source code in latest/src/super_gradients/training/losses/stdc_loss.py
def forward(self, preds: Tuple[torch.Tensor], target: torch.Tensor):
    """
    :param preds: Model output predictions, must be in the following format:
     [Main-feats, Aux-feats[0], ..., Aux-feats[num_auxs-1], Detail-feats[0], ..., Detail-feats[num_details-1]]
    """
    assert (
        len(preds) == self.num_aux_heads + self.num_detail_heads + 1
    ), f"Wrong num of predictions tensors for STDC loss, expected {self.num_aux_heads + self.num_detail_heads + 1} found {len(preds)}"
    losses = []
    total_loss = 0

    # classification and auxiliary loss
    for i in range(0, 1 + self.num_aux_heads):
        ce_loss = self.ce_ohem(preds[i], target)
        total_loss += ce_loss * self.weights[i]
        losses.append(ce_loss)

    # detail heads loss
    if self.use_detail:
        gt_binary_mask = self.detail_module(target)
        for i in range(1 + self.num_aux_heads, len(preds)):
            detail_loss = self.detail_loss(preds[i], gt_binary_mask)
            total_loss += self.weights[i] * detail_loss
            losses.append(detail_loss)

    losses.append(total_loss)

    return total_loss, torch.stack(losses, dim=0).detach()

get_train_named_params()

Expose DetailAggregateModule learnable parameters to be passed to the optimizer.

Source code in latest/src/super_gradients/training/losses/stdc_loss.py
def get_train_named_params(self):
    """
    Expose DetailAggregateModule learnable parameters to be passed to the optimizer.
    """
    if self.use_detail:
        return list(self.detail_module.named_parameters())

AbstarctSegmentationStructureLoss

Bases: _Loss, ABC

Abstract computation of structure loss between two tensors; it supports both multi-class and binary tasks.

Source code in latest/src/super_gradients/training/losses/structure_loss.py
class AbstarctSegmentationStructureLoss(_Loss, ABC):
    """
    Abstract computation of structure loss between two tensors; it supports both multi-class and binary tasks.
    """

    def __init__(
        self,
        apply_softmax: bool = True,
        ignore_index: int = None,
        smooth: float = 1.0,
        eps: float = 1e-5,
        reduce_over_batches: bool = False,
        generalized_metric: bool = False,
        weight: Optional[torch.Tensor] = None,
        reduction: Union[LossReduction, str] = "mean",
    ):
        """
        :param apply_softmax: Whether to apply softmax to the predictions.
        :param smooth: Laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
            the metric coefficient is to 1, which can be used as a regularization effect.
            As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
        :param eps: epsilon value to avoid inf.
        :param reduce_over_batches: Whether to average metric over the batch axis if set True,
         default is `False` to average over the classes axis.
        :param generalized_metric: Whether to apply normalization by the volume of each class.
        :param weight: a manual rescaling weight given to each class. If given, it has to be a Tensor of size `C`.
        :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
            `none`: no reduction will be applied.
            `mean`: the sum of the output will be divided by the number of elements in the output.
            `sum`: the output will be summed.
            Default: `mean`
        """
        super().__init__(reduction=reduction)
        self.ignore_index = ignore_index
        self.apply_softmax = apply_softmax
        self.eps = eps
        self.smooth = smooth
        self.reduce_over_batches = reduce_over_batches
        self.generalized_metric = generalized_metric
        self.weight = weight
        if self.generalized_metric:
            assert self.weight is None, "Cannot use structure loss with class weights and generalized normalization"
            if self.eps > 1e-12:
                logger.warning("When using GeneralizedLoss, it is recommended to use eps below 1e-12, to not affect" "small values normalized terms.")
            if self.smooth != 0:
                logger.warning("When using GeneralizedLoss, it is recommended to set smooth value as 0.")

    @abstractmethod
    def _calc_numerator_denominator(self, labels_one_hot, predict) -> (torch.Tensor, torch.Tensor):
        """
        All base classes must implement this function.
        Return: 2 tensor of shape [BS, num_classes, img_width, img_height].
        """
        raise NotImplementedError()

    @abstractmethod
    def _calc_loss(self, numerator, denominator) -> torch.Tensor:
        """
        All base classes must implement this function.
        Return a tensors of shape [BS] if self.reduce_over_batches else [num_classes].
        """
        raise NotImplementedError()

    def forward(self, predict, target):
        if self.apply_softmax:
            predict = torch.softmax(predict, dim=1)
        # target to one hot format
        if target.size() == predict.size():
            labels_one_hot = target
        elif target.dim() == 3:  # if target tensor is in class indexes format.
            if predict.size(1) == 1 and self.ignore_index is None:  # if one class prediction task
                labels_one_hot = target.unsqueeze(1)
            else:
                labels_one_hot = to_one_hot(target, num_classes=predict.shape[1], ignore_index=self.ignore_index)
        else:
            raise AssertionError(
                f"Mismatch of target shape: {target.size()} and prediction shape: {predict.size()},"
                f" target must be [NxWxH] tensor for to_one_hot conversion"
                f" or to have the same num of channels like prediction tensor"
            )

        reduce_spatial_dims = list(range(2, len(predict.shape)))
        reduce_dims = [1] + reduce_spatial_dims if self.reduce_over_batches else [0] + reduce_spatial_dims

        # Calculate the numerator and denominator of the chosen metric
        numerator, denominator = self._calc_numerator_denominator(labels_one_hot, predict)

        # exclude ignore labels from numerator and denominator, false positive predicted on ignore samples
        # are not included in the total calculation.
        if self.ignore_index is not None:
            valid_mask = target.ne(self.ignore_index).unsqueeze(1).expand_as(denominator)
            numerator *= valid_mask
            denominator *= valid_mask

        numerator = torch.sum(numerator, dim=reduce_dims)
        denominator = torch.sum(denominator, dim=reduce_dims)

        if self.generalized_metric:
            weights = 1.0 / (torch.sum(labels_one_hot, dim=reduce_dims) ** 2)
            # if some classes are not in batch, weights will be inf.
            infs = torch.isinf(weights)
            weights[infs] = 0.0
            numerator *= weights
            denominator *= weights

        # Calculate the loss of the chosen metric
        losses = self._calc_loss(numerator, denominator)
        if self.weight is not None:
            losses *= self.weight
        return apply_reduce(losses, reduction=self.reduction)
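
A sketch of how a concrete subclass plugs into the two abstract hooks, here with Dice-style terms. SketchDiceLoss is a hypothetical name for illustration; the library ships its own concrete subclasses:

import torch
from super_gradients.training.losses.structure_loss import AbstarctSegmentationStructureLoss

class SketchDiceLoss(AbstarctSegmentationStructureLoss):
    # hypothetical illustration, not the library's DiceLoss
    def _calc_numerator_denominator(self, labels_one_hot, predict):
        numerator = 2.0 * labels_one_hot * predict  # per-pixel intersection term
        denominator = labels_one_hot + predict      # per-pixel cardinality term
        return numerator, denominator

    def _calc_loss(self, numerator, denominator):
        # numerator and denominator arrive already summed over the reduce dims by forward()
        return 1.0 - (numerator + self.smooth) / (denominator + self.smooth + self.eps)

loss_fn = SketchDiceLoss(apply_softmax=True)
predict = torch.randn(2, 3, 8, 8)        # logits for 3 classes
target = torch.randint(0, 3, (2, 8, 8))  # class-index mask
print(loss_fn(predict, target).item())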

__init__(apply_softmax=True, ignore_index=None, smooth=1.0, eps=1e-05, reduce_over_batches=False, generalized_metric=False, weight=None, reduction='mean')

Parameters:

Name Type Description Default
apply_softmax bool

Whether to apply softmax to the predictions.

True
smooth float

Laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer the metric coefficient is to 1, which can be used as a regularization effect. As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895

1.0
eps float

epsilon value to avoid inf.

1e-05
reduce_over_batches bool

Whether to average metric over the batch axis if set True, default is False to average over the classes axis.

False
generalized_metric bool

Whether to apply normalization by the volume of each class.

False
weight Optional[torch.Tensor]

a manual rescaling weight given to each class. If given, it has to be a Tensor of size C.

None
reduction Union[LossReduction, str]

Specifies the reduction to apply to the output: none | mean | sum. none: no reduction will be applied. mean: the sum of the output will be divided by the number of elements in the output. sum: the output will be summed. Default: mean

'mean'
Source code in latest/src/super_gradients/training/losses/structure_loss.py
def __init__(
    self,
    apply_softmax: bool = True,
    ignore_index: int = None,
    smooth: float = 1.0,
    eps: float = 1e-5,
    reduce_over_batches: bool = False,
    generalized_metric: bool = False,
    weight: Optional[torch.Tensor] = None,
    reduction: Union[LossReduction, str] = "mean",
):
    """
    :param apply_softmax: Whether to apply softmax to the predictions.
    :param smooth: Laplace smoothing, also known as additive smoothing. The larger the smooth value, the closer
        the metric coefficient is to 1, which can be used as a regularization effect.
        As mentioned in: https://github.com/pytorch/pytorch/issues/1249#issuecomment-337999895
    :param eps: epsilon value to avoid inf.
    :param reduce_over_batches: Whether to average metric over the batch axis if set True,
     default is `False` to average over the classes axis.
    :param generalized_metric: Whether to apply normalization by the volume of each class.
    :param weight: a manual rescaling weight given to each class. If given, it has to be a Tensor of size `C`.
    :param reduction: Specifies the reduction to apply to the output: `none` | `mean` | `sum`.
        `none`: no reduction will be applied.
        `mean`: the sum of the output will be divided by the number of elements in the output.
        `sum`: the output will be summed.
        Default: `mean`
    """
    super().__init__(reduction=reduction)
    self.ignore_index = ignore_index
    self.apply_softmax = apply_softmax
    self.eps = eps
    self.smooth = smooth
    self.reduce_over_batches = reduce_over_batches
    self.generalized_metric = generalized_metric
    self.weight = weight
    if self.generalized_metric:
        assert self.weight is None, "Cannot use structure loss with class weights and generalized normalization"
        if self.eps > 1e-12:
            logger.warning("When using GeneralizedLoss, it is recommended to use eps below 1e-12, to not affect" "small values normalized terms.")
        if self.smooth != 0:
            logger.warning("When using GeneralizedLoss, it is recommended to set smooth value as 0.")

Based on https://github.com/Megvii-BaseDetection/YOLOX (Apache-2.0 license)

IOUloss

Bases: nn.Module

IoU loss with the following supported loss types:

Parameters:

Name Type Description Default
reduction str

One of ["mean", "sum", "none"] reduction to apply to the computed loss (Default="none")

'none'
loss_type str

One of ["iou", "giou"] where: * 'iou' for (1 - iou^2) * 'giou' according to "Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression" (1 - giou), where giou = iou - (cover_box - union_box)/cover_box

'iou'
Source code in latest/src/super_gradients/training/losses/yolox_loss.py
class IOUloss(nn.Module):
    """
    IoU loss with the following supported loss types:
    :param reduction: One of ["mean", "sum", "none"] reduction to apply to the computed loss (Default="none")
    :param loss_type: One of ["iou", "giou"] where:
            * 'iou' for
                (1 - iou^2)
            * 'giou' according to "Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression"
                (1 - giou), where giou = iou - (cover_box - union_box)/cover_box
    """

    def __init__(self, reduction: str = "none", loss_type: str = "iou"):
        super(IOUloss, self).__init__()
        self._validate_args(loss_type, reduction)
        self.reduction = reduction
        self.loss_type = loss_type

    @staticmethod
    def _validate_args(loss_type, reduction):
        supported_losses = ["iou", "giou"]
        supported_reductions = ["mean", "sum", "none"]
        if loss_type not in supported_losses:
            raise ValueError("Illegal loss_type value: " + loss_type + ", expected one of: " + str(supported_losses))
        if reduction not in supported_reductions:
            raise ValueError("Illegal reduction value: " + reduction + ", expected one of: " + str(supported_reductions))

    def forward(self, pred, target):
        assert pred.shape[0] == target.shape[0]

        pred = pred.view(-1, 4)
        target = target.view(-1, 4)
        tl = torch.max((pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2))
        br = torch.min((pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2))

        area_p = torch.prod(pred[:, 2:], 1)
        area_g = torch.prod(target[:, 2:], 1)

        en = (tl < br).type(tl.type()).prod(dim=1)
        area_i = torch.prod(br - tl, 1) * en
        area_u = area_p + area_g - area_i
        iou = (area_i) / (area_u + 1e-16)

        if self.loss_type == "iou":
            loss = 1 - iou**2
        elif self.loss_type == "giou":
            c_tl = torch.min((pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2))
            c_br = torch.max((pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2))
            area_c = torch.prod(c_br - c_tl, 1)
            giou = iou - (area_c - area_u) / area_c.clamp(1e-16)
            loss = 1 - giou.clamp(min=-1.0, max=1.0)

        if self.reduction == "mean":
            loss = loss.mean()
        elif self.reduction == "sum":
            loss = loss.sum()

        return loss
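
A small numeric check of the giou branch, assuming IOUloss is importable from super_gradients.training.losses.yolox_loss (the file documented above); boxes are in (cx, cy, w, h) format with invented values:

import torch
from super_gradients.training.losses.yolox_loss import IOUloss

loss_fn = IOUloss(reduction="mean", loss_type="giou")

# two unit boxes touching along x: iou = 0, union area = 2, enclosing-box area = 2
pred = torch.tensor([[0.0, 0.0, 1.0, 1.0]])
target = torch.tensor([[1.0, 0.0, 1.0, 1.0]])

# giou = iou - (area_c - area_u) / area_c = 0 - (2 - 2) / 2 = 0, so loss = 1 - giou = 1
print(loss_fn(pred, target))  # tensor(1.) up to the 1e-16 stabilizers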