Skip to content

Processing

ComposeProcessing

Bases: Processing

Compose a list of Processing objects into a single Processing object.

Source code in V3_1/src/super_gradients/training/processing/processing.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
@register_processing(Processings.ComposeProcessing)
class ComposeProcessing(Processing):
    """Chain several Processing steps so that they can be used as a single Processing.

    Preprocessing applies the steps in the order given; postprocessing applies them in the opposite order.

    :param processings: Processing steps, in the order they are applied during preprocessing.
    """

    def __init__(self, processings: List[Processing]):
        self.processings = processings

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, ComposeProcessingMetadata]:
        """Run the preprocessing of every step, feeding each step the output of the previous one.

        :param image: Image to process. It is copied before the first step runs.
        :return:      The processed image, and a ComposeProcessingMetadata holding one metadata entry per step
                      (in the same order as the steps).
        """
        current_image = image.copy()
        collected_metadata = []
        for step in self.processings:
            current_image, step_metadata = step.preprocess_image(image=current_image)
            collected_metadata.append(step_metadata)
        return current_image, ComposeProcessingMetadata(metadata_lst=collected_metadata)

    def postprocess_predictions(self, predictions: Prediction, metadata: ComposeProcessingMetadata) -> Prediction:
        """Run the postprocessing of every step, from the last step back to the first.

        :param predictions: Predictions to postprocess.
        :param metadata:    Metadata returned by `preprocess_image`; entry i is handed to step i.
        :return:            The predictions after every step has postprocessed them.
        """
        # Both sequences are reversed so that each step is still paired with its own metadata.
        steps_reversed = self.processings[::-1]
        metadata_reversed = metadata.metadata_lst[::-1]

        result = predictions
        for step, step_metadata in zip(steps_reversed, metadata_reversed):
            result = step.postprocess_predictions(result, step_metadata)
        return result

postprocess_predictions(predictions, metadata)

Postprocess the model output predictions.

Source code in V3_1/src/super_gradients/training/processing/processing.py
78
79
80
81
82
83
def postprocess_predictions(self, predictions: Prediction, metadata: ComposeProcessingMetadata) -> Prediction:
    """Run the postprocessing of every step, from the last step back to the first.

    :param predictions: Predictions to postprocess.
    :param metadata:    Metadata returned by `preprocess_image`; entry i is handed to step i.
    :return:            The predictions after every step has postprocessed them.
    """
    # Both sequences are reversed so that each step is still paired with its own metadata.
    steps_reversed = self.processings[::-1]
    metadata_reversed = metadata.metadata_lst[::-1]

    result = predictions
    for step, step_metadata in zip(steps_reversed, metadata_reversed):
        result = step.postprocess_predictions(result, step_metadata)
    return result

preprocess_image(image)

Processing an image, before feeding it to the network.

Source code in V3_1/src/super_gradients/training/processing/processing.py
70
71
72
73
74
75
76
def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, ComposeProcessingMetadata]:
    """Run the preprocessing of every step, feeding each step the output of the previous one.

    :param image: Image to process. It is copied before the first step runs.
    :return:      The processed image, and a ComposeProcessingMetadata holding one metadata entry per step
                  (in the same order as the steps).
    """
    current_image = image.copy()
    collected_metadata = []
    for step in self.processings:
        current_image, step_metadata = step.preprocess_image(image=current_image)
        collected_metadata.append(step_metadata)
    return current_image, ComposeProcessingMetadata(metadata_lst=collected_metadata)

ImagePermute

Bases: Processing

Permute the image dimensions.

Parameters:

Name Type Description Default
permutation Tuple[int, int, int]

Specify new order of dims. Default value (2, 0, 1) suitable for converting from HWC to CHW format.

(2, 0, 1)
Source code in V3_1/src/super_gradients/training/processing/processing.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
@register_processing(Processings.ImagePermute)
class ImagePermute(Processing):
    """Reorder the dimensions of an image.

    :param permutation: New order of the dimensions. The default (2, 0, 1) converts from HWC to CHW format.
    """

    def __init__(self, permutation: Tuple[int, int, int] = (2, 0, 1)):
        self.permutation = permutation

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Transpose the image according to `self.permutation`.

        :param image: Image to permute.
        :return:      The permuted image as a contiguous array, and None (this step produces no metadata).
        """
        permuted_view = image.transpose(*self.permutation)
        # transpose only changes strides; ascontiguousarray lays the data out in the new order.
        return np.ascontiguousarray(permuted_view), None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

NormalizeImage

Bases: Processing

Normalize an image based on means and standard deviation.

Parameters:

Name Type Description Default
mean List[float]

Mean values for each channel.

required
std List[float]

Standard deviation values for each channel.

required
Source code in V3_1/src/super_gradients/training/processing/processing.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
@register_processing(Processings.NormalizeImage)
class NormalizeImage(Processing):
    """Normalize an image by subtracting a per-channel mean and dividing by a per-channel standard deviation.

    Both values are stored as float32 arrays of shape (1, 1, C), so they broadcast over the last axis of the image.

    :param mean:    Mean values for each channel.
    :param std:     Standard deviation values for each channel.
    """

    def __init__(self, mean: List[float], std: List[float]):
        broadcast_shape = (1, 1, -1)
        self.mean = np.array(mean).reshape(broadcast_shape).astype(np.float32)
        self.std = np.array(std).reshape(broadcast_shape).astype(np.float32)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Compute `(image - mean) / std`.

        :param image: Image to normalize; its last axis must broadcast against the stored mean and std.
        :return:      The normalized image, and None (this step produces no metadata).
        """
        centered = image - self.mean
        return centered / self.std, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

Processing

Bases: ABC

Interface for preprocessing and postprocessing methods that are used to prepare images for a model and process the model's output.

Subclasses should implement the preprocess_image and postprocess_predictions methods according to the specific requirements of the model and task.

Source code in V3_1/src/super_gradients/training/processing/processing.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class Processing(ABC):
    """Interface for the preprocessing and postprocessing steps that
    prepare images for a model and process the model's output.

    Subclasses should implement the `preprocess_image` and `postprocess_predictions`
    methods according to the specific requirements of the model and task.
    """

    @abstractmethod
    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, Union[None, ProcessingMetadata]]:
        """Process an image before feeding it to the network.

        :param image: Image to process. Expected to be in (H, W, C) or (H, W).
        :return:      The processed image, and either None or the metadata to pass to `postprocess_predictions`.
        """

    @abstractmethod
    def postprocess_predictions(self, predictions: Prediction, metadata: Union[None, ProcessingMetadata]) -> Prediction:
        """Postprocess the model output predictions.

        :param predictions: Predictions to postprocess.
        :param metadata:    The metadata (or None) returned by `preprocess_image`.
        :return:            The postprocessed predictions.
        """

postprocess_predictions(predictions, metadata) abstractmethod

Postprocess the model output predictions.

Source code in V3_1/src/super_gradients/training/processing/processing.py
57
58
59
60
@abstractmethod
def postprocess_predictions(self, predictions: Prediction, metadata: Union[None, ProcessingMetadata]) -> Prediction:
    """Postprocess the model output predictions.

    :param predictions: Predictions to postprocess.
    :param metadata:    The metadata (or None) returned by `preprocess_image`.
    :return:            The postprocessed predictions.
    """

preprocess_image(image) abstractmethod

Processing an image, before feeding it to the network. Expected to be in (H, W, C) or (H, W).

Source code in V3_1/src/super_gradients/training/processing/processing.py
52
53
54
55
@abstractmethod
def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, Union[None, ProcessingMetadata]]:
    """Process an image before feeding it to the network.

    :param image: Image to process. Expected to be in (H, W, C) or (H, W).
    :return:      The processed image, and either None or the metadata to pass to `postprocess_predictions`.
    """

ProcessingMetadata dataclass

Bases: ABC

Metadata including information to postprocess a prediction.

Source code in V3_1/src/super_gradients/training/processing/processing.py
22
23
24
@dataclass
class ProcessingMetadata(ABC):
    """Metadata including information to postprocess a prediction.

    Base type for the metadata that `Processing.preprocess_image` may return and that
    `Processing.postprocess_predictions` receives. It declares no fields itself.
    """

ReverseImageChannels

Bases: Processing

Reverse the order of the image channels (RGB -> BGR or BGR -> RGB).

Source code in V3_1/src/super_gradients/training/processing/processing.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
@register_processing(Processings.ReverseImageChannels)
class ReverseImageChannels(Processing):
    """Reverse the order of the image channels (RGB -> BGR or BGR -> RGB)."""

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Reverse the channel order of an image.

        :param image: Image, in (H, W, C) format, with exactly 3 channels.
        :return:      Image with reversed channel order (RGB if input was BGR, BGR if input was RGB),
                      and None (this step produces no metadata).
        :raises ValueError: If the image has fewer than 3 dimensions, or if its third dimension is not 3.
        """

        # A (H, W) image has no channel axis; fail with a clear message instead of an IndexError on shape[2].
        if image.ndim < 3:
            raise ValueError("ReverseImageChannels expects an image in (H, W, C) format, got shape: " + str(image.shape))

        if image.shape[2] != 3:
            raise ValueError("ReverseImageChannels expects 3 channels, got: " + str(image.shape[2]))

        # Slicing with a negative step returns a view on the input, not a copy.
        processed_image = image[..., ::-1]
        return processed_image, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

preprocess_image(image)

Reverse the channel order of an image.

Parameters:

Name Type Description Default
image np.ndarray

Image, in (H, W, C) format.

required

Returns:

Type Description
Tuple[np.ndarray, None]

Image with reversed channel order. (RGB if input was BGR, BGR if input was RGB)

Source code in V3_1/src/super_gradients/training/processing/processing.py
108
109
110
111
112
113
114
115
116
117
118
119
def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Reverse the channel order of an image.

    :param image: Image, in (H, W, C) format, with exactly 3 channels.
    :return:      Image with reversed channel order (RGB if input was BGR, BGR if input was RGB),
                  and None (this step produces no metadata).
    :raises ValueError: If the image has fewer than 3 dimensions, or if its third dimension is not 3.
    """

    # A (H, W) image has no channel axis; fail with a clear message instead of an IndexError on shape[2].
    if image.ndim < 3:
        raise ValueError("ReverseImageChannels expects an image in (H, W, C) format, got shape: " + str(image.shape))

    if image.shape[2] != 3:
        raise ValueError("ReverseImageChannels expects 3 channels, got: " + str(image.shape[2]))

    # Slicing with a negative step returns a view on the input, not a copy.
    processed_image = image[..., ::-1]
    return processed_image, None

StandardizeImage

Bases: Processing

Standardize image pixel values with img/max_val

Parameters:

Name Type Description Default
max_value float

Current maximum value of the image pixels. (usually 255)

255.0
Source code in V3_1/src/super_gradients/training/processing/processing.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
@register_processing(Processings.StandardizeImage)
class StandardizeImage(Processing):
    """Standardize image pixel values with img/max_val

    :param max_value: Current maximum value of the image pixels. (usually 255)
    """

    def __init__(self, max_value: float = 255.0):
        super().__init__()
        self.max_value = max_value

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Divide the image pixel values by `max_value`.

        :param image: Image whose pixel values should be scaled.
        :return:      Image divided by `max_value`, as float32, and None (this step produces no metadata).
        """
        processed_image = (image / self.max_value).astype(np.float32)
        return processed_image, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

preprocess_image(image)

Divide the image pixel values by max_value.

Parameters:

Name Type Description Default
image np.ndarray

Image, in (H, W, C) format.

required

Returns:

Type Description
Tuple[np.ndarray, None]

Image divided by max_value, as float32.

Source code in V3_1/src/super_gradients/training/processing/processing.py
136
137
138
139
140
141
142
143
def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Divide the image pixel values by `max_value`.

    :param image: Image whose pixel values should be scaled.
    :return:      Image divided by `max_value`, as float32, and None (this step produces no metadata).
    """
    processed_image = (image / self.max_value).astype(np.float32)
    return processed_image, None

default_ppyoloe_coco_processing_params()

Processing parameters commonly used for training PPYoloE on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in V3_1/src/super_gradients/training/processing/processing.py
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
def default_ppyoloe_coco_processing_params() -> dict:
    """Build the processing parameters commonly used for training PPYoloE on COCO dataset.
    TODO: remove once we load it from the checkpoint

    :return: Dict with the keys `class_names`, `image_processor`, `iou` and `conf`.
    """
    pipeline_steps = [
        ReverseImageChannels(),
        DetectionRescale(output_shape=(640, 640)),
        NormalizeImage(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
        ImagePermute(permutation=(2, 0, 1)),
    ]
    image_processor = ComposeProcessing(pipeline_steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.65,
        "conf": 0.5,
    }

default_yolo_nas_coco_processing_params()

Processing parameters commonly used for training YoloNAS on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in V3_1/src/super_gradients/training/processing/processing.py
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
def default_yolo_nas_coco_processing_params() -> dict:
    """Build the processing parameters commonly used for training YoloNAS on COCO dataset.
    TODO: remove once we load it from the checkpoint

    :return: Dict with the keys `class_names`, `image_processor`, `iou` and `conf`.
    """
    pipeline_steps = [
        DetectionLongestMaxSizeRescale(output_shape=(636, 636)),
        DetectionCenterPadding(output_shape=(640, 640), pad_value=114),
        StandardizeImage(max_value=255.0),
        ImagePermute(permutation=(2, 0, 1)),
    ]
    image_processor = ComposeProcessing(pipeline_steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.7,
        "conf": 0.25,
    }

default_yolox_coco_processing_params()

Processing parameters commonly used for training YoloX on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in V3_1/src/super_gradients/training/processing/processing.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
def default_yolox_coco_processing_params() -> dict:
    """Build the processing parameters commonly used for training YoloX on COCO dataset.
    TODO: remove once we load it from the checkpoint

    :return: Dict with the keys `class_names`, `image_processor`, `iou` and `conf`.
    """
    pipeline_steps = [
        ReverseImageChannels(),
        DetectionLongestMaxSizeRescale((640, 640)),
        DetectionBottomRightPadding((640, 640), 114),
        ImagePermute((2, 0, 1)),
    ]
    image_processor = ComposeProcessing(pipeline_steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.65,
        "conf": 0.1,
    }

get_pretrained_processing_params(model_name, pretrained_weights)

Get the processing parameters for a pretrained model. TODO: remove once we load it from the checkpoint

Source code in V3_1/src/super_gradients/training/processing/processing.py
331
332
333
334
335
336
337
338
339
340
341
342
def get_pretrained_processing_params(model_name: str, pretrained_weights: str) -> dict:
    """Get the processing parameters for a pretrained model.
    TODO: remove once we load it from the checkpoint

    :param model_name:          Name of the model; matched by substring against "yolox", "ppyoloe" and "yolo_nas", in that order.
    :param pretrained_weights:  Name of the pretrained weights; only "coco" has default parameters.
    :return:                    The default processing parameters of the matching model family, or an empty dict if nothing matches.
    """
    # Only COCO-pretrained weights have known defaults.
    if pretrained_weights != "coco":
        return {}

    if "yolox" in model_name:
        return default_yolox_coco_processing_params()
    if "ppyoloe" in model_name:
        return default_ppyoloe_coco_processing_params()
    if "yolo_nas" in model_name:
        return default_yolo_nas_coco_processing_params()
    return {}