Processing

`AutoPadding`

Bases: Processing, ABC

Source code in latest/src/super_gradients/training/processing/processing.py

class AutoPadding(Processing, ABC):
    """Abstract base holding the configuration shared by padding processings: a (H, W) multiple and a pad value."""

    def __init__(self, shape_multiple: Tuple[int, int], pad_value: int):
        """
        :param shape_multiple:  (H, W) multiples that the padded image height and width should be divisible by.
                                For instance, with a value of (32, 40), an input image of size (45, 67) will be padded to (64, 80).
        :param pad_value:       Value written into the padded area.
        """
        self.shape_multiple, self.pad_value = shape_multiple, pad_value

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return None: no equivalent photometric module is provided for this processing."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Always False: this processing only pads the image, it does not resize it."""
        return False

`__init__(shape_multiple, pad_value)`

Parameters:

Name	Type	Description	Default
`shape_multiple`	`Tuple[int, int]`	Tuple of (H, W) indicating the height and width multiples to which the input image dimensions will be padded. For instance, with a value of (32, 40), an input image of size (45, 67) will be padded to (64, 80).	required
`pad_value`	`int`	Value to pad the image with.	required

Source code in latest/src/super_gradients/training/processing/processing.py

def __init__(self, shape_multiple: Tuple[int, int], pad_value: int):
    """Store the padding configuration on the instance.

    :param shape_multiple:  (H, W) multiples that the padded image height and width should be divisible by.
                            For instance, with a value of (32, 40), an input image of size (45, 67) will be padded to (64, 80).
    :param pad_value:       Value written into the padded area.
    """
    self.shape_multiple, self.pad_value = shape_multiple, pad_value

`CenterCrop`

Bases: ClassificationProcess

Parameters:

Name	Type	Description	Default
`size`	`int`	Desired output size of the crop.	`224`

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.CenterCrop)
class CenterCrop(ClassificationProcess):
    """
    Crop a square window of side `size` from the center of the image.

    :param size: Desired output size of the crop.
    """

    def __init__(self, size: int = 224):
        super().__init__()
        self.size = int(size)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Crops the given image at the center.

        When the image is shorter than `size` along an axis, that axis is returned uncropped.

        :param image: Image, in (H, W, C) format.
        :return:      Tuple of (center cropped image, None); no metadata is produced.
        """
        height, width = image.shape[0], image.shape[1]

        # Clamp the start at 0: a negative start would be interpreted by numpy as an offset
        # from the END of the axis, producing an off-center (possibly almost empty) slice.
        start_x = max(0, (width - self.size) // 2)
        start_y = max(0, (height - self.size) // 2)
        end_x = start_x + self.size
        end_y = start_y + self.size

        cropped_image = image[start_y:end_y, start_x:end_x]
        return cropped_image, None

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return None: cropping has no equivalent photometric module."""
        return None

    def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
        """
        Report the nominal image shape produced by this crop.

        :return: (rows, cols), both equal to `self.size`.
        """
        return (self.size, self.size)

    @property
    def resizes_image(self) -> bool:
        """Always True: cropping changes the image size."""
        return True

`infer_image_input_shape()`

Infer the output image shape from the processing.

Returns:

Type	Description
`Optional[Tuple[int, int]]`	(rows, cols) Returns the last known output shape for all the processings.

Source code in latest/src/super_gradients/training/processing/processing.py

def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
    """
    Report the image shape produced by this processing.

    :return: (rows, cols), both equal to `self.size`.
    """
    side = self.size
    return side, side

`preprocess_image(image)`

Crops the given image at the center.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, None]`	The center cropped image.

Source code in latest/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Crops the given image at the center.

    When the image is shorter than `self.size` along an axis, that axis is returned uncropped.

    :param image: Image, in (H, W, C) format.
    :return:      Tuple of (center cropped image, None); no metadata is produced.
    """
    height, width = image.shape[0], image.shape[1]

    # Clamp the start at 0: a negative start would be interpreted by numpy as an offset
    # from the END of the axis, producing an off-center (possibly almost empty) slice.
    start_x = max(0, (width - self.size) // 2)
    start_y = max(0, (height - self.size) // 2)
    end_x = start_x + self.size
    end_y = start_y + self.size

    cropped_image = image[start_y:end_y, start_x:end_x]
    return cropped_image, None

`ComposeProcessing`

Bases: Processing

Compose a list of Processing objects into a single Processing object.

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.ComposeProcessing)
class ComposeProcessing(Processing):
    """Chain several Processing objects so that they can be used as a single Processing."""

    def __init__(self, processings: List[Processing]):
        """
        :param processings:     Processing objects to apply, in order.
        """
        self.processings = processings

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, ComposeProcessingMetadata]:
        """Run every processing, in order, on a copy of the image and collect the metadata of each step."""
        current_image = image.copy()
        collected_metadata = []
        for step in self.processings:
            current_image, step_metadata = step.preprocess_image(image=current_image)
            collected_metadata.append(step_metadata)
        return current_image, ComposeProcessingMetadata(metadata_lst=collected_metadata)

    def postprocess_predictions(self, predictions: Prediction, metadata: ComposeProcessingMetadata) -> Prediction:
        """Undo the preprocessing on the predictions, walking the steps (and their metadata) in reverse order."""
        reversed_steps = self.processings[::-1]
        reversed_metadata = metadata.metadata_lst[::-1]

        result = predictions
        for step, step_metadata in zip(reversed_steps, reversed_metadata):
            result = step.postprocess_predictions(result, step_metadata)
        return result

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Stack the photometric modules of all steps into one nn.Sequential, dropping None and nn.Identity entries."""
        candidates = (step.get_equivalent_photometric_module() for step in self.processings)
        kept_modules = [module for module in candidates if module is not None and not isinstance(module, nn.Identity)]
        return nn.Sequential(*kept_modules)

    def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
        """
        Infer the output image shape from the composed processings.

        :return: (rows, cols) reported by the last processing that reports a shape, or None if none of them does.
        """
        reported_shapes = [step.infer_image_input_shape() for step in self.processings]
        known_shapes = [shape for shape in reported_shapes if shape is not None]
        return known_shapes[-1] if known_shapes else None

    @property
    def resizes_image(self) -> bool:
        """True as soon as one of the composed processings resizes the image."""
        for step in self.processings:
            if step.resizes_image:
                return True
        return False

    def get_equivalent_compose_without_resizing(self, auto_padding: AutoPadding) -> "ComposeProcessing":
        """Get a composed processing equivalent to this one, but without resizing the image.
        :param auto_padding:    AutoPadding object to use for padding the image.
                                This is required since models often expect input image to be a multiple of a specific shape (usually 32x32).
                                This padding operation will be applied on the input image before any other processing.
        :return:                A composed processing equivalent to this one, but without resizing the image.
        """
        kept_steps = [auto_padding]

        for step in self.processings:
            if isinstance(step, ComposeProcessing):
                # Nested compositions are converted recursively and kept as a single step.
                kept_steps.append(step.get_equivalent_compose_without_resizing(auto_padding=auto_padding))
                continue
            if step.resizes_image:
                logger.info(f"Skipping processing `{step.__class__.__name__}` because it resizes the image.")
                continue
            kept_steps.append(step)
        return ComposeProcessing(kept_steps)

`__init__(processings)`

Parameters:

Name	Type	Description	Default
`processings`	`List[Processing]`	List of Processing objects to compose.	required

Source code in latest/src/super_gradients/training/processing/processing.py

def __init__(self, processings: List[Processing]):
    """Store the processings to compose; the list is kept by reference, not copied.

    :param processings:     List of Processing objects to compose.
    """
    self.processings = processings

`get_equivalent_compose_without_resizing(auto_padding)`

Get a composed processing equivalent to this one, but without resizing the image.

Parameters:

Name	Type	Description	Default
`auto_padding`	`AutoPadding`	AutoPadding object to use for padding the image. This is required since models often expect input image to be a multiple of a specific shape (usually 32x32). This padding operation will be applied on the input image before any other processing.	required

Returns:

Type	Description
`ComposeProcessing`	A composed processing equivalent to this one, but without resizing the image.

Source code in latest/src/super_gradients/training/processing/processing.py

def get_equivalent_compose_without_resizing(self, auto_padding: AutoPadding) -> "ComposeProcessing":
    """Get a composed processing equivalent to this one, but without resizing the image.
    :param auto_padding:    AutoPadding object to use for padding the image.
                            This is required since models often expect input image to be a multiple of a specific shape (usually 32x32).
                            This padding operation will be applied on the input image before any other processing.
    :return:                A composed processing equivalent to this one, but without resizing the image.
    """
    kept_steps = [auto_padding]

    for step in self.processings:
        if isinstance(step, ComposeProcessing):
            # Nested compositions are converted recursively and kept as a single step.
            kept_steps.append(step.get_equivalent_compose_without_resizing(auto_padding=auto_padding))
            continue
        if step.resizes_image:
            logger.info(f"Skipping processing `{step.__class__.__name__}` because it resizes the image.")
            continue
        kept_steps.append(step)
    return ComposeProcessing(kept_steps)

`infer_image_input_shape()`

Infer the output image shape from the processing.

Returns:

Type	Description
`Optional[Tuple[int, int]]`	(rows, cols) Returns the last known output shape for all the processings.

Source code in latest/src/super_gradients/training/processing/processing.py

def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
    """
    Infer the output image shape from the composed processings.

    :return: (rows, cols) reported by the last processing that reports a shape, or None if none of them does.
    """
    reported_shapes = [step.infer_image_input_shape() for step in self.processings]
    known_shapes = [shape for shape in reported_shapes if shape is not None]
    return known_shapes[-1] if known_shapes else None

`postprocess_predictions(predictions, metadata)`

Postprocess the model output predictions.

Source code in latest/src/super_gradients/training/processing/processing.py

def postprocess_predictions(self, predictions: Prediction, metadata: ComposeProcessingMetadata) -> Prediction:
    """Undo the preprocessing on the predictions, walking the steps (and their metadata) in reverse order."""
    reversed_steps = self.processings[::-1]
    reversed_metadata = metadata.metadata_lst[::-1]

    result = predictions
    for step, step_metadata in zip(reversed_steps, reversed_metadata):
        result = step.postprocess_predictions(result, step_metadata)
    return result

`preprocess_image(image)`

Processing an image, before feeding it to the network.

Source code in latest/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, ComposeProcessingMetadata]:
    """Run every processing, in order, on a copy of the image and collect the metadata of each step."""
    current_image = image.copy()
    collected_metadata = []
    for step in self.processings:
        current_image, step_metadata = step.preprocess_image(image=current_image)
        collected_metadata.append(step_metadata)
    return current_image, ComposeProcessingMetadata(metadata_lst=collected_metadata)

`ImagePermute`

Bases: Processing

Permute the image dimensions.

Parameters:

Name	Type	Description	Default
`permutation`	`Tuple[int, int, int]`	Specify new order of dims. Default value (2, 0, 1) suitable for converting from HWC to CHW format.	`(2, 0, 1)`

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.ImagePermute)
class ImagePermute(Processing):
    """Reorder the dimensions of an image.

    :param permutation: New order of the dimensions. The default (2, 0, 1) converts an HWC image to CHW.
    """

    def __init__(self, permutation: Tuple[int, int, int] = (2, 0, 1)):
        self.permutation = tuple(permutation)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Transpose the image according to `self.permutation` and return it as a contiguous array, with no metadata."""
        permuted_view = image.transpose(*self.permutation)
        # transpose only returns a strided view; materialize it in contiguous memory.
        return np.ascontiguousarray(permuted_view), None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions untouched."""
        return predictions

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return None: no equivalent photometric module is provided for a permutation."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Always False: a permutation is not reported as a resize."""
        return False

`NormalizeImage`

Bases: Processing

Normalize an image based on means and standard deviation.

Parameters:

Name	Type	Description	Default
`mean`	`List[float]`	Mean values for each channel.	required
`std`	`List[float]`	Standard deviation values for each channel.	required

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.NormalizeImage)
class NormalizeImage(Processing):
    """Standardize an image channel-wise: subtract the mean, then divide by the standard deviation.

    :param mean:    Mean values for each channel.
    :param std:     Standard deviation values for each channel.
    """

    def __init__(self, mean: List[float], std: List[float]):
        def _per_channel(values: List[float]) -> np.ndarray:
            # Shape (1, 1, C) so the values broadcast over the two leading axes of the image.
            return np.array(values).reshape((1, 1, -1)).astype(np.float32)

        self.mean = _per_channel(mean)
        self.std = _per_channel(std)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Return `(image - mean) / std` together with None (no metadata)."""
        centered = image - self.mean
        return centered / self.std, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions untouched."""
        return predictions

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Return an ApplyMeanStd module built from the stored mean and std."""
        # Imported locally, as in the original code.
        from super_gradients.conversion.preprocessing_modules import ApplyMeanStd

        return ApplyMeanStd(mean=self.mean, std=self.std)

    @property
    def resizes_image(self) -> bool:
        """Always False: normalization does not change the image size."""
        return False

`Processing`

Bases: ABC

Interface for preprocessing and postprocessing methods that are used to prepare images for a model and process the model's output.

Subclasses should implement the preprocess_image and postprocess_predictions methods according to the specific requirements of the model and task.

Source code in latest/src/super_gradients/training/processing/processing.py

class Processing(ABC):
    """Interface for preprocessing and postprocessing methods that are
    used to prepare images for a model and process the model's output.

    Subclasses must implement the abstract members: `preprocess_image`, `postprocess_predictions`,
    `get_equivalent_photometric_module` and the `resizes_image` property, according to the specific
    requirements of the model and task.
    """

    @abstractmethod
    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, Union[None, ProcessingMetadata]]:
        """Process an image before feeding it to the network.

        :param image: Image, expected to be in (H, W, C) or (H, W) format.
        :return:      Tuple of (processed image, metadata), where metadata is None or a ProcessingMetadata.
        """
        pass

    @abstractmethod
    def postprocess_predictions(self, predictions: Prediction, metadata: Union[None, ProcessingMetadata]) -> Prediction:
        """Postprocess the model output predictions.

        :param predictions: Predictions to postprocess.
        :param metadata:    None, or a ProcessingMetadata.
        :return:            The postprocessed predictions.
        """
        pass

    @abstractmethod
    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """
        Get the equivalent photometric preprocessing module for this processing.
        A photometric preprocessing applies a transformation to the image pixels, without changing the image size.
        This includes RGB -> BGR, standardization, normalization, etc.
        If a Processing subclass does not change pixel values, it should return an nn.Identity module.
        If a Processing subclass does not have an equivalent photometric preprocessing, it should return None.
        :return: The equivalent nn.Module, or None.
        """
        pass

    def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
        """
        Infer the shape (rows, cols) of the image after all the processing steps.
        This is the effective image size that is fed to the model itself.
        This default implementation always returns None; subclasses may override it.
        :return: Return the image shape (rows, cols), or None if the image shape cannot be inferred (when preprocessing
        contains no resize/padding operations).
        """
        return None

    @property
    @abstractmethod
    def resizes_image(self) -> bool:
        """Return True if the processing resizes the image, False otherwise."""
        pass

`resizes_image: bool` `abstractmethod` `property`

Return True if the processing resizes the image, False otherwise.

`get_equivalent_photometric_module()` `abstractmethod`

Get the equivalent photometric preprocessing module for this processing. A photometric preprocessing applies a transformation to the image pixels, without changing the image size. This includes RGB -> BGR, standardization, normalization, etc. If a Processing subclass does not change pixel values, it should return an nn.Identity module. If a Processing subclass does not have an equivalent photometric preprocessing, it should return None.

Returns:

Type	Description
`Optional[nn.Module]`

Source code in latest/src/super_gradients/training/processing/processing.py

@abstractmethod
def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
    """
    Get the equivalent photometric preprocessing module for this processing.
    A photometric preprocessing applies a transformation to the image pixels, without changing the image size.
    This includes RGB -> BGR, standardization, normalization, etc.
    If a Processing subclass does not change pixel values, it should return an nn.Identity module.
    If a Processing subclass does not have an equivalent photometric preprocessing, it should return None.
    :return: The equivalent nn.Module, or None.
    """
    pass

`infer_image_input_shape()`

Infer the shape (rows, cols) of the image after all the processing steps. This is the effective image size that is fed to the model itself.

Returns:

Type	Description
`Optional[Tuple[int, int]]`	Return the image shape (rows, cols), or None if the image shape cannot be inferred (When preprocessing contains no resize/padding operations).

Source code in latest/src/super_gradients/training/processing/processing.py

def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
    """
    Infer the shape (rows, cols) of the image after all the processing steps.
    This is the effective image size that is fed to the model itself.
    This default implementation always returns None.
    :return: Return the image shape (rows, cols), or None if the image shape cannot be inferred (when preprocessing
    contains no resize/padding operations).
    """
    return None

`postprocess_predictions(predictions, metadata)` `abstractmethod`

Postprocess the model output predictions.

Source code in latest/src/super_gradients/training/processing/processing.py

@abstractmethod
def postprocess_predictions(self, predictions: Prediction, metadata: Union[None, ProcessingMetadata]) -> Prediction:
    """Postprocess the model output predictions.

    :param predictions: Predictions to postprocess.
    :param metadata:    None, or a ProcessingMetadata.
    :return:            The postprocessed predictions.
    """
    pass

`preprocess_image(image)` `abstractmethod`

Processing an image, before feeding it to the network. Expected to be in (H, W, C) or (H, W).

Source code in latest/src/super_gradients/training/processing/processing.py

@abstractmethod
def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, Union[None, ProcessingMetadata]]:
    """Process an image before feeding it to the network.

    :param image: Image, expected to be in (H, W, C) or (H, W) format.
    :return:      Tuple of (processed image, metadata), where metadata is None or a ProcessingMetadata.
    """
    pass

`ProcessingMetadata` `dataclass`

Bases: ABC

Metadata including information to postprocess a prediction.

Source code in latest/src/super_gradients/training/processing/processing.py

@dataclass
class ProcessingMetadata(ABC):
    """Metadata including information to postprocess a prediction.

    This base class declares no fields of its own.
    """

`Resize`

Bases: ClassificationProcess

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.Resize)
class Resize(ClassificationProcess):
    """
    Rescale an image, preserving its aspect ratio, so that its shorter side becomes `size` pixels long.

    :param size: Target length of the shorter image side.
    """

    def __init__(self, size: int = 224):
        super().__init__()
        self.size = int(size)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, "RescaleMetadata"]:
        """Resize an image.

        :param image: Image, in (H, W, C) format.
        :return:      Tuple of (resized image, RescaleMetadata holding the original shape and the scale factor,
                      which is the same for both axes).
        """
        height, width = image.shape[:2]
        # The larger of the two ratios guarantees that both sides end up at least `size` long.
        scale_factor = max(self.size / height, self.size / width)

        if scale_factor != 1.0:
            # `dim * (size / dim)` may land a hair below `size` in floating point, and int() would
            # then truncate it to `size - 1`. Clamp so that neither side is ever shorter than `size`.
            new_height = max(self.size, int(height * scale_factor))
            new_width = max(self.size, int(width * scale_factor))
            image = _rescale_image_with_pil(image, target_shape=(new_height, new_width))

        return image, RescaleMetadata(original_shape=(height, width), scale_factor_h=scale_factor, scale_factor_w=scale_factor)

    def get_equivalent_photometric_module(self) -> None:
        """Return None: resizing has no equivalent photometric module."""
        return None

    def infer_image_input_shape(self) -> None:
        """Return None: the output shape depends on the aspect ratio of the input image."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Always True: this processing resizes the image."""
        return True

`preprocess_image(image)`

Resize an image.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, RescaleMetadata]`	The resized image, and a RescaleMetadata holding the original shape and the scale factor.

Source code in latest/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, "RescaleMetadata"]:
    """Resize an image.

    :param image: Image, in (H, W, C) format.
    :return:      Tuple of (resized image, RescaleMetadata holding the original shape and the scale factor,
                  which is the same for both axes).
    """
    height, width = image.shape[:2]
    # The larger of the two ratios guarantees that both sides end up at least `size` long.
    scale_factor = max(self.size / height, self.size / width)

    if scale_factor != 1.0:
        # `dim * (size / dim)` may land a hair below `size` in floating point, and int() would
        # then truncate it to `size - 1`. Clamp so that neither side is ever shorter than `size`.
        new_height = max(self.size, int(height * scale_factor))
        new_width = max(self.size, int(width * scale_factor))
        image = _rescale_image_with_pil(image, target_shape=(new_height, new_width))

    return image, RescaleMetadata(original_shape=(height, width), scale_factor_h=scale_factor, scale_factor_w=scale_factor)

`ReverseImageChannels`

Bases: Processing

Reverse the order of the image channels (RGB -> BGR or BGR -> RGB).

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.ReverseImageChannels)
class ReverseImageChannels(Processing):
    """Reverse the order of the image channels (RGB -> BGR or BGR -> RGB)."""

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Reverse the channel order of an image.

        :param image: Image, in (H, W, C) format.
        :return:      Tuple of (image with reversed channel order, None). (RGB if input was BGR, BGR if input was RGB)
        :raises ValueError: If the image has fewer than 3 dimensions, or if its third dimension is not of size 3.
        """
        # Check the rank first so that a 2-D (H, W) image is rejected with a ValueError
        # instead of failing with a bare IndexError on `image.shape[2]`.
        if image.ndim < 3:
            raise ValueError(f"ReverseImageChannels expects an image in (H, W, C) format, got shape: {image.shape}")

        if image.shape[2] != 3:
            raise ValueError("ReverseImageChannels expects 3 channels, got: " + str(image.shape[2]))

        processed_image = image[..., ::-1]
        return processed_image, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions untouched."""
        return predictions

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Return a ChannelSelect module that picks the channels in the order [2, 1, 0]."""
        # Imported locally, as in the original code.
        from super_gradients.conversion.preprocessing_modules import ChannelSelect

        return ChannelSelect(channels=np.array([2, 1, 0], dtype=int))

    @property
    def resizes_image(self) -> bool:
        """Always False: reversing channels does not change the image size."""
        return False

`preprocess_image(image)`

Reverse the channel order of an image.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, None]`	Image with reversed channel order. (RGB if input was BGR, BGR if input was RGB)

Source code in latest/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Reverse the channel order of an image.

    :param image: Image, in (H, W, C) format.
    :return:      Tuple of (image with reversed channel order, None). (RGB if input was BGR, BGR if input was RGB)
    :raises ValueError: If the image has fewer than 3 dimensions, or if its third dimension is not of size 3.
    """
    # Check the rank first so that a 2-D (H, W) image is rejected with a ValueError
    # instead of failing with a bare IndexError on `image.shape[2]`.
    if image.ndim < 3:
        raise ValueError(f"ReverseImageChannels expects an image in (H, W, C) format, got shape: {image.shape}")

    if image.shape[2] != 3:
        raise ValueError("ReverseImageChannels expects 3 channels, got: " + str(image.shape[2]))

    processed_image = image[..., ::-1]
    return processed_image, None

`SegmentationPadShortToCropSize`

Bases: Processing

Pads image to 'crop_size'.
Should be called only after "SegRescale" or "SegRandomRescale" in augmentations pipeline.

:param crop_size:   Tuple of (width, height) for the final crop size; if a scalar is given, the crop is a square (crop_size, crop_size).

:param fill_image: Grey value to fill image padded background.

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.SegmentationPadShortToCropSize)
class SegmentationPadShortToCropSize(Processing):
    """
    Pads image to 'crop_size': each side that is shorter than the crop size is padded up to it,
    sides that are already long enough are left untouched.
    Should be called only after "SegRescale" or "SegRandomRescale" in augmentations pipeline.

    :param crop_size:   Pair of sizes for the final crop, or a scalar for a square (crop_size, crop_size).
                        NOTE(review): this was documented as (width, height), but the code compares
                        crop_size[0] with image.shape[0] and crop_size[1] with image.shape[1] - confirm the order.
    :param fill_image:  Grey value to fill image padded background.
    """

    def __init__(self, crop_size: Union[float, Tuple, List], fill_image: Union[int, Tuple, List]):
        self.crop_size = crop_size
        self.fill_image = tuple(fill_image) if isinstance(fill_image, typing.Sequence) else fill_image

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, DetectionPadToSizeMetadata]:
        """Pad the image symmetrically around its center.

        :param image: Image to pad; its first two dimensions are the ones being padded.
        :return:      Tuple of (padded image, DetectionPadToSizeMetadata holding the padding coordinates).
        """
        # A scalar crop_size means a square crop, as documented; indexing a scalar used to raise TypeError.
        try:
            crop_size_0, crop_size_1 = self.crop_size[0], self.crop_size[1]
        except TypeError:
            crop_size_0 = crop_size_1 = self.crop_size

        # pad images from center symmetrically
        output_shape = max(image.shape[0], crop_size_0), max(image.shape[1], crop_size_1)
        padding_coordinates = _get_center_padding_coordinates(input_shape=image.shape, output_shape=output_shape)
        padded_image = _pad_image(image=image, padding_coordinates=padding_coordinates, pad_value=self.fill_image)

        return padded_image, DetectionPadToSizeMetadata(padding_coordinates=padding_coordinates)

    def postprocess_predictions(self, predictions: SegmentationPrediction, metadata: DetectionPadToSizeMetadata) -> SegmentationPrediction:
        """Slice the padded borders off the segmentation map, in place, and return the predictions."""
        predictions.segmentation_map = predictions.segmentation_map[
            metadata.padding_coordinates.top : predictions.segmentation_map_shape[0] - metadata.padding_coordinates.bottom,
            metadata.padding_coordinates.left : predictions.segmentation_map_shape[1] - metadata.padding_coordinates.right,
        ]
        return predictions

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return None: padding has no equivalent photometric module."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Always True: this processing is reported as one that resizes the image."""
        return True

`SegmentationPadToDivisible`

Bases: Processing

Pads image to a size divisible by the defined parameter.

Parameters:

Name	Type	Description	Default
`divisible_value`	`int`	The divisible value, new image size is an int multiplication of this number	required
`fill_image`	`Union[int, Tuple, List]`	The value to use for padding the image.	required

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.SegmentationPadToDivisible)
class SegmentationPadToDivisible(Processing):
    """
    Pad an image so that its height and width become multiples of a given value.

    :param divisible_value:   The divisible value, new image size is an int multiplication of this number
    :param fill_image:        The value to use for padding the image.
    """

    def __init__(self, divisible_value: int, fill_image: Union[int, Tuple, List]):
        self.divisible_value = int(divisible_value)
        if isinstance(fill_image, typing.Sequence):
            fill_image = tuple(fill_image)
        self.fill_image = fill_image

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, DetectionPadToSizeMetadata]:
        """Pad the image up to the next multiple of `divisible_value` on both of its first two axes.

        :param image: Image to pad.
        :return:      Tuple of (padded image, DetectionPadToSizeMetadata holding the padding coordinates).
        """
        rows, cols = image.shape[:2]
        # Round each side up to the closest multiple of `divisible_value`.
        target_shape = tuple(int(math.ceil(side / self.divisible_value) * self.divisible_value) for side in (rows, cols))

        pad_coords = _get_bottom_right_padding_coordinates(input_shape=image.shape, output_shape=target_shape)
        padded = _pad_image(image=image, padding_coordinates=pad_coords, pad_value=self.fill_image)

        return padded, DetectionPadToSizeMetadata(padding_coordinates=pad_coords)

    def postprocess_predictions(self, predictions: SegmentationPrediction, metadata: DetectionPadToSizeMetadata) -> SegmentationPrediction:
        """Slice the padded borders off the segmentation map, in place, and return the predictions."""
        pad_coords = metadata.padding_coordinates
        row_slice = slice(pad_coords.top, predictions.segmentation_map_shape[0] - pad_coords.bottom)
        col_slice = slice(pad_coords.left, predictions.segmentation_map_shape[1] - pad_coords.right)
        predictions.segmentation_map = predictions.segmentation_map[row_slice, col_slice]
        return predictions

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return None: padding has no equivalent photometric module."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Always True: this processing is reported as one that resizes the image."""
        return True

`SegmentationRescale`

Bases: Processing

Rescale image by scaling factor while preserving aspect ratio. The rescaling can be done according to scale_factor, short_size or long_size. If more than one argument is given, the rescaling mode is determined by this order: scale_factor, then short_size, then long_size.

Parameters:

Name	Type	Description	Default
`scale_factor`	`Optional[float]`	Rescaling is done by multiplying input size by scale_factor: out_size = (scale_factor * w, scale_factor * h)	`None`
`short_size`	`Optional[int]`	Rescaling is done by determining the scale factor by the ratio short_size / min(h, w).	`None`
`long_size`	`Optional[int]`	Rescaling is done by determining the scale factor by the ratio long_size / max(h, w).	`None`

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.SegmentationRescale)
class SegmentationRescale(Processing):
    """Rescale image by scaling factor while preserving aspect ratio.
    The rescaling can be done according to scale_factor, short_size or long_size.
    If more than one argument is given, the rescaling mode is determined by this order: scale_factor, then short_size,
    then long_size.

    :param scale_factor: Rescaling is done by multiplying input size by scale_factor:
        out_size = (scale_factor * w, scale_factor * h)
    :param short_size:  Rescaling is done by determining the scale factor by the ratio short_size / min(h, w).
    :param long_size:   Rescaling is done by determining the scale factor by the ratio long_size / max(h, w).
    """

    def __init__(self, scale_factor: Optional[float] = None, short_size: Optional[int] = None, long_size: Optional[int] = None):
        self.scale_factor = scale_factor
        self.short_size = short_size
        self.long_size = long_size

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, SegmentationResizeMetadata]:
        """Rescale the image by the computed scale factor.

        :param image: Image whose first two dimensions are (height, width).
        :return:      Tuple of (image, metadata). The image is rescaled unless the computed scale factor is exactly 1.0,
                      in which case the input image is returned as is. The metadata stores the original (height, width).
        """
        height, width = image.shape[:2]
        scale_factor = _compute_scale_factor(self.scale_factor, self.short_size, self.long_size, width, height)

        # Bind the result before the conditional: when no rescaling is needed (scale_factor == 1.0)
        # the input image must be returned instead of hitting an unbound local variable.
        resized_image = image
        if scale_factor != 1.0:
            new_width, new_height = int(scale_factor * width), int(scale_factor * height)
            resized_image = _rescale_image(image, target_shape=(new_height, new_width))

        return resized_image, SegmentationResizeMetadata(original_shape=(height, width))

    def postprocess_predictions(self, predictions: SegmentationPrediction, metadata: SegmentationResizeMetadata) -> SegmentationPrediction:
        """Resize the predicted segmentation map back to the original image shape.

        :param predictions: Prediction whose ``segmentation_map`` is resized.
        :param metadata:    Metadata holding the ``original_shape`` recorded during preprocessing.
        :return:            The same ``predictions`` object, with an int64 ``segmentation_map`` of the original shape.
        """
        # Nearest-neighbour interpolation keeps the values discrete (no blending between labels).
        # NOTE(review): the uint8 cast used for the resize would wrap values above 255 - confirm this is acceptable.
        predictions.segmentation_map = _rescale_image(
            predictions.segmentation_map.astype(np.uint8), target_shape=metadata.original_shape, interpolation_method=cv2.INTER_NEAREST
        )
        predictions.segmentation_map = predictions.segmentation_map.astype(np.int64)
        return predictions

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return ``None``: this processing provides no equivalent photometric module."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Return True if the processing resizes the image, False otherwise."""
        return True

`resizes_image: bool` `property`

Return True if the processing resizes the image, False otherwise.

`SegmentationResize`

Bases: Processing

Resize image to given image dimensions.

Parameters:

Name	Type	Description	Default
`output_shape`	`Tuple[int, int]`	output shape will be (output_h, output_w)	required

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.SegmentationResize)
class SegmentationResize(Processing):
    """Resize an image to fixed dimensions, without preserving the aspect ratio.

    :param output_shape:    output shape will be (output_h, output_w)
    """

    def __init__(self, output_shape: Tuple[int, int]):
        self.output_shape = output_shape

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, SegmentationResizeMetadata]:
        """Resize the image to ``output_shape`` and remember its original size.

        :param image: Image whose first two dimensions are (height, width).
        :return:      Tuple of (resized image, metadata storing the original (height, width)).
        """
        orig_h, orig_w = image.shape[:2]
        resized = _rescale_image(image, target_shape=self.output_shape)
        metadata = SegmentationResizeMetadata(original_shape=(orig_h, orig_w))
        return resized, metadata

    def postprocess_predictions(self, predictions: SegmentationPrediction, metadata: SegmentationResizeMetadata) -> SegmentationPrediction:
        """Resize the predicted segmentation map back to the original image shape.

        :param predictions: Prediction whose ``segmentation_map`` is resized.
        :param metadata:    Metadata holding the ``original_shape`` recorded during preprocessing.
        :return:            The same ``predictions`` object, with an int64 ``segmentation_map`` of the original shape.
        """
        # The map is cast to uint8 for the resize and restored to int64 afterwards;
        # nearest-neighbour interpolation keeps the values discrete.
        as_uint8 = predictions.segmentation_map.astype(np.uint8)
        restored = _rescale_image(as_uint8, target_shape=metadata.original_shape, interpolation_method=cv2.INTER_NEAREST)
        predictions.segmentation_map = restored.astype(np.int64)
        return predictions

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return ``None``: this processing provides no equivalent photometric module."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Report that this processing changes the image size."""
        return True

`SegmentationResizeWithPadding`

Bases: Processing

Resize image to given image dimensions while preserving aspect ratio (padding might be used).

Parameters:

Name	Type	Description	Default
`output_shape`	`Tuple[int, int]`	(H, W)	required
`pad_value`	`int`	padding value (will be used if padding needed)	required

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.SegmentationResizeWithPadding)
class SegmentationResizeWithPadding(Processing):
    """Fit an image into the given dimensions while preserving aspect ratio, padding the remainder if needed.

    :param output_shape:    (H, W)
    :param pad_value:       padding value (will be used if padding needed)
    """

    def __init__(self, output_shape: Tuple[int, int], pad_value: int):
        self.output_shape = output_shape
        self.pad_value = pad_value

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, SegmentationRescaleWithPaddingMetadata]:
        """Rescale the image to fit inside ``output_shape``, then pad it to exactly that shape.

        :param image: Image whose first two dimensions are (height, width).
        :return:      Tuple of (processed image, metadata storing the original (height, width),
                      the scale factor that was applied and the padding coordinates).
        """
        orig_h, orig_w = image.shape[:2]

        # The smaller of the two ratios lets both dimensions fit inside the output shape.
        ratio = min(self.output_shape[0] / orig_h, self.output_shape[1] / orig_w)
        if ratio != 1.0:
            scaled_shape = (round(orig_h * ratio), round(orig_w * ratio))
            image = _rescale_image(image, target_shape=scaled_shape)

        pad_coords = _get_center_padding_coordinates(input_shape=image.shape, output_shape=self.output_shape)
        padded = _pad_image(image=image, padding_coordinates=pad_coords, pad_value=self.pad_value)

        metadata = SegmentationRescaleWithPaddingMetadata(
            original_shape=(orig_h, orig_w),
            scale_factor=ratio,
            padding_coordinates=pad_coords,
        )
        return padded, metadata

    def postprocess_predictions(self, predictions: SegmentationPrediction, metadata: SegmentationRescaleWithPaddingMetadata) -> SegmentationPrediction:
        """Undo the padding and the rescaling on the predicted segmentation map.

        :param predictions: Prediction whose ``segmentation_map`` is cropped and resized.
        :param metadata:    Metadata holding the ``padding_coordinates`` and ``original_shape`` recorded during preprocessing.
        :return:            The same ``predictions`` object, with an int64 ``segmentation_map`` of the original shape.
        """
        pad = metadata.padding_coordinates
        map_shape = predictions.segmentation_map_shape

        # Step 1: strip the padded borders.
        kept_rows = slice(pad.top, map_shape[0] - pad.bottom)
        kept_cols = slice(pad.left, map_shape[1] - pad.right)
        cropped = predictions.segmentation_map[kept_rows, kept_cols]

        # Step 2: resize back to the original shape; nearest-neighbour keeps the values discrete.
        restored = _rescale_image(cropped.astype(np.uint8), target_shape=metadata.original_shape, interpolation_method=cv2.INTER_NEAREST)
        predictions.segmentation_map = restored.astype(np.int64)
        return predictions

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return ``None``: this processing provides no equivalent photometric module."""
        return None

    @property
    def resizes_image(self) -> bool:
        """Report that this processing changes the image size."""
        return True

`StandardizeImage`

Bases: Processing

Standardize image pixel values with img/max_val

Parameters:

Name	Type	Description	Default
`max_value`	`float`	Current maximum value of the image pixels. (usually 255)	`255.0`

Source code in latest/src/super_gradients/training/processing/processing.py

@register_processing(Processings.StandardizeImage)
class StandardizeImage(Processing):
    """Standardize image pixel values with img/max_val

    :param max_value: Current maximum value of the image pixels. (usually 255)
    """

    def __init__(self, max_value: float = 255.0):
        super().__init__()
        self.max_value = float(max_value)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Divide the image by ``max_value`` and cast the result to float32.

        :param image: Image to standardize.
        :return:      Tuple of (standardized float32 image, None); this step produces no metadata.
        """
        scaled = image / self.max_value
        return scaled.astype(np.float32), None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions untouched."""
        return predictions

    def update_mean_std_normalization(self, mean: np.ndarray, std: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Divide the given mean and std by ``max_value``.

        :param mean: Mean values to rescale.
        :param std:  Std values to rescale.
        :return:     Tuple of (mean / max_value, std / max_value).
        """
        return mean / self.max_value, std / self.max_value

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Build an ``ApplyMeanStd`` module configured with mean 0 and std ``max_value``."""
        from super_gradients.conversion.preprocessing_modules import ApplyMeanStd

        zero_mean = np.array([0], dtype=np.float32)
        max_value_std = np.array([self.max_value], dtype=np.float32)
        return ApplyMeanStd(mean=zero_mean, std=max_value_std)

    @property
    def resizes_image(self) -> bool:
        """Report that this processing leaves the image size unchanged."""
        return False

`preprocess_image(image)`

Standardize the image by dividing its pixel values by `max_value` and casting the result to float32.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, None]`	Standardized image (pixel values divided by `max_value`, cast to float32), and `None` as metadata.

Source code in latest/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Divide the image by ``max_value`` and cast the result to float32.

    :param image: Image to standardize.
    :return:      Tuple of (standardized float32 image, None); this step produces no metadata.
    """
    scaled = image / self.max_value
    return scaled.astype(np.float32), None

`default_cityscapes_processing_params(scale=1)`

Processing parameters commonly used for training segmentation models on Cityscapes dataset.

Source code in latest/src/super_gradients/training/processing/processing.py

def default_cityscapes_processing_params(scale: float = 1) -> dict:
    """Build the default processing parameters for segmentation models trained on Cityscapes.

    :param scale: Multiplier applied to the base (1024, 2048) output shape; each side is truncated to an int.
    :return:      Dict with the ``class_names`` and the ``image_processor`` pipeline.
    """
    target_shape = (int(1024 * scale), int(2048 * scale))

    # NOTE(review): NormalizeImage is listed before StandardizeImage here, while the ImageNet
    # defaults in this file list them the other way around - confirm the ordering is intended.
    steps = [
        SegmentationResizeWithPadding(output_shape=target_shape, pad_value=0),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        StandardizeImage(),
        ImagePermute(),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": CITYSCAPES_DEFAULT_SEGMENTATION_CLASSES_LIST,
        "image_processor": image_processor,
    }

`default_dekr_coco_processing_params()`

Processing parameters commonly used for training DEKR on COCO dataset.

Source code in latest/src/super_gradients/training/processing/processing.py

def default_dekr_coco_processing_params() -> dict:
    """Build the default processing parameters for DEKR models trained on COCO.

    :return: Dict with the ``image_processor`` pipeline, the ``conf`` threshold and the
             ``edge_links`` / ``edge_colors`` / ``keypoint_colors`` lists.
    """
    image_processor = ComposeProcessing(
        [
            ReverseImageChannels(),
            KeypointsLongestMaxSizeRescale(output_shape=(640, 640)),
            KeypointsBottomRightPadding(output_shape=(640, 640), pad_value=127),
            StandardizeImage(max_value=255.0),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ImagePermute(permutation=(2, 0, 1)),
        ]
    )

    # Each entry pairs one edge (two keypoint indices) with its color, so both stay aligned.
    skeleton = [
        ([0, 1], (214, 39, 40)),  # Nose -> LeftEye
        ([0, 2], (148, 103, 189)),  # Nose -> RightEye
        ([1, 2], (44, 160, 44)),  # LeftEye -> RightEye
        ([1, 3], (140, 86, 75)),  # LeftEye -> LeftEar
        ([2, 4], (227, 119, 194)),  # RightEye -> RightEar
        ([3, 5], (127, 127, 127)),  # LeftEar -> LeftShoulder
        ([4, 6], (188, 189, 34)),  # RightEar -> RightShoulder
        ([5, 6], (127, 127, 127)),  # Shoulders
        ([5, 7], (188, 189, 34)),  # LeftShoulder -> LeftElbow
        ([5, 11], (140, 86, 75)),  # LeftTorso
        ([6, 8], (23, 190, 207)),  # RightShoulder -> RightElbow
        ([6, 12], (227, 119, 194)),  # RightTorso
        ([7, 9], (31, 119, 180)),  # LeftElbow -> LeftArm
        ([8, 10], (255, 127, 14)),  # RightElbow -> RightArm
        ([11, 12], (148, 103, 189)),  # Waist
        ([11, 13], (255, 127, 14)),  # Left Hip -> Left Knee
        ([12, 14], (214, 39, 40)),  # Right Hip -> Right Knee
        ([13, 15], (31, 119, 180)),  # Left Knee -> Left Ankle
        ([14, 16], (44, 160, 44)),  # Right Knee -> Right Ankle
    ]
    edge_links = [link for link, _ in skeleton]
    edge_colors = [color for _, color in skeleton]

    # The 17 keypoint colors alternate between two values, starting with the first one.
    even_color, odd_color = (148, 103, 189), (31, 119, 180)
    keypoint_colors = [even_color if index % 2 == 0 else odd_color for index in range(17)]

    return {
        "image_processor": image_processor,
        "conf": 0.05,
        "edge_links": edge_links,
        "edge_colors": edge_colors,
        "keypoint_colors": keypoint_colors,
    }

`default_imagenet_processing_params()`

Processing parameters commonly used for training resnet on Imagenet dataset.

Source code in latest/src/super_gradients/training/processing/processing.py

def default_imagenet_processing_params() -> dict:
    """Build the default processing parameters commonly used for resnet models trained on Imagenet.

    :return: Dict with the ``class_names`` and the ``image_processor`` pipeline.
    """
    steps = [
        Resize(size=256),
        CenterCrop(size=224),
        StandardizeImage(),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ImagePermute(),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": IMAGENET_CLASSES,
        "image_processor": image_processor,
    }

`default_ppyoloe_coco_processing_params()`

Processing parameters commonly used for training PPYoloE on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in latest/src/super_gradients/training/processing/processing.py

def default_ppyoloe_coco_processing_params() -> dict:
    """Build the default processing parameters for PPYoloE models trained on COCO.
    TODO: remove once we load it from the checkpoint

    :return: Dict with the ``class_names``, the ``image_processor`` pipeline and the ``iou`` / ``conf`` thresholds.
    """
    steps = [
        ReverseImageChannels(),
        DetectionRescale(output_shape=(640, 640)),
        NormalizeImage(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
        ImagePermute(permutation=(2, 0, 1)),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.65,
        "conf": 0.5,
    }

`default_segformer_cityscapes_processing_params()`

Processing parameters commonly used for training Segformer on Cityscapes dataset.

Source code in latest/src/super_gradients/training/processing/processing.py

def default_segformer_cityscapes_processing_params() -> dict:
    """Build the default processing parameters for Segformer models trained on Cityscapes.

    :return: Dict with the ``class_names`` and the ``image_processor`` pipeline.
    """
    # NOTE(review): NormalizeImage is listed before StandardizeImage here, while the ImageNet
    # defaults in this file list them the other way around - confirm the ordering is intended.
    steps = [
        SegmentationRescale(long_size=1024),
        SegmentationPadShortToCropSize(crop_size=(1024, 2048), fill_image=0),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        StandardizeImage(),
        ImagePermute(),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": CITYSCAPES_DEFAULT_SEGMENTATION_CLASSES_LIST,
        "image_processor": image_processor,
    }

`default_vit_imagenet_processing_params()`

Processing parameters used for ViT models trained on the Imagenet dataset.

Source code in latest/src/super_gradients/training/processing/processing.py

def default_vit_imagenet_processing_params() -> dict:
    """Build the default processing parameters for ViT models trained on Imagenet.

    :return: Dict with the ``class_names`` and the ``image_processor`` pipeline.
    """
    steps = [
        Resize(size=256),
        CenterCrop(size=224),
        StandardizeImage(),
        NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ImagePermute(),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": IMAGENET_CLASSES,
        "image_processor": image_processor,
    }

`default_yolo_nas_coco_processing_params()`

Processing parameters commonly used for training YoloNAS on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in latest/src/super_gradients/training/processing/processing.py

def default_yolo_nas_coco_processing_params() -> dict:
    """Build the default processing parameters for YoloNAS models trained on COCO.
    TODO: remove once we load it from the checkpoint

    :return: Dict with the ``class_names``, the ``image_processor`` pipeline and the ``iou`` / ``conf`` thresholds.
    """
    # The image is rescaled to 636 on its longest side, then center-padded to 640x640.
    steps = [
        DetectionLongestMaxSizeRescale(output_shape=(636, 636)),
        DetectionCenterPadding(output_shape=(640, 640), pad_value=114),
        StandardizeImage(max_value=255.0),
        ImagePermute(permutation=(2, 0, 1)),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.7,
        "conf": 0.25,
    }

`default_yolox_coco_processing_params()`

Processing parameters commonly used for training YoloX on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in latest/src/super_gradients/training/processing/processing.py

def default_yolox_coco_processing_params() -> dict:
    """Build the default processing parameters for YoloX models trained on COCO.
    TODO: remove once we load it from the checkpoint

    :return: Dict with the ``class_names``, the ``image_processor`` pipeline and the ``iou`` / ``conf`` thresholds.
    """
    steps = [
        ReverseImageChannels(),
        DetectionLongestMaxSizeRescale((640, 640)),
        DetectionBottomRightPadding((640, 640), 114),
        ImagePermute((2, 0, 1)),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.65,
        "conf": 0.1,
    }

`get_pretrained_processing_params(model_name, pretrained_weights)`

Get the processing parameters for a pretrained model. TODO: remove once we load it from the checkpoint

Source code in latest/src/super_gradients/training/processing/processing.py

def get_pretrained_processing_params(model_name: str, pretrained_weights: str) -> dict:
    """Look up the default processing parameters for a pretrained model.
    TODO: remove once we load it from the checkpoint

    :param model_name:          Name of the model (e.g. "vit_base", "ddrnet_23").
    :param pretrained_weights:  Name of the pretrained weights ("coco", "coco_pose", "imagenet" or "cityscapes").
    :return:                    Processing parameters dict, or an empty dict when no defaults match the combination.
    """
    if pretrained_weights == "coco":
        # Detection families are matched by substring, in this order.
        if "yolox" in model_name:
            return default_yolox_coco_processing_params()
        if "ppyoloe" in model_name:
            return default_ppyoloe_coco_processing_params()
        if "yolo_nas" in model_name:
            return default_yolo_nas_coco_processing_params()

    elif pretrained_weights == "coco_pose":
        if model_name in ("dekr_w32_no_dc", "dekr_custom"):
            return default_dekr_coco_processing_params()
        if model_name.startswith("yolo_nas_pose"):
            return default_yolo_nas_pose_coco_processing_params()

    elif pretrained_weights == "imagenet":
        # ViT variants have their own parameters; every other model gets the generic Imagenet ones.
        if model_name in {"vit_base", "vit_large", "vit_huge"}:
            return default_vit_imagenet_processing_params()
        return default_imagenet_processing_params()

    elif pretrained_weights == "cityscapes":
        if model_name in {"pp_lite_t_seg75", "pp_lite_b_seg75", "stdc1_seg75", "stdc2_seg75"}:
            return default_cityscapes_processing_params(0.75)
        if model_name in {"pp_lite_t_seg50", "pp_lite_b_seg50", "stdc1_seg50", "stdc2_seg50"}:
            return default_cityscapes_processing_params(0.50)
        if model_name in {"ddrnet_23", "ddrnet_23_slim", "ddrnet_39"}:
            return default_cityscapes_processing_params()
        if model_name in {"segformer_b0", "segformer_b1", "segformer_b2", "segformer_b3", "segformer_b4", "segformer_b5"}:
            return default_segformer_cityscapes_processing_params()

    # No known defaults for this (model, weights) combination.
    return {}

Processing

AutoPadding

__init__(shape_multiple, pad_value)

CenterCrop

infer_image_input_shape()

preprocess_image(image)

ComposeProcessing

__init__(processings)

get_equivalent_compose_without_resizing(auto_padding)

infer_image_input_shape()

postprocess_predictions(predictions, metadata)

preprocess_image(image)

ImagePermute

NormalizeImage

Processing

resizes_image: bool abstractmethod property

get_equivalent_photometric_module() abstractmethod

infer_image_input_shape()

postprocess_predictions(predictions, metadata) abstractmethod

preprocess_image(image) abstractmethod

ProcessingMetadata dataclass

Resize

preprocess_image(image)

ReverseImageChannels

preprocess_image(image)

SegmentationPadShortToCropSize

SegmentationPadToDivisible

SegmentationRescale

resizes_image: bool property

SegmentationResize

SegmentationResizeWithPadding

StandardizeImage

preprocess_image(image)

default_cityscapes_processing_params(scale=1)

default_dekr_coco_processing_params()

default_imagenet_processing_params()

default_ppyoloe_coco_processing_params()

default_segformer_cityscapes_processing_params()

default_vit_imagenet_processing_params()

default_yolo_nas_coco_processing_params()

default_yolox_coco_processing_params()

get_pretrained_processing_params(model_name, pretrained_weights)

`AutoPadding`

`init(shape_multiple, pad_value)`

`CenterCrop`

`infer_image_input_shape()`

`preprocess_image(image)`

`ComposeProcessing`

`init(processings)`

`get_equivalent_compose_without_resizing(auto_padding)`

`infer_image_input_shape()`

`postprocess_predictions(predictions, metadata)`

`preprocess_image(image)`

`ImagePermute`

`NormalizeImage`

`Processing`

`resizes_image: bool` `abstractmethod` `property`

`get_equivalent_photometric_module()` `abstractmethod`

`infer_image_input_shape()`

`postprocess_predictions(predictions, metadata)` `abstractmethod`

`preprocess_image(image)` `abstractmethod`

`ProcessingMetadata` `dataclass`

`Resize`

`preprocess_image(image)`

`ReverseImageChannels`

`preprocess_image(image)`

`SegmentationPadShortToCropSize`

`SegmentationPadToDivisible`

`SegmentationRescale`

`resizes_image: bool` `property`

`SegmentationResize`

`SegmentationResizeWithPadding`

`StandardizeImage`

`preprocess_image(image)`

`default_cityscapes_processing_params(scale=1)`

`default_dekr_coco_processing_params()`

`default_imagenet_processing_params()`

`default_ppyoloe_coco_processing_params()`

`default_segformer_cityscapes_processing_params()`

`default_vit_imagenet_processing_params()`

`default_yolo_nas_coco_processing_params()`

`default_yolox_coco_processing_params()`

`get_pretrained_processing_params(model_name, pretrained_weights)`