Processing

`CenterCrop`

Bases: ClassificationProcess

Parameters:

Name	Type	Description	Default
`size`	`int`	Desired output size of the crop.	`224`

Source code in V3_4/src/super_gradients/training/processing/processing.py

@register_processing(Processings.CenterCrop)
class CenterCrop(ClassificationProcess):
    """Crop a square window of side `size` out of the center of an image.

    :param size: Desired output size of the crop.
    """

    def __init__(self, size: int = 224):
        super().__init__()
        self.size = int(size)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Crops the given image at the center.

        When the image is smaller than `size` along an axis, the whole extent of that axis is kept.

        :param image: Image, in (H, W, C) format.
        :return:      The center cropped image, and None as metadata.
        """
        height, width = image.shape[0], image.shape[1]

        # Calculate the start and end coordinates of the crop.
        # The start is clamped at 0: a negative start would be interpreted by numpy as an offset
        # from the end of the axis and silently produce an off-center, truncated crop.
        start_x = max((width - self.size) // 2, 0)
        start_y = max((height - self.size) // 2, 0)
        end_x = start_x + self.size
        end_y = start_y + self.size

        cropped_image = image[start_y:end_y, start_x:end_x]
        return cropped_image, None

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return None: no equivalent photometric module is provided for this processing."""
        return None

    def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
        """
        Infer the output image shape from the processing.

        :return: (rows, cols) of the crop window, i.e. (size, size).
        """
        return (self.size, self.size)

`infer_image_input_shape()`

Infer the output image shape from the processing.

Returns:

Type	Description
`Optional[Tuple[int, int]]`	(rows, cols) Returns the last known output shape for all the processings.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
    """
    Report the image shape produced by this processing.

    :return: (rows, cols) of the output; both entries are the configured `size`.
    """
    side = self.size
    return side, side

`preprocess_image(image)`

Crops the given image at the center.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, None]`	The center cropped image.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Crops the given image at the center.

    When the image is smaller than `size` along an axis, the whole extent of that axis is kept.

    :param image: Image, in (H, W, C) format.
    :return:      The center cropped image, and None as metadata.
    """
    height, width = image.shape[0], image.shape[1]

    # Calculate the start and end coordinates of the crop.
    # The start is clamped at 0: a negative start would be interpreted by numpy as an offset
    # from the end of the axis and silently produce an off-center, truncated crop.
    start_x = max((width - self.size) // 2, 0)
    start_y = max((height - self.size) // 2, 0)
    end_x = start_x + self.size
    end_y = start_y + self.size

    cropped_image = image[start_y:end_y, start_x:end_x]
    return cropped_image, None

`ComposeProcessing`

Bases: Processing

Compose a list of Processing objects into a single Processing object.

Source code in V3_4/src/super_gradients/training/processing/processing.py

@register_processing(Processings.ComposeProcessing)
class ComposeProcessing(Processing):
    """Chain a list of Processing objects so that they can be used as a single Processing object."""

    def __init__(self, processings: List[Processing]):
        self.processings = processings

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, ComposeProcessingMetadata]:
        """Run every processing step, in order, on a copy of the image and collect the metadata of each step."""
        current = image.copy()
        collected = []
        for step in self.processings:
            current, step_metadata = step.preprocess_image(image=current)
            collected.append(step_metadata)
        return current, ComposeProcessingMetadata(metadata_lst=collected)

    def postprocess_predictions(self, predictions: Prediction, metadata: ComposeProcessingMetadata) -> Prediction:
        """Run every step's `postprocess_predictions`, last step first, pairing each step with its own metadata."""
        steps_backwards = self.processings[::-1]
        metadata_backwards = metadata.metadata_lst[::-1]

        result = predictions
        for step, step_metadata in zip(steps_backwards, metadata_backwards):
            result = step.postprocess_predictions(result, step_metadata)
        return result

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Stack the photometric modules of all steps into one nn.Sequential, skipping None and nn.Identity."""
        candidates = (step.get_equivalent_photometric_module() for step in self.processings)
        kept = [candidate for candidate in candidates if candidate is not None and not isinstance(candidate, nn.Identity)]
        return nn.Sequential(*kept)

    def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
        """
        Infer the output image shape of the whole chain.

        :return: (rows, cols) reported by the last step that knows its output shape, or None if no step does.
        """
        reported = (step.infer_image_input_shape() for step in self.processings)
        known_shapes = [shape for shape in reported if shape is not None]
        return known_shapes[-1] if known_shapes else None

`infer_image_input_shape()`

Infer the output image shape from the processing.

Returns:

Type	Description
`Optional[Tuple[int, int]]`	(rows, cols) Returns the last known output shape for all the processings.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
    """
    Infer the output image shape of the whole chain.

    :return: (rows, cols) reported by the last step that knows its output shape, or None if no step does.
    """
    reported = (step.infer_image_input_shape() for step in self.processings)
    known_shapes = [shape for shape in reported if shape is not None]
    return known_shapes[-1] if known_shapes else None

`postprocess_predictions(predictions, metadata)`

Postprocess the model output predictions.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def postprocess_predictions(self, predictions: Prediction, metadata: ComposeProcessingMetadata) -> Prediction:
    """Run every step's `postprocess_predictions`, last step first, pairing each step with its own metadata."""
    steps_backwards = self.processings[::-1]
    metadata_backwards = metadata.metadata_lst[::-1]

    result = predictions
    for step, step_metadata in zip(steps_backwards, metadata_backwards):
        result = step.postprocess_predictions(result, step_metadata)
    return result

`preprocess_image(image)`

Processing an image, before feeding it to the network.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, ComposeProcessingMetadata]:
    """Run every processing step, in order, on a copy of the image and collect the metadata of each step."""
    current = image.copy()
    collected = []
    for step in self.processings:
        current, step_metadata = step.preprocess_image(image=current)
        collected.append(step_metadata)
    return current, ComposeProcessingMetadata(metadata_lst=collected)

`ImagePermute`

Bases: Processing

Permute the image dimensions.

Parameters:

Name	Type	Description	Default
`permutation`	`Tuple[int, int, int]`	Specify new order of dims. Default value (2, 0, 1) suitable for converting from HWC to CHW format.	`(2, 0, 1)`

Source code in V3_4/src/super_gradients/training/processing/processing.py

@register_processing(Processings.ImagePermute)
class ImagePermute(Processing):
    """Reorder the dimensions of an image.

    :param permutation: New order of the dims. The default value (2, 0, 1) is suitable for converting from HWC to CHW format.
    """

    def __init__(self, permutation: Tuple[int, int, int] = (2, 0, 1)):
        self.permutation = tuple(permutation)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Transpose the image axes according to `permutation`.

        :param image: Image to permute.
        :return:      The permuted image as a contiguous array, and None as metadata.
        """
        permuted = image.transpose(*self.permutation)
        # Hand back a contiguous array rather than the transposed result itself.
        return np.ascontiguousarray(permuted), None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """Return None: no equivalent photometric module is provided for this processing."""
        return None

`NormalizeImage`

Bases: Processing

Normalize an image based on means and standard deviation.

Parameters:

Name	Type	Description	Default
`mean`	`List[float]`	Mean values for each channel.	required
`std`	`List[float]`	Standard deviation values for each channel.	required

Source code in V3_4/src/super_gradients/training/processing/processing.py

@register_processing(Processings.NormalizeImage)
class NormalizeImage(Processing):
    """Normalize an image with per-channel mean and standard deviation: (image - mean) / std.

    :param mean:    Mean values for each channel.
    :param std:     Standard deviation values for each channel.
    """

    def __init__(self, mean: List[float], std: List[float]):
        # Both statistics are stored as float32 arrays of shape (1, 1, C).
        per_channel_shape = (1, 1, -1)
        self.mean = np.array(mean).reshape(per_channel_shape).astype(np.float32)
        self.std = np.array(std).reshape(per_channel_shape).astype(np.float32)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Subtract the mean from the image and divide by the standard deviation.

        :param image: Image to normalize.
        :return:      The normalized image, and None as metadata.
        """
        centered = image - self.mean
        return centered / self.std, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Build an `ApplyMeanStd` module from the stored mean and std."""
        # Imported here rather than at module level, as in the original code.
        from super_gradients.conversion.preprocessing_modules import ApplyMeanStd

        return ApplyMeanStd(mean=self.mean, std=self.std)

`Processing`

Bases: ABC

Interface for preprocessing and postprocessing methods that are used to prepare images for a model and process the model's output.

Subclasses should implement the preprocess_image and postprocess_predictions methods according to the specific requirements of the model and task.

Source code in V3_4/src/super_gradients/training/processing/processing.py

class Processing(ABC):
    """Interface for the preprocessing and postprocessing steps that prepare images
    for a model and process the model's output.

    Subclasses must implement `preprocess_image`, `postprocess_predictions` and
    `get_equivalent_photometric_module` according to the requirements of the model and task.
    """

    @abstractmethod
    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, Union[None, ProcessingMetadata]]:
        """Process an image before feeding it to the network.

        :param image: Image, expected to be in (H, W, C) or (H, W) format.
        :return:      The processed image, and either None or the metadata of this processing step.
        """

    @abstractmethod
    def postprocess_predictions(self, predictions: Prediction, metadata: Union[None, ProcessingMetadata]) -> Prediction:
        """Postprocess the model output predictions.

        :param predictions: Predictions to postprocess.
        :param metadata:    None, or the metadata of this processing step.
        :return:            The postprocessed predictions.
        """

    @abstractmethod
    def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
        """
        Get the equivalent photometric preprocessing module for this processing.
        A photometric preprocessing applies a transformation to the image pixels, without changing the image size.
        This includes RGB -> BGR, standardization, normalization etc.
        If a Processing subclass does not change pixel values, it should return an nn.Identity module.
        If a Processing subclass does not have an equivalent photometric preprocessing, it should return None.

        :return: The equivalent module, or None.
        """

    def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
        """
        Infer the shape (rows, cols) of the image after all the processing steps.
        This is the effective image size that is fed to the model itself.

        :return: The image shape (rows, cols), or None if the image shape cannot be inferred (when preprocessing
                 contains no resize/padding operations). This default implementation always returns None.
        """
        return None

`get_equivalent_photometric_module()` `abstractmethod`

Get the equivalent photometric preprocessing module for this processing. A photometric preprocessing applies a transformation to the image pixels, without changing the image size. This includes RGB -> BGR, standardization, normalization etc. If a Processing subclass does not change pixel values, it should return an nn.Identity module. If a Processing subclass does not have an equivalent photometric preprocessing, it should return None.

Returns:

Type	Description
`Optional[nn.Module]`

Source code in V3_4/src/super_gradients/training/processing/processing.py

@abstractmethod
def get_equivalent_photometric_module(self) -> Optional[nn.Module]:
    """
    Get the equivalent photometric preprocessing module for this processing.
    A photometric preprocessing applies a transformation to the image pixels, without changing the image size.
    This includes RGB -> BGR, standardization, normalization etc.
    If a Processing subclass does not change pixel values, it should return an nn.Identity module.
    If a Processing subclass does not have an equivalent photometric preprocessing, it should return None.

    :return: The equivalent module, or None.
    """

`infer_image_input_shape()`

Infer the shape (rows, cols) of the image after all the processing steps. This is the effective image size that is fed to the model itself.

Returns:

Type	Description
`Optional[Tuple[int, int]]`	Return the image shape (rows, cols), or None if the image shape cannot be inferred (When preprocessing contains no resize/padding operations).

Source code in V3_4/src/super_gradients/training/processing/processing.py

def infer_image_input_shape(self) -> Optional[Tuple[int, int]]:
    """
    Infer the shape (rows, cols) of the image after all the processing steps.
    This is the effective image size that is fed to the model itself.

    :return: The image shape (rows, cols), or None if the image shape cannot be inferred (when preprocessing
             contains no resize/padding operations). This default implementation always returns None.
    """
    # Default: the shape is unknown.
    return None

`postprocess_predictions(predictions, metadata)` `abstractmethod`

Postprocess the model output predictions.

Source code in V3_4/src/super_gradients/training/processing/processing.py

@abstractmethod
def postprocess_predictions(self, predictions: Prediction, metadata: Union[None, ProcessingMetadata]) -> Prediction:
    """Postprocess the model output predictions.

    :param predictions: Predictions to postprocess.
    :param metadata:    None, or the metadata of this processing step.
    :return:            The postprocessed predictions.
    """

`preprocess_image(image)` `abstractmethod`

Processing an image, before feeding it to the network. Expected to be in (H, W, C) or (H, W).

Source code in V3_4/src/super_gradients/training/processing/processing.py

@abstractmethod
def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, Union[None, ProcessingMetadata]]:
    """Process an image before feeding it to the network.

    :param image: Image, expected to be in (H, W, C) or (H, W) format.
    :return:      The processed image, and either None or the metadata of this processing step.
    """

`ProcessingMetadata` `dataclass`

Bases: ABC

Metadata including information to postprocess a prediction.

Source code in V3_4/src/super_gradients/training/processing/processing.py

@dataclass
class ProcessingMetadata(ABC):
    """Base class for the metadata that carries the information needed to postprocess a prediction.

    It declares no fields of its own.
    """

`Resize`

Bases: ClassificationProcess

Source code in V3_4/src/super_gradients/training/processing/processing.py

@register_processing(Processings.Resize)
class Resize(ClassificationProcess):
    """Rescale an image, keeping its aspect ratio, so that its shorter side equals `size`.

    :param size: Target length of the shorter side of the image.
    """

    def __init__(self, size: int = 224):
        super().__init__()
        self.size = int(size)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, "RescaleMetadata"]:
        """Resize an image.

        :param image: Image, in (H, W, C) format.
        :return:      The resized image, and the RescaleMetadata holding the original shape and the scale factor
                      (the same factor is used for both axes).
        """
        height, width = image.shape[:2]
        output_shape = self.size, self.size
        scale_factor = max(output_shape[0] / height, output_shape[1] / width)

        if scale_factor != 1.0:
            # scale_factor is size / min(height, width). The target shape is computed with exact integer
            # arithmetic: int(side * scale_factor) can land one pixel short when the float product falls
            # just below a whole number, which would leave the shorter side at size - 1.
            if height <= width:
                new_height, new_width = self.size, (width * self.size) // height
            else:
                new_height, new_width = (height * self.size) // width, self.size
            image = _rescale_image_with_pil(image, target_shape=(new_height, new_width))

        return image, RescaleMetadata(original_shape=(height, width), scale_factor_h=scale_factor, scale_factor_w=scale_factor)

    def get_equivalent_photometric_module(self) -> None:
        """Return None: no equivalent photometric module is provided for this processing."""
        return None

    def infer_image_input_shape(self) -> None:
        """Return None: the length of the longer side depends on the input image, so no fixed shape is reported."""
        return None

`preprocess_image(image)`

Resize an image.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, RescaleMetadata]`	The resized image, and the metadata holding the original shape and the scale factors.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, "RescaleMetadata"]:
    """Resize an image, keeping its aspect ratio, so that its shorter side equals `size`.

    :param image: Image, in (H, W, C) format.
    :return:      The resized image, and the RescaleMetadata holding the original shape and the scale factor
                  (the same factor is used for both axes).
    """
    height, width = image.shape[:2]
    output_shape = self.size, self.size
    scale_factor = max(output_shape[0] / height, output_shape[1] / width)

    if scale_factor != 1.0:
        # scale_factor is size / min(height, width). The target shape is computed with exact integer
        # arithmetic: int(side * scale_factor) can land one pixel short when the float product falls
        # just below a whole number, which would leave the shorter side at size - 1.
        if height <= width:
            new_height, new_width = self.size, (width * self.size) // height
        else:
            new_height, new_width = (height * self.size) // width, self.size
        image = _rescale_image_with_pil(image, target_shape=(new_height, new_width))

    return image, RescaleMetadata(original_shape=(height, width), scale_factor_h=scale_factor, scale_factor_w=scale_factor)

`ReverseImageChannels`

Bases: Processing

Reverse the order of the image channels (RGB -> BGR or BGR -> RGB).

Source code in V3_4/src/super_gradients/training/processing/processing.py

@register_processing(Processings.ReverseImageChannels)
class ReverseImageChannels(Processing):
    """Flip the order of the image channels (RGB -> BGR or BGR -> RGB)."""

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Flip the channel order of an image.

        :param image: Image, in (H, W, C) format.
        :return:      Image with reversed channel order (RGB if input was BGR, BGR if input was RGB), and None as metadata.
        :raises ValueError: If the third dimension of the image does not have exactly 3 entries.
        """
        channel_count = image.shape[2]
        if channel_count != 3:
            raise ValueError(f"ReverseImageChannels expects 3 channels, got: {channel_count}")

        # Reverse along the last axis.
        flipped = image[..., ::-1]
        return flipped, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Build a `ChannelSelect` module that picks the channels in the order 2, 1, 0."""
        # Imported here rather than at module level, as in the original code.
        from super_gradients.conversion.preprocessing_modules import ChannelSelect

        reversed_order = np.array([2, 1, 0], dtype=int)
        return ChannelSelect(channels=reversed_order)

`preprocess_image(image)`

Reverse the channel order of an image.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, None]`	Image with reversed channel order. (RGB if input was BGR, BGR if input was RGB)

Source code in V3_4/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Flip the channel order of an image.

    :param image: Image, in (H, W, C) format.
    :return:      Image with reversed channel order (RGB if input was BGR, BGR if input was RGB), and None as metadata.
    :raises ValueError: If the third dimension of the image does not have exactly 3 entries.
    """
    channel_count = image.shape[2]
    if channel_count != 3:
        raise ValueError(f"ReverseImageChannels expects 3 channels, got: {channel_count}")

    # Reverse along the last axis.
    flipped = image[..., ::-1]
    return flipped, None

`StandardizeImage`

Bases: Processing

Standardize image pixel values with img/max_val

Parameters:

Name	Type	Description	Default
`max_value`	`float`	Current maximum value of the image pixels. (usually 255)	`255.0`

Source code in V3_4/src/super_gradients/training/processing/processing.py

@register_processing(Processings.StandardizeImage)
class StandardizeImage(Processing):
    """Standardize image pixel values with img/max_val

    :param max_value: Current maximum value of the image pixels. (usually 255)
    """

    def __init__(self, max_value: float = 255.0):
        super().__init__()
        self.max_value = float(max_value)

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
        """Standardize the pixel values of an image by dividing them by `max_value`.

        :param image: Image to standardize.
        :return:      The standardized image as float32, and None as metadata.
        """
        processed_image = (image / self.max_value).astype(np.float32)
        return processed_image, None

    def postprocess_predictions(self, predictions: Prediction, metadata: None) -> Prediction:
        """Return the predictions unchanged."""
        return predictions

    def update_mean_std_normalization(self, mean: np.ndarray, std: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Divide the given mean and std by `max_value`.

        :param mean: Mean values to rescale.
        :param std:  Standard deviation values to rescale.
        :return:     The rescaled (mean, std).
        """
        mean = mean / self.max_value
        std = std / self.max_value
        return mean, std

    def get_equivalent_photometric_module(self) -> nn.Module:
        """Build an `ApplyMeanStd` module with a mean of 0 and a std of `max_value`."""
        # Imported here rather than at module level, as in the original code.
        from super_gradients.conversion.preprocessing_modules import ApplyMeanStd

        return ApplyMeanStd(mean=np.array([0], dtype=np.float32), std=np.array([self.max_value], dtype=np.float32))

`preprocess_image(image)`

Standardize the pixel values of an image by dividing them by `max_value`.

Parameters:

Name	Type	Description	Default
`image`	`np.ndarray`	Image, in (H, W, C) format.	required

Returns:

Type	Description
`Tuple[np.ndarray, None]`	The standardized image (pixel values divided by max_value, as float32), and None as metadata.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, None]:
    """Standardize the pixel values of an image by dividing them by `max_value`.

    :param image: Image to standardize.
    :return:      The standardized image as float32, and None as metadata.
    """
    processed_image = (image / self.max_value).astype(np.float32)
    return processed_image, None

`default_dekr_coco_processing_params()`

Processing parameters commonly used for training DEKR on COCO dataset.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def default_dekr_coco_processing_params() -> dict:
    """Processing parameters commonly used for training DEKR on COCO dataset.

    :return: Dictionary with the keys `image_processor`, `conf`, `edge_links`, `edge_colors` and `keypoint_colors`.
    """
    image_processor = ComposeProcessing(
        [
            ReverseImageChannels(),
            KeypointsLongestMaxSizeRescale(output_shape=(640, 640)),
            KeypointsBottomRightPadding(output_shape=(640, 640), pad_value=127),
            StandardizeImage(max_value=255.0),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ImagePermute(permutation=(2, 0, 1)),
        ]
    )

    # Colors shared by the edges and the keypoints.
    red = (214, 39, 40)
    purple = (148, 103, 189)
    green = (44, 160, 44)
    brown = (140, 86, 75)
    pink = (227, 119, 194)
    gray = (127, 127, 127)
    olive = (188, 189, 34)
    cyan = (23, 190, 207)
    blue = (31, 119, 180)
    orange = (255, 127, 14)

    # One row per skeleton edge: (first keypoint index, second keypoint index, edge color).
    skeleton = [
        (0, 1, red),  # Nose -> LeftEye
        (0, 2, purple),  # Nose -> RightEye
        (1, 2, green),  # LeftEye -> RightEye
        (1, 3, brown),  # LeftEye -> LeftEar
        (2, 4, pink),  # RightEye -> RightEar
        (3, 5, gray),  # LeftEar -> LeftShoulder
        (4, 6, olive),  # RightEar -> RightShoulder
        (5, 6, gray),  # Shoulders
        (5, 7, olive),  # LeftShoulder -> LeftElbow
        (5, 11, brown),  # LeftTorso
        (6, 8, cyan),  # RightShoulder -> RightElbow
        (6, 12, pink),  # RightTorso
        (7, 9, blue),  # LeftElbow -> LeftArm
        (8, 10, orange),  # RightElbow -> RightArm
        (11, 12, purple),  # Waist
        (11, 13, orange),  # Left Hip -> Left Knee
        (12, 14, red),  # Right Hip -> Right Knee
        (13, 15, blue),  # Left Knee -> Left Ankle
        (14, 16, green),  # Right Knee -> Right Ankle
    ]
    edge_links = [[first, second] for first, second, _ in skeleton]
    edge_colors = [color for _, _, color in skeleton]

    # 17 keypoint colors, alternating purple and blue, starting with purple.
    keypoint_colors = [purple if index % 2 == 0 else blue for index in range(17)]

    return {
        "image_processor": image_processor,
        "conf": 0.05,
        "edge_links": edge_links,
        "edge_colors": edge_colors,
        "keypoint_colors": keypoint_colors,
    }

`default_imagenet_processing_params()`

Processing parameters commonly used for training resnet on Imagenet dataset.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def default_imagenet_processing_params() -> dict:
    """Processing parameters commonly used for training resnet on Imagenet dataset.

    :return: Dictionary with the keys `class_names` and `image_processor`.
    """
    steps = [
        Resize(size=256),
        CenterCrop(size=224),
        StandardizeImage(),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ImagePermute(),
    ]
    image_processor = ComposeProcessing(steps)
    return {"class_names": IMAGENET_CLASSES, "image_processor": image_processor}

`default_ppyoloe_coco_processing_params()`

Processing parameters commonly used for training PPYoloE on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in V3_4/src/super_gradients/training/processing/processing.py

def default_ppyoloe_coco_processing_params() -> dict:
    """Processing parameters commonly used for training PPYoloE on COCO dataset.
    TODO: remove once we load it from the checkpoint

    :return: Dictionary with the keys `class_names`, `image_processor`, `iou` and `conf`.
    """
    steps = [
        ReverseImageChannels(),
        DetectionRescale(output_shape=(640, 640)),
        NormalizeImage(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
        ImagePermute(permutation=(2, 0, 1)),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.65,
        "conf": 0.5,
    }

`default_vit_imagenet_processing_params()`

Processing parameters used for ViT models on Imagenet dataset.

Source code in V3_4/src/super_gradients/training/processing/processing.py

def default_vit_imagenet_processing_params() -> dict:
    """Processing parameters used for ViT models on Imagenet dataset.

    :return: Dictionary with the keys `class_names` and `image_processor`.
    """
    steps = [
        Resize(size=256),
        CenterCrop(size=224),
        StandardizeImage(),
        NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ImagePermute(),
    ]
    image_processor = ComposeProcessing(steps)
    return {"class_names": IMAGENET_CLASSES, "image_processor": image_processor}

`default_yolo_nas_coco_processing_params()`

Processing parameters commonly used for training YoloNAS on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in V3_4/src/super_gradients/training/processing/processing.py

def default_yolo_nas_coco_processing_params() -> dict:
    """Processing parameters commonly used for training YoloNAS on COCO dataset.
    TODO: remove once we load it from the checkpoint

    :return: Dictionary with the keys `class_names`, `image_processor`, `iou` and `conf`.
    """
    steps = [
        DetectionLongestMaxSizeRescale(output_shape=(636, 636)),
        DetectionCenterPadding(output_shape=(640, 640), pad_value=114),
        StandardizeImage(max_value=255.0),
        ImagePermute(permutation=(2, 0, 1)),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.7,
        "conf": 0.25,
    }

`default_yolox_coco_processing_params()`

Processing parameters commonly used for training YoloX on COCO dataset. TODO: remove once we load it from the checkpoint

Source code in V3_4/src/super_gradients/training/processing/processing.py

def default_yolox_coco_processing_params() -> dict:
    """Processing parameters commonly used for training YoloX on COCO dataset.
    TODO: remove once we load it from the checkpoint

    :return: Dictionary with the keys `class_names`, `image_processor`, `iou` and `conf`.
    """
    steps = [
        ReverseImageChannels(),
        DetectionLongestMaxSizeRescale((640, 640)),
        DetectionBottomRightPadding((640, 640), 114),
        ImagePermute((2, 0, 1)),
    ]
    image_processor = ComposeProcessing(steps)

    return {
        "class_names": COCO_DETECTION_CLASSES_LIST,
        "image_processor": image_processor,
        "iou": 0.65,
        "conf": 0.1,
    }

`get_pretrained_processing_params(model_name, pretrained_weights)`

Get the processing parameters for a pretrained model. TODO: remove once we load it from the checkpoint

Source code in V3_4/src/super_gradients/training/processing/processing.py

def get_pretrained_processing_params(model_name: str, pretrained_weights: str) -> dict:
    """Get the processing parameters for a pretrained model.
    TODO: remove once we load it from the checkpoint

    :param model_name:          Name of the model.
    :param pretrained_weights:  Name of the pretrained weights ("coco", "coco_pose" or "imagenet").
    :return:                    The default processing parameters for this combination, or an empty dict when none is known.
    """
    if pretrained_weights == "imagenet":
        if model_name in {"vit_base", "vit_large", "vit_huge"}:
            return default_vit_imagenet_processing_params()
        return default_imagenet_processing_params()

    if pretrained_weights == "coco_pose":
        if model_name in ("dekr_w32_no_dc", "dekr_custom"):
            return default_dekr_coco_processing_params()
        if model_name.startswith("yolo_nas_pose"):
            return default_yolo_nas_pose_coco_processing_params()
        return {}

    if pretrained_weights == "coco":
        # The first matching substring wins.
        if "yolox" in model_name:
            return default_yolox_coco_processing_params()
        if "ppyoloe" in model_name:
            return default_ppyoloe_coco_processing_params()
        if "yolo_nas" in model_name:
            return default_yolo_nas_coco_processing_params()

    return {}

Processing

CenterCrop

infer_image_input_shape()

preprocess_image(image)

ComposeProcessing

infer_image_input_shape()

postprocess_predictions(predictions, metadata)

preprocess_image(image)

ImagePermute

NormalizeImage

Processing

get_equivalent_photometric_module() abstractmethod

infer_image_input_shape()

postprocess_predictions(predictions, metadata) abstractmethod

preprocess_image(image) abstractmethod

ProcessingMetadata dataclass

Resize

preprocess_image(image)

ReverseImageChannels

preprocess_image(image)

StandardizeImage

preprocess_image(image)

default_dekr_coco_processing_params()

default_imagenet_processing_params()

default_ppyoloe_coco_processing_params()

default_vit_imagenet_processing_params()

default_yolo_nas_coco_processing_params()

default_yolox_coco_processing_params()

get_pretrained_processing_params(model_name, pretrained_weights)

`CenterCrop`

`infer_image_input_shape()`

`preprocess_image(image)`

`ComposeProcessing`

`infer_image_input_shape()`

`postprocess_predictions(predictions, metadata)`

`preprocess_image(image)`

`ImagePermute`

`NormalizeImage`

`Processing`

`get_equivalent_photometric_module()` `abstractmethod`

`infer_image_input_shape()`

`postprocess_predictions(predictions, metadata)` `abstractmethod`

`preprocess_image(image)` `abstractmethod`

`ProcessingMetadata` `dataclass`

`Resize`

`preprocess_image(image)`

`ReverseImageChannels`

`preprocess_image(image)`

`StandardizeImage`

`preprocess_image(image)`

`default_dekr_coco_processing_params()`

`default_imagenet_processing_params()`

`default_ppyoloe_coco_processing_params()`

`default_vit_imagenet_processing_params()`

`default_yolo_nas_coco_processing_params()`

`default_yolox_coco_processing_params()`

`get_pretrained_processing_params(model_name, pretrained_weights)`