Skip to content

Crash handler

setup_crash_handler()

Setup the environment to handle crashes, with crash tips and more.

Source code in src/super_gradients/common/crash_handler/crash_handler.py
 9
10
11
12
13
14
15
16
17
18
19
def setup_crash_handler():
    """Install the crash-handling hooks (crash tips and pro-user monitoring).

    Both setup steps are always executed. ``sys.excepthook`` is only wrapped when at least one of them
    reports that it was activated.
    """
    crash_tips_enabled = setup_crash_tips()
    monitoring_enabled = setup_pro_user_monitoring()

    if not (crash_tips_enabled or monitoring_enabled):
        # Nothing was activated: we don't want to wrap sys.excepthook when not required
        return

    # This prevents hydra.main from catching errors that happen in the decorated function
    # (which would lead to sys.excepthook never being called)
    env_variables.HYDRA_FULL_ERROR = "1"

    sys.excepthook = register_exceptions(sys.excepthook)

CrashTip

Base class to add tips to exceptions raised while using SuperGradients.

A tip is a more informative message with some suggestions for possible solutions or places to debug.

Source code in src/super_gradients/common/crash_handler/crash_tips.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
class CrashTip:
    """Base class to add tips to exceptions raised while using SuperGradients.

    A tip is a more informative message with some suggestions for possible solutions or places to debug.

    Every class inheriting from CrashTip is registered automatically when it is defined and can be retrieved
    with ``CrashTip.get_sub_classes()``. Subclasses are expected to override ``is_relevant`` and ``_get_tips``.
    """

    # Registry shared by the whole hierarchy, filled by __init_subclass__ in class-definition order.
    _subclasses: List[Type["CrashTip"]] = []

    @classmethod
    def get_sub_classes(cls) -> List[Type["CrashTip"]]:
        """Get all the classes inheriting from CrashTip"""
        return cls._subclasses

    def __init_subclass__(cls, **kwargs):
        """Register any class inheriting from CrashTip

        :param kwargs: Class keyword arguments, forwarded untouched to the next ``__init_subclass__`` in the MRO
        """
        # Cooperate with other base classes/mixins that also customize subclass creation.
        super().__init_subclass__(**kwargs)
        CrashTip._subclasses.append(cls)

    @classmethod
    def is_relevant(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> bool:
        """
        Check if this tip is relevant.

        Beside the class, the input params are as returned by sys.exc_info():
            :param cls:             Class inheriting from CrashTip
            :param exc_type:        Type of exception
            :param exc_value:       Exception
            :param exc_traceback:   Traceback

            :return:                True if the current class can help with the exception
            :raises NotImplementedError: If the subclass did not override this method
        """
        raise NotImplementedError(f"{cls.__name__} must implement is_relevant()")

    @classmethod
    def _get_tips(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> List[str]:
        """
        Provide a customized tip for the exception, combining explanation and solution.

        Beside the class, the input params are as returned by sys.exc_info():
            :param cls:             Class inheriting from CrashTip
            :param exc_type:        Type of exception
            :param exc_value:       Exception
            :param exc_traceback:   Traceback

            :return:                List of tips (each tip may span several lines)
            :raises NotImplementedError: If the subclass did not override this method
        """
        raise NotImplementedError(f"{cls.__name__} must implement _get_tips()")

    @classmethod
    def get_message(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> Union[None, str]:
        """
        Wrap the tip in a nice message.

        Beside the class, the input params are as returned by sys.exc_info():
            :param cls:             Class inheriting from CrashTip
            :param exc_type:        Type of exception
            :param exc_value:       Exception
            :param exc_traceback:   Traceback

            :return:                Formatted message, or None if anything went wrong while building it
        """
        try:

            def format_tip(tip_index: int, tip: str):
                # Number the first line of the tip and align the following lines under it
                first_sentence, *following_sentences = tip.split("\n")
                first_sentence = f"{tip_index + 1}. {first_sentence}"
                following_sentences = [f"   {sentence}" for sentence in following_sentences]
                return "\n".join([first_sentence] + following_sentences)

            tips: List[str] = cls._get_tips(exc_type, exc_value, exc_traceback)
            formatted_tips: str = "\n".join([format_tip(i, tip) for i, tip in enumerate(tips)])

            message = (
                "═══════════════════════════════════════════╦═════════════════════════╦════════════════════════════════════════════════════════════\n"
                "                                           ║ SuperGradient Crash tip ║ \n"
                "                                           ╚═════════════════════════╝ \n"
                f"{fmt_txt('Something went wrong!', color='red', bold=True)} You can find below potential solution(s) to this error: \n\n"
                f"{formatted_tips}\n"
                f"{len(tips) + 1}. If the proposed solution(s) did not help, feel free to contact the SuperGradient team or to open a ticket on "
                f"https://github.com/Deci-AI/super-gradients/issues/new/choose\n\n"
                "see the trace above...\n"
                "══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════\n"
            )
            return "\n" + message
        except Exception:
            # It is important that the crash tip does not crash itself, because it is called atexit!
            # Otherwise, the user would get a crash on top of another crash and this would be extremely confusing
            return None

__init_subclass__()

Register any class inheriting from CrashTip

Source code in src/super_gradients/common/crash_handler/crash_tips.py
26
27
28
def __init_subclass__(cls, **kwargs):
    """Register any class inheriting from CrashTip

    :param kwargs: Class keyword arguments, forwarded untouched to the next ``__init_subclass__`` in the MRO
    """
    # Cooperate with other base classes/mixins that also customize subclass creation.
    super().__init_subclass__(**kwargs)
    CrashTip._subclasses.append(cls)

get_message(exc_type, exc_value, exc_traceback) classmethod

Wrap the tip in a nice message.

Beside the class, the input params are as returned by sys.exc_info():

- cls: Class inheriting from CrashTip
- exc_type: Type of exception
- exc_value: Exception
- exc_traceback: Traceback

:return:                Tip
Source code in src/super_gradients/common/crash_handler/crash_tips.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
@classmethod
def get_message(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> Union[None, str]:
    """
    Build the full crash-tip banner for an exception.

    Beside the class, the input params are as returned by sys.exc_info():
        :param cls:             Class inheriting from CrashTip
        :param exc_type:        Type of exception
        :param exc_value:       Exception
        :param exc_traceback:   Traceback

        :return:                Banner containing the numbered tips, or None if building it failed
    """
    try:
        tips: List[str] = cls._get_tips(exc_type, exc_value, exc_traceback)

        # Number the first line of every tip and align its continuation lines under it
        numbered_lines = []
        for tip_number, tip in enumerate(tips, start=1):
            headline, *continuation = tip.split("\n")
            numbered_lines.append(f"{tip_number}. {headline}")
            numbered_lines.extend(f"   {line}" for line in continuation)
        formatted_tips: str = "\n".join(numbered_lines)

        message = (
            "═══════════════════════════════════════════╦═════════════════════════╦════════════════════════════════════════════════════════════\n"
            "                                           ║ SuperGradient Crash tip ║ \n"
            "                                           ╚═════════════════════════╝ \n"
            f"{fmt_txt('Something went wrong!', color='red', bold=True)} You can find below potential solution(s) to this error: \n\n"
            f"{formatted_tips}\n"
            f"{len(tips) + 1}. If the proposed solution(s) did not help, feel free to contact the SuperGradient team or to open a ticket on "
            f"https://github.com/Deci-AI/super-gradients/issues/new/choose\n\n"
            "see the trace above...\n"
            "══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════\n"
        )
    except Exception:
        # It is important that the crash tip does not crash itself, because it is called atexit!
        # Otherwise, the user would get a crash on top of another crash and this would be extremly confusing
        return None
    else:
        return "\n" + message

get_sub_classes() classmethod

Get all the classes inheriting from CrashTip

Source code in src/super_gradients/common/crash_handler/crash_tips.py
21
22
23
24
@classmethod
def get_sub_classes(cls) -> List[Type["CrashTip"]]:
    """Return the registry listing every class that inherits from CrashTip."""
    registered_tips = cls._subclasses
    return registered_tips

is_relevant(exc_type, exc_value, exc_traceback) classmethod

Check if this tip is relevant.

Beside the class, the input params are as returned by sys.exc_info():

- cls: Class inheriting from CrashTip
- exc_type: Type of exception
- exc_value: Exception
- exc_traceback: Traceback

:return:                True if the current class can help with the exception
Source code in src/super_gradients/common/crash_handler/crash_tips.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
@classmethod
def is_relevant(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> bool:
    """
    Check if this tip is relevant.

    Beside the class, the input params are as returned by sys.exc_info():
        :param cls:             Class inheriting from CrashTip
        :param exc_type:        Type of exception
        :param exc_value:       Exception
        :param exc_traceback:   Traceback

        :return:                True if the current class can help with the exception
        :raises NotImplementedError: If the subclass did not override this method
    """
    # Name the offending class so that a missing override is easy to locate
    raise NotImplementedError(f"{cls.__name__} must implement is_relevant()")

DDPNotInitializedTip

Bases: CrashTip

Note: I think that this should be caught within the code instead

Source code in src/super_gradients/common/crash_handler/crash_tips.py
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
class DDPNotInitializedTip(CrashTip):
    """Tip for a RuntimeError reporting that the default process group has not been initialized.

    Note: I think that this should be caught within the code instead
    """

    @classmethod
    def is_relevant(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType):
        """Return True for a RuntimeError whose message contains the 'process group not initialized' text."""
        if not isinstance(exc_value, RuntimeError):
            return False
        return "Default process group has not been initialized, please make sure to call init_process_group." in str(exc_value)

    @classmethod
    def _get_tips(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> List[str]:
        """Return a single tip showing the setup_device call to run at the beginning of the script."""
        explanation = "Your environment was not setup correctly for DDP.\n" "Please run at the beginning of your script:\n"
        setup_commands = (
            f">>> {fmt_txt('from super_gradients.training.utils.distributed_training_utils import setup_device', color='green')}\n"
            f">>> {fmt_txt('from super_gradients.common.data_types.enum import MultiGPUMode', color='green')}\n"
            f">>> {fmt_txt('setup_device(multi_gpu=MultiGPUMode.DISTRIBUTED_DATA_PARALLEL, num_gpus=...)', color='green')}"
        )
        return [explanation + setup_commands]

RecipeFactoryFormatTip

Bases: CrashTip

Source code in src/super_gradients/common/crash_handler/crash_tips.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
class RecipeFactoryFormatTip(CrashTip):
    """Tip for a recipe in which the parameters of a factory are not indented under the factory name."""

    @classmethod
    def is_relevant(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> bool:
        """Return True for a RuntimeError whose message contains the 'Malformed object definition' text."""
        pattern = "Malformed object definition in configuration. Expecting either a string of object type or a single entry dictionary"
        return isinstance(exc_value, RuntimeError) and pattern in str(exc_value)

    @classmethod
    def _get_tips(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> List[str]:
        """Return a single tip showing the YAML as the user (presumably) wrote it, next to the corrected indentation."""
        factory_name, params_dict = RecipeFactoryFormatTip._get_factory_with_params(exc_value)

        formatted_factory_name = fmt_txt(factory_name, bold=True, color="green")

        # Parameters at the same level as the factory name: this is what triggers the error
        params_in_yaml = "\n".join(f"  {k}: {v}" for k, v in params_dict.items())
        user_yaml = f"- {factory_name}:\n" + params_in_yaml
        formatted_user_yaml = fmt_txt(user_yaml, indent=4, color="red")

        # Same parameters, indented two more spaces so that they are nested under the factory name
        correct_yaml = f"- {factory_name}:\n" + indent_string(params_in_yaml, indent_size=2)
        formatted_correct_yaml = fmt_txt(correct_yaml, indent=4, color="green")

        tip = f"There is an indentation error in the recipe, while creating {formatted_factory_name}.\n"
        tip += "If you wrote this in your recipe:\n"
        tip += f"{formatted_user_yaml}\n"
        tip += "Please change it to:\n"
        tip += f"{formatted_correct_yaml}"
        tips = [tip]
        return tips

    @staticmethod
    def _get_factory_with_params(exc_value: Exception) -> Tuple[str, dict]:
        """Utility function to extract useful features from the exception.

        :param exc_value:   Exception whose message ends with ``received: <dictionary>``
        :return:            Tuple of (name of the factory that, we assume, was not correctly defined,
                            parameters that are passed to that factory)
        :raises ValueError: If the exception message does not contain the ``received: ...`` part
        """
        description = str(exc_value)
        match = re.search(r"received: (.*?)$", description)
        if match is None:
            # Explicit error instead of an AttributeError on None
            raise ValueError(f"Could not find the received configuration in the exception message: {description!r}")
        params_dict = json_str_to_dict(match.group(1))
        # The first key is assumed to be the factory name, the remaining entries are its parameters
        factory_name = next(iter(params_dict))
        params_dict.pop(factory_name)
        return factory_name, params_dict

WrongHydraVersionTip

Bases: CrashTip

Note: I think that this should be caught within the code instead

Source code in src/super_gradients/common/crash_handler/crash_tips.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
class WrongHydraVersionTip(CrashTip):
    """Tip for a TypeError complaining about an unexpected ``version_base`` keyword argument.

    Note: I think that this should be caught within the code instead
    """

    @classmethod
    def is_relevant(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType):
        """Return True for a TypeError whose message reports an unexpected ``version_base`` keyword argument."""
        expected_str = "__init__() got an unexpected keyword argument 'version_base'"
        # Containment instead of equality: since Python 3.10 the message is prefixed with the qualified
        # name of the callable (e.g. "SomeClass.__init__() got an unexpected ..."), so "==" would never match.
        return isinstance(exc_value, TypeError) and expected_str in str(exc_value)

    @classmethod
    def _get_tips(cls, exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> List[str]:
        """Return a single tip naming the installed hydra version and the pip command to run."""
        # Imported lazily: only needed to report the installed version
        import hydra

        tip = (
            f"{fmt_txt(f'hydra=={hydra.__version__}', color='red')} is not supported by SuperGradients. "
            f"Please run {fmt_txt('pip install hydra-core==1.2.0', color='green')}"
        )
        return [tip]

get_relevant_crash_tip_message(exc_type, exc_value, exc_traceback)

Get the message of the first registered CrashTip that is relevant for the input exception, or None if no tip is relevant.

Source code in src/super_gradients/common/crash_handler/crash_tips.py
241
242
243
244
245
246
def get_relevant_crash_tip_message(exc_type: type, exc_value: Exception, exc_traceback: TracebackType) -> Union[None, str]:
    """Get the message of the first registered CrashTip that is relevant for the input exception.

    :param exc_type:        Type of exception
    :param exc_value:       Exception
    :param exc_traceback:   Traceback
    :return:                Message built by the first relevant tip, or None if no tip is relevant
    """
    registered_tips = CrashTip.get_sub_classes()
    # Tips are checked lazily, in registration order; only the first relevant one is used
    relevant_tip = next((tip for tip in registered_tips if tip.is_relevant(exc_type, exc_value, exc_traceback)), None)
    if relevant_tip is None:
        return None
    return relevant_tip.get_message(exc_type, exc_value, exc_traceback)

crash_tip_handler()

Display a crash tip if an error was raised

Source code in src/super_gradients/common/crash_handler/crash_tips_setup.py
10
11
12
13
14
def crash_tip_handler():
    """Print the crash tip message of the registered exception, if there is one."""
    message = ExceptionInfo.get_crash_tip_message()
    if not message:
        # No relevant tip (or nothing to show): stay silent
        return
    print(message)

ExceptionInfo

Holds information about the session exception (if any)

Source code in src/super_gradients/common/crash_handler/exception.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class ExceptionInfo:
    """Class-level storage for the exception registered during the session (if any)"""

    # All state lives on the class itself; the static methods below read and write these attributes.
    _is_exception_raised = False
    exc_type = None
    exc_value = None
    exc_traceback = None

    @staticmethod
    def register_exception(exc_type: type, exc_value: Exception, exc_traceback: TracebackType):
        """Store the exception information on the class and flag that an exception was raised"""
        ExceptionInfo.exc_type, ExceptionInfo.exc_value, ExceptionInfo.exc_traceback = exc_type, exc_value, exc_traceback
        ExceptionInfo._is_exception_raised = True

    @staticmethod
    def is_exception_raised():
        """Tell whether an exception has been registered in the current process"""
        was_raised = ExceptionInfo._is_exception_raised
        return was_raised

    @staticmethod
    def get_crash_tip_message() -> Union[None, str]:
        """Return the crash tip message matching the stored exception information, or None"""
        stored_exc_info = (ExceptionInfo.exc_type, ExceptionInfo.exc_value, ExceptionInfo.exc_traceback)
        return get_relevant_crash_tip_message(*stored_exc_info)

is_exception_raised() staticmethod

Check if an exception was raised in the current process

Source code in src/super_gradients/common/crash_handler/exception.py
39
40
41
42
@staticmethod
def is_exception_raised():
    """Tell whether an exception has been registered in the current process"""
    was_raised = ExceptionInfo._is_exception_raised
    return was_raised

register_exception(exc_type, exc_value, exc_traceback) staticmethod

Register the exception information into the class

Source code in src/super_gradients/common/crash_handler/exception.py
31
32
33
34
35
36
37
@staticmethod
def register_exception(exc_type: type, exc_value: Exception, exc_traceback: TracebackType):
    """Store the exception information on the class and flag that an exception was raised"""
    ExceptionInfo.exc_type, ExceptionInfo.exc_value, ExceptionInfo.exc_traceback = exc_type, exc_value, exc_traceback
    ExceptionInfo._is_exception_raised = True

register_exceptions(excepthook)

Wrap excepthook with a step that saves the exception info so that it is available in the exit hooks.

Parameters:

Name Type Description Default
exc_type

Type of exception

required
exc_value

Exception

required
exc_traceback

Traceback

required

Returns:

Type Description
Callable

Wrapped excepthook that registers the exception before delegating to the original hook

Source code in src/super_gradients/common/crash_handler/exception.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
def register_exceptions(excepthook: Callable) -> Callable:
    """Wrap excepthook with a step that saves the exception info to be available in the exit hooks.

    :param excepthook:  Hook to wrap; it is called with (exc_type, exc_value, exc_traceback)
    :return:            Wrapped excepthook, that registers the exception before delegating to the original hook
    """

    def excepthook_with_register(exc_type: type, exc_value: Exception, exc_traceback: TracebackType):
        """Record the exception in ExceptionInfo, then forward it to the wrapped hook.

        :param exc_type:        Type of exception
        :param exc_value:       Exception
        :param exc_traceback:   Traceback
        :return:                Whatever the wrapped hook returns
        """
        ExceptionInfo.register_exception(exc_type, exc_value, exc_traceback)
        return excepthook(exc_type, exc_value, exc_traceback)

    return excepthook_with_register

exception_upload_handler(platform_client)

Upload the log file to the deci platform if an error was raised

Source code in src/super_gradients/common/crash_handler/exception_monitoring_setup.py
14
15
16
17
18
19
20
21
22
23
24
25
26
@multi_process_safe
def exception_upload_handler(platform_client):
    """Upload the console log file through the platform client if an error was raised.

    Nothing is uploaded when running distributed or when no exception was registered.

    :param platform_client: Client exposing ``upload_file_to_s3``
    """
    # Make sure that the sink is flushed
    ConsoleSink.flush()

    if is_distributed() or not ExceptionInfo.is_exception_raised():
        return

    logger.info("Uploading console log to deci platform ...")
    try:
        log_path = ConsoleSink.get_filename()
        platform_client.upload_file_to_s3(tag="SuperGradients", level="error", from_path=log_path)
        logger.info("Exception was uploaded to deci platform!")
    except Exception as e:
        # We don't want the code to break at exit because of the client (whatever the reason might be)
        logger.warning(f"Exception could not be uploaded to platform with exception: {e}")

setup_pro_user_monitoring()

Setup the pro user environment for error logging and monitoring

Source code in src/super_gradients/common/crash_handler/exception_monitoring_setup.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
@multi_process_safe
def setup_pro_user_monitoring() -> bool:
    """Activate the automatic log upload for pro users.

    :return: True if the upload handler was registered to run at exit, False otherwise
    """
    if not client_enabled:
        return False

    if not env_variables.UPLOAD_LOGS:
        logger.info("Automatic log upload was disabled. To enable it please set the env variable UPLOAD_LOGS=TRUE")
        return False

    logger.info("deci-platform-client package detected. activating automatic log uploading")
    logger.info(
        "If you do not have a deci-platform-client credentials or you wish to disable this feature, please set the env variable UPLOAD_LOGS=FALSE"
    )

    logger.info("Connecting to the deci platform ...")
    platform_client = DeciClient()
    logger.info("Connection to the deci platform successful!")

    # The upload itself only happens at interpreter exit
    atexit.register(exception_upload_handler, platform_client)
    return True

fmt_txt(txt, bold=False, color='', indent=0)

Format a text for the console.

Source code in src/super_gradients/common/crash_handler/utils.py
11
12
13
14
15
16
17
18
19
20
21
def fmt_txt(txt: str, bold: bool = False, color: str = "", indent: int = 0) -> str:
    """Format a text for the console.

    The formatting steps are applied in this order: bold, color, indentation.

    :param txt:     Text to format
    :param bold:    If True, wrap the text in the ANSI bold/reset escape sequences
    :param color:   Color name passed to ``colored``; empty string to skip coloring
    :param indent:  Number of spaces to indent with; 0 to skip indentation
    :return:        Formatted text
    """
    formatted = txt
    if bold:
        bold_on, reset = "\033[1m", "\033[0m"
        formatted = bold_on + formatted + reset
    if color:
        formatted = colored(formatted, color)
    if indent:
        formatted = indent_string(formatted, indent_size=indent)
    return formatted

indent_string(txt, indent_size)

Add an indentation to a string.

Source code in src/super_gradients/common/crash_handler/utils.py
5
6
7
8
def indent_string(txt: str, indent_size: int) -> str:
    """Prefix every line of a string (including empty ones) with ``indent_size`` spaces."""
    padding = " " * indent_size
    return "\n".join(padding + line for line in txt.split("\n"))

json_str_to_dict(json_str)

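Build a dictionary from a dict-like string: every None is replaced by the string "None" and single quotes by double quotes before the result is parsed as JSON.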

Source code in src/super_gradients/common/crash_handler/utils.py
24
25
26
27
def json_str_to_dict(json_str: str) -> dict:
    """Build a dictionary from a dict-like string.

    Every ``None`` is turned into the string ``"None"`` and every single quote into a double quote,
    then the result is parsed as JSON.
    """
    with_quoted_none = json_str.replace("None", '"None"')
    json_ready = with_quoted_none.replace("'", '"')
    return json.loads(json_ready)