easybuilders · lexming · Oct 23, 2023 · Oct 7, 2023 · Oct 7, 2023 · Oct 7, 2023
diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py
@@ -87,7 +87,7 @@
 from easybuild.tools.hooks import MODULE_STEP, MODULE_WRITE, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP
 from easybuild.tools.hooks import POSTPROC_STEP, PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP
 from easybuild.tools.hooks import SINGLE_EXTENSION, TEST_STEP, TESTCASES_STEP, load_hooks, run_hook
-from easybuild.tools.run import check_async_cmd, run_cmd
+from easybuild.tools.run import RunShellCmdError, check_async_cmd, print_run_shell_cmd_error, run_cmd
 from easybuild.tools.jenkins import write_to_xml
 from easybuild.tools.module_generator import ModuleGeneratorLua, ModuleGeneratorTcl, module_generator, dependencies_for
 from easybuild.tools.module_naming_scheme.utilities import det_full_ec_version
@@ -4124,6 +4124,11 @@ def run_all_steps(self, run_test_cases):
                     start_time = datetime.now()
                     try:
                         self.run_step(step_name, step_methods)
+                    except RunShellCmdError as err:
+                        print_run_shell_cmd_error(err)
+                        ec_path = os.path.basename(self.cfg.path)
+                        error_msg = f"shell command '{err.cmd_name} ...' failed in {step_name} step for {ec_path}"
+                        raise EasyBuildError(error_msg)
                     finally:
                         if not self.dry_run:
                             step_duration = datetime.now() - start_time
@@ -4225,7 +4230,7 @@ def build_and_install_one(ecdict, init_env):
         app.cfg['skip'] = skip
 
     # build easyconfig
-    errormsg = '(no error)'
+    error_msg = '(no error)'
     # timing info
     start_time = time.time()
     try:
@@ -4263,9 +4268,7 @@ def build_and_install_one(ecdict, init_env):
                 adjust_permissions(app.installdir, stat.S_IWUSR, add=False, recursive=True)
 
     except EasyBuildError as err:
-        first_n = 300
-        errormsg = "build failed (first %d chars): %s" % (first_n, err.msg[:first_n])
-        _log.warning(errormsg)
+        error_msg = err.msg
         result = False
 
     ended = 'ended'
@@ -4387,11 +4390,7 @@ def ensure_writable_log_dir(log_dir):
         # build failed
         success = False
         summary = 'FAILED'
-
-        build_dir = ''
-        if app.builddir:
-            build_dir = " (build directory: %s)" % (app.builddir)
-        succ = "unsuccessfully%s: %s" % (build_dir, errormsg)
+        succ = "unsuccessfully: " + error_msg
 
         # cleanup logs
         app.close_log()
@@ -4424,7 +4423,7 @@ def ensure_writable_log_dir(log_dir):
 
     del app
 
-    return (success, application_log, errormsg)
+    return (success, application_log, error_msg)
 
 
 def copy_easyblocks_for_reprod(easyblock_instances, reprod_dir):

diff --git a/easybuild/main.py b/easybuild/main.py
@@ -70,7 +70,7 @@
 from easybuild.tools.github import add_pr_labels, install_github_token, list_prs, merge_pr, new_branch_github, new_pr
 from easybuild.tools.github import new_pr_from_branch
 from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
-from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, FAIL
+from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, CRASH, FAIL
 from easybuild.tools.hooks import load_hooks, run_hook
 from easybuild.tools.modules import modules_tool
 from easybuild.tools.options import opts_dict_to_eb_opts, set_up_configuration, use_color
@@ -149,11 +149,11 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):
 
         # keep track of success/total count
         if ec_res['success']:
-            test_msg = "Successfully built %s" % ec['spec']
+            test_msg = "Successfully installed %s" % ec['spec']
         else:
-            test_msg = "Build of %s failed" % ec['spec']
+            test_msg = "Installation of %s failed" % os.path.basename(ec['spec'])
             if 'err' in ec_res:
-                test_msg += " (err: %s)" % ec_res['err']
+                test_msg += ": %s" % ec_res['err']
 
         # dump test report next to log file
         test_report_txt = create_test_report(test_msg, [(ec, ec_res)], init_session_state)
@@ -169,8 +169,8 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):
                 adjust_permissions(parent_dir, stat.S_IWUSR, add=False, recursive=False)
 
         if not ec_res['success'] and exit_on_failure:
-            if 'traceback' in ec_res:
-                raise EasyBuildError(ec_res['traceback'])
+            if not isinstance(ec_res['err'], EasyBuildError):
+                raise ec_res['err']
             else:
                 raise EasyBuildError(test_msg)
 
@@ -747,14 +747,22 @@ def prepare_main(args=None, logfile=None, testing=None):
     return init_session_state, eb_go, cfg_settings
 
 
-if __name__ == "__main__":
-    init_session_state, eb_go, cfg_settings = prepare_main()
+def main_with_hooks(args=None):
+    init_session_state, eb_go, cfg_settings = prepare_main(args=args)
     hooks = load_hooks(eb_go.options.hooks)
     try:
-        main(prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
+        main(args=args, prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
     except EasyBuildError as err:
         run_hook(FAIL, hooks, args=[err])
-        print_error(err.msg)
+        sys.exit(1)
     except KeyboardInterrupt as err:
         run_hook(CANCEL, hooks, args=[err])
         print_error("Cancelled by user: %s" % err)
+    except Exception as err:
+        run_hook(CRASH, hooks, args=[err])
+        sys.stderr.write("EasyBuild crashed! Please consider reporting a bug, this should not happen...\n\n")
+        raise
+
+
+if __name__ == "__main__":
+    main_with_hooks()
diff --git a/easybuild/tools/build_log.py b/easybuild/tools/build_log.py
@@ -167,7 +167,7 @@ def nosupport(self, msg, ver):
 
     def error(self, msg, *args, **kwargs):
         """Print error message and raise an EasyBuildError."""
-        ebmsg = "EasyBuild crashed with an error %s: " % self.caller_info()
+        ebmsg = "EasyBuild encountered an error %s: " % self.caller_info()
         fancylogger.FancyLogger.error(self, ebmsg + msg, *args, **kwargs)
 
     def devel(self, msg, *args, **kwargs):

diff --git a/easybuild/tools/hooks.py b/easybuild/tools/hooks.py
@@ -67,6 +67,7 @@
 END = 'end'
 
 CANCEL = 'cancel'
+CRASH = 'crash'
 FAIL = 'fail'
 
 RUN_SHELL_CMD = 'run_shell_cmd'
@@ -107,6 +108,7 @@
     POST_PREF + BUILD_AND_INSTALL_LOOP,
     END,
     CANCEL,
+    CRASH,
     FAIL,
     PRE_PREF + RUN_SHELL_CMD,
     POST_PREF + RUN_SHELL_CMD,

diff --git a/easybuild/tools/run.py b/easybuild/tools/run.py
@@ -37,6 +37,7 @@
 """
 import contextlib
 import functools
+import inspect
 import os
 import re
 import signal
@@ -75,7 +76,83 @@
 ]
 
 
-RunResult = namedtuple('RunResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
+RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
+
+
+class RunShellCmdError(BaseException):
+
+    def __init__(self, cmd, exit_code, work_dir, output, stderr, caller_info, *args, **kwargs):
+        """Constructor for RunShellCmdError."""
+        self.cmd = cmd
+        self.cmd_name = cmd.split(' ')[0]
+        self.exit_code = exit_code
+        self.work_dir = work_dir
+        self.output = output
+        self.stderr = stderr
+        self.caller_info = caller_info
+
+        msg = f"Shell command '{self.cmd_name}' failed!"
+        super(RunShellCmdError, self).__init__(msg, *args, **kwargs)
+
+
+def print_run_shell_cmd_error(err):
+    """
+    Report failed shell command using provided RunShellCmdError instance
+    """
+
+    def pad_4_spaces(msg):
+        return ' ' * 4 + msg
+
+    cmd_name = err.cmd.split(' ')[0]
+    error_info = [
+        '',
+        "ERROR: Shell command failed!",
+        pad_4_spaces(f"full command              ->  {err.cmd}"),
+        pad_4_spaces(f"exit code                 ->  {err.exit_code}"),
+        pad_4_spaces(f"working directory         ->  {err.work_dir}"),
+    ]
+
+    tmpdir = tempfile.mkdtemp(prefix='shell-cmd-error-')
+    output_fp = os.path.join(tmpdir, f"{cmd_name}.out")
+    with open(output_fp, 'w') as fp:
+        fp.write(err.output or '')
+
+    if err.stderr is None:
+        error_info.append(pad_4_spaces(f"output (stdout + stderr)  ->  {output_fp}"))
+    else:
+        stderr_fp = os.path.join(tmpdir, f"{cmd_name}.err")
+        with open(stderr_fp, 'w') as fp:
+            fp.write(err.stderr)
+        error_info.extend([
+            pad_4_spaces(f"output (stdout)           ->  {output_fp}"),
+            pad_4_spaces(f"error/warnings (stderr)   ->  {stderr_fp}"),
+        ])
+
+    caller_file_name, caller_line_nr, caller_function_name = err.caller_info
+    called_from_info = f"'{caller_function_name}' function in {caller_file_name} (line {caller_line_nr})"
+    error_info.extend([
+        pad_4_spaces(f"called from               ->  {called_from_info}"),
+        '',
+    ])
+
+    sys.stderr.write('\n'.join(error_info) + '\n')
+
+
+def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
+    """
+    Raise RunShellCmdError for failing shell command, after collecting additional caller info
+    """
+
+    # figure out where failing command was run
+    # need to go 3 levels down:
+    # 1) this function
+    # 2) run_shell_cmd function
+    # 3) run_cmd_cache decorator
+    # 4) actual caller site
+    frameinfo = inspect.getouterframes(inspect.currentframe())[3]
+    caller_info = (frameinfo.filename, frameinfo.lineno, frameinfo.function)
+
+    raise RunShellCmdError(cmd, exit_code, work_dir, output, stderr, caller_info)
 
 
 def run_cmd_cache(func):
@@ -178,7 +255,7 @@ def to_cmd_str(cmd):
             msg += f"  (in {work_dir})"
             dry_run_msg(msg, silent=silent)
 
-        return RunResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
+        return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
 
     start_time = datetime.now()
     if not hidden:
@@ -204,14 +281,29 @@ def to_cmd_str(cmd):
             _log.info("Command to run was changed by pre-%s hook: '%s' (was: '%s')", RUN_SHELL_CMD, cmd, old_cmd)
 
     _log.info(f"Running command '{cmd_str}' in {work_dir}")
-    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=fail_on_error,
+    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=False,
                           cwd=work_dir, env=env, input=stdin, shell=shell, executable=executable)
 
     # return output as a regular string rather than a byte sequence (and non-UTF-8 characters get stripped out)
     output = proc.stdout.decode('utf-8', 'ignore')
-    stderr_output = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None
+    stderr = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None
 
-    res = RunResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr_output, work_dir=work_dir)
+    res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir)
+
+    # always log command output
+    cmd_name = cmd_str.split(' ')[0]
+    if split_stderr:
+        _log.info(f"Output of '{cmd_name} ...' shell command (stdout only):\n{res.output}")
+        _log.info(f"Warnings and errors of '{cmd_name} ...' shell command (stderr only):\n{res.stderr}")
+    else:
+        _log.info(f"Output of '{cmd_name} ...' shell command (stdout + stderr):\n{res.output}")
+
+    if res.exit_code == 0:
+        _log.info(f"Shell command completed successfully (see output above): {cmd_str}")
+    else:
+        _log.warning(f"Shell command FAILED (exit code {res.exit_code}, see output above): {cmd_str}")
+        if fail_on_error:
+            raise_run_shell_cmd_error(res.cmd, res.exit_code, res.work_dir, output=res.output, stderr=res.stderr)
 
     if with_hooks:
         run_hook_kwargs = {
@@ -222,13 +314,6 @@ def to_cmd_str(cmd):
         }
         run_hook(RUN_SHELL_CMD, hooks, post_step_hook=True, args=[cmd], kwargs=run_hook_kwargs)
 
-    if split_stderr:
-        log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code}, "
-        log_msg += f"with stdout:\n{res.output}\nstderr:\n{res.stderr}"
-    else:
-        log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code} and output:\n{res.output}"
-    _log.info(log_msg)
-
     if not hidden:
         time_since_start = time_str_since(start_time)
         trace_msg(f"command completed: exit {res.exit_code}, ran in {time_since_start}")

diff --git a/test/framework/build_log.py b/test/framework/build_log.py
@@ -139,8 +139,8 @@ def test_easybuildlog(self):
             r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*onemorewarning.*",
             r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*lastwarning.*",
             r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*thisisnotprinted.*",
-            r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput",
-            r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): err: msg: %s",
+            r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput",
+            r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): err: msg: %s",
             r"fancyroot.test_easybuildlog \[ERROR\] :: .*EasyBuild encountered an exception \(at .* in .*\): oops",
             '',
         ])
@@ -168,7 +168,7 @@ def test_easybuildlog(self):
             r"fancyroot.test_easybuildlog \[WARNING\] :: bleh",
             r"fancyroot.test_easybuildlog \[INFO\] :: 4\+2 = 42",
             r"fancyroot.test_easybuildlog \[DEBUG\] :: this is just a test",
-            r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): foo baz baz",
+            r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): foo baz baz",
             '',
         ])
         logtxt_regex = re.compile(r'^%s' % expected_logtxt, re.M)
@@ -223,7 +223,7 @@ def test_log_levels(self):
         info_msg = r"%s \[INFO\] :: fyi" % prefix
         warning_msg = r"%s \[WARNING\] :: this is a warning" % prefix
         deprecated_msg = r"%s \[WARNING\] :: Deprecated functionality, .*: almost kaput; see .*" % prefix
-        error_msg = r"%s \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput" % prefix
+        error_msg = r"%s \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput" % prefix
 
         expected_logtxt = '\n'.join([
             error_msg,

diff --git a/test/framework/hooks.py b/test/framework/hooks.py
@@ -79,6 +79,9 @@ def setUp(self):
             '',
             'def fail_hook(err):',
             '    print("EasyBuild FAIL: %s" % err)',
+            '',
+            'def crash_hook(err):',
+            '    print("EasyBuild CRASHED, oh no! => %s" % err)',
         ])
         write_file(self.test_hooks_pymod, test_hooks_pymod_txt)
 
@@ -97,8 +100,9 @@ def test_load_hooks(self):
 
         hooks = load_hooks(self.test_hooks_pymod)
 
-        self.assertEqual(len(hooks), 8)
+        self.assertEqual(len(hooks), 9)
         expected = [
+            'crash_hook',
             'fail_hook',
             'parse_hook',
             'post_configure_hook',
@@ -140,6 +144,7 @@ def test_find_hook(self):
         pre_single_extension_hook = [hooks[k] for k in hooks if k == 'pre_single_extension_hook'][0]
         start_hook = [hooks[k] for k in hooks if k == 'start_hook'][0]
         pre_run_shell_cmd_hook = [hooks[k] for k in hooks if k == 'pre_run_shell_cmd_hook'][0]
+        crash_hook = [hooks[k] for k in hooks if k == 'crash_hook'][0]
         fail_hook = [hooks[k] for k in hooks if k == 'fail_hook'][0]
         pre_build_and_install_loop_hook = [hooks[k] for k in hooks if k == 'pre_build_and_install_loop_hook'][0]
 
@@ -175,6 +180,10 @@ def test_find_hook(self):
         self.assertEqual(find_hook('fail', hooks, pre_step_hook=True), None)
         self.assertEqual(find_hook('fail', hooks, post_step_hook=True), None)
 
+        self.assertEqual(find_hook('crash', hooks), crash_hook)
+        self.assertEqual(find_hook('crash', hooks, pre_step_hook=True), None)
+        self.assertEqual(find_hook('crash', hooks, post_step_hook=True), None)
+
         hook_name = 'build_and_install_loop'
         self.assertEqual(find_hook(hook_name, hooks), None)
         self.assertEqual(find_hook(hook_name, hooks, pre_step_hook=True), pre_build_and_install_loop_hook)
@@ -209,6 +218,7 @@ def run_hooks():
                 run_hook('single_extension', hooks, post_step_hook=True, args=[None])
             run_hook('extensions', hooks, post_step_hook=True, args=[None])
             run_hook('fail', hooks, args=[EasyBuildError('oops')])
+            run_hook('crash', hooks, args=[RuntimeError('boom!')])
             stdout = self.get_stdout()
             stderr = self.get_stderr()
             self.mock_stdout(False)
@@ -244,6 +254,8 @@ def run_hooks():
             "this is run before installing an extension",
             "== Running fail hook...",
             "EasyBuild FAIL: 'oops'",
+            "== Running crash hook...",
+            "EasyBuild CRASHED, oh no! => boom!",
         ]
         expected_stdout = '\n'.join(expected_stdout_lines)
 

diff --git a/test/framework/options.py b/test/framework/options.py
@@ -737,6 +737,7 @@ def test_avail_hooks(self):
             "	post_build_and_install_loop_hook",
             "	end_hook",
             "	cancel_hook",
+            "	crash_hook",
             "	fail_hook",
             "	pre_run_shell_cmd_hook",
             "	post_run_shell_cmd_hook",