From c09cd0c9c27ab4be7d35cbcdedff0db4cfca8f84 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Sun, 8 Dec 2019 15:29:18 -0700 Subject: [PATCH] Added disk safety check before each test * Includes possible workaround for SMART self-test TimedOut errors --- scripts/wk/hw/diags.py | 63 ++++++++++++++++++++++++++++++++++++++++++ scripts/wk/hw/obj.py | 19 +++++++------ 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index f646df5a..5fda3ade 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -147,6 +147,51 @@ class State(): # exe.start_thread(self.fix_tmux_layout_loop) exe.start_thread(self.fix_tmux_layout_loop) + def disk_safety_checks(self, prep=False, wait_for_self_tests=True): + """Run disk safety checks.""" + self_tests_in_progress = False + for disk in self.disks: + disable_tests = False + try: + disk.safety_checks() + except hw_obj.CriticalHardwareError: + disable_tests = True + if 'Disk Attributes' in disk.tests: + disk.tests['Disk Attributes'].failed = True + disk.tests['Disk Attributes'].set_status('Failed') + except hw_obj.SMARTSelfTestInProgressError: + if prep: + std.print_warning(f'SMART self-test(s) in progress for {disk.path}') + if std.ask('Continue with all tests disabled for this device?'): + disable_tests = True + else: + std.print_standard('Diagnostics aborted.') + std.print_standard(' ') + std.pause('Press Enter to exit...') + raise SystemExit(1) + elif wait_for_self_tests: + self_tests_in_progress = True + else: + # Other tests will NOT be disabled + LOG.warning('SMART data may not be reliable for: %s', disk.path) + # Add note to report + if 'Disk Self-Test' in disk.tests: + disk.tests['Disk Self-Test'].failed = True + disk.tests['Disk Self-Test'].report.append( + std.color_string('Please manually review SMART data', 'YELLOW'), + ) + + # Disable tests if necessary + if disable_tests: + disable_disk_tests(disk) + + # Wait for self-test(s) + if self_tests_in_progress: + std.print_warning('SMART self-test(s) in progress') + std.print_standard('Waiting 60 seconds before continuing...') + std.sleep(60) + self.disk_safety_checks(wait_for_self_tests=False) + def fix_tmux_layout(self, forced=True, signum=None, frame=None): # pylint: disable=unused-argument """Fix tmux layout based on cfg.hw.TMUX_LAYOUT. @@ -224,6 +269,9 @@ class State(): disk.tests[name] = test_obj self.tests[name]['Objects'].append(test_obj) + # Run safety checks + #self.disk_safety_checks(prep=True) + def init_tmux(self): """Initialize tmux layout.""" tmux.kill_all_panes() @@ -605,6 +653,17 @@ def cpu_mprime_test(state, test_objects): tmux.kill_pane(state.panes.pop('Temps', None)) +def disable_disk_tests(disk): + """Disable remaining tests for disk.""" + LOG.warning('Disabling further tests for: %s', disk.path) + for name, test in disk.tests.items(): + if name == 'Disk Attributes': + continue + if test.status in ('Pending', 'Working'): + test.set_status('Denied') + test.disabled = True + + def disk_attribute_check(state, test_objects): """Disk attribute check.""" LOG.info('Disk Attribute Check') @@ -1168,6 +1227,10 @@ def run_diags(state, menu, quick_mode=False): # Skip disabled tests continue + # Run safety checks + if name.startswith('Disk') and name != 'Disk Attributes': + state.disk_safety_checks() + # Run test(s) function = details['Function'] args = [details['Objects']] diff --git a/scripts/wk/hw/obj.py b/scripts/wk/hw/obj.py index 87f72072..bb5bd89e 100644 --- a/scripts/wk/hw/obj.py +++ b/scripts/wk/hw/obj.py @@ -33,6 +33,9 @@ class CriticalHardwareError(RuntimeError): class SMARTNotSupportedError(TypeError): """Exception used for disks lacking SMART support.""" +class SMARTSelfTestInProgressError(RuntimeError): + """Exception used when a SMART self-test is in progress.""" + # Classes class BaseObj(): @@ -370,17 +373,16 @@ class Disk(BaseObj): self.add_note(msg, 'RED') LOG.error('%s %s', self.path, msg) + # Raise blocking exception if necessary + if blocking_event_encountered: + raise CriticalHardwareError(f'Critical error(s) for: {self.path}') + # SMART self-test status test_details = self.get_smart_self_test_details() if 'remaining_percent' in test_details.get('status', ''): - blocking_event_encountered = True - msg = 'SMART self-test in progress' - self.add_note(msg, 'RED') - LOG.error('%s %s', self.path, msg) - - # Raise exception if necessary - if blocking_event_encountered: - raise CriticalHardwareError(f'Critical error(s) for: {self.path}') + msg = f'SMART self-test in progress for: {self.path}' + LOG.error(msg) + raise SMARTSelfTestInProgressError(msg) def run_self_test(self, log_path): """Run disk self-test and check if it passed, returns bool. @@ -508,6 +510,7 @@ class Disk(BaseObj): class Test(): + # pylint: disable=too-few-public-methods """Object for tracking test specific data.""" def __init__(self, dev, label): self.dev = dev