Added disk safety checks before each disk test (except Disk Attributes)
* Includes a possible workaround for SMART self-test TimedOut errors
This commit is contained in:
parent
7d66b723ca
commit
c09cd0c9c2
2 changed files with 74 additions and 8 deletions
|
|
@ -147,6 +147,51 @@ class State():
|
|||
# exe.start_thread(self.fix_tmux_layout_loop)
|
||||
exe.start_thread(self.fix_tmux_layout_loop)
|
||||
|
||||
def disk_safety_checks(self, prep=False, wait_for_self_tests=True):
    """Run safety checks on every disk and disable tests where needed.

    Calls disk.safety_checks() for each disk in self.disks and reacts to
    the exceptions it raises:

    hw_obj.CriticalHardwareError
      The 'Disk Attributes' test (if present) is marked as failed and the
      remaining tests for that disk are disabled.

    hw_obj.SMARTSelfTestInProgressError
      * prep=True: ask whether to continue with all tests disabled for the
        device; declining aborts the diagnostics.
      * wait_for_self_tests=True: once every disk has been checked, wait
        60 seconds and run the checks one more time without waiting.
      * otherwise: the other tests stay enabled, the 'Disk Self-Test' test
        (if present) is marked as failed and a note asking for a manual
        review of the SMART data is added to its report.

    Args:
      prep: True when called while preparing the tests (interactive).
      wait_for_self_tests: Wait and re-check once if a SMART self-test is
        still in progress.

    Raises:
      SystemExit: The user declined to continue during prep.
    """
    self_tests_in_progress = False
    for disk in self.disks:
        disable_tests = False
        try:
            disk.safety_checks()
        except hw_obj.CriticalHardwareError:
            disable_tests = True
            if 'Disk Attributes' in disk.tests:
                disk.tests['Disk Attributes'].failed = True
                disk.tests['Disk Attributes'].set_status('Failed')
        except hw_obj.SMARTSelfTestInProgressError:
            if prep:
                std.print_warning(f'SMART self-test(s) in progress for {disk.path}')
                if std.ask('Continue with all tests disabled for this device?'):
                    disable_tests = True
                else:
                    std.print_standard('Diagnostics aborted.')
                    std.print_standard(' ')
                    std.pause('Press Enter to exit...')
                    raise SystemExit(1)
            elif wait_for_self_tests:
                # Defer the wait until every disk has been checked
                self_tests_in_progress = True
            else:
                # Other tests will NOT be disabled
                LOG.warning('SMART data may not be reliable for: %s', disk.path)
                # Add note to report
                if 'Disk Self-Test' in disk.tests:
                    self_test = disk.tests['Disk Self-Test']
                    self_test.failed = True
                    note = std.color_string(
                        'Please manually review SMART data', 'YELLOW',
                    )
                    # run_diags calls this method before each disk test, so
                    # only add the note the first time to avoid duplicates.
                    if note not in self_test.report:
                        self_test.report.append(note)

        # Disable tests if necessary
        if disable_tests:
            disable_disk_tests(disk)

    # Wait for self-test(s)
    if self_tests_in_progress:
        std.print_warning('SMART self-test(s) in progress')
        std.print_standard('Waiting 60 seconds before continuing...')
        std.sleep(60)
        # Second pass never waits again, so this recurses at most once
        self.disk_safety_checks(wait_for_self_tests=False)
|
||||
|
||||
def fix_tmux_layout(self, forced=True, signum=None, frame=None):
|
||||
# pylint: disable=unused-argument
|
||||
"""Fix tmux layout based on cfg.hw.TMUX_LAYOUT.
|
||||
|
|
@ -224,6 +269,9 @@ class State():
|
|||
disk.tests[name] = test_obj
|
||||
self.tests[name]['Objects'].append(test_obj)
|
||||
|
||||
# Run safety checks
|
||||
#self.disk_safety_checks(prep=True)
|
||||
|
||||
def init_tmux(self):
|
||||
"""Initialize tmux layout."""
|
||||
tmux.kill_all_panes()
|
||||
|
|
@ -605,6 +653,17 @@ def cpu_mprime_test(state, test_objects):
|
|||
tmux.kill_pane(state.panes.pop('Temps', None))
|
||||
|
||||
|
||||
def disable_disk_tests(disk):
    """Disable remaining tests for disk.

    Every test in disk.tests except 'Disk Attributes' is flagged as
    disabled; tests that are still 'Pending' or 'Working' also get their
    status set to 'Denied'.
    """
    LOG.warning('Disabling further tests for: %s', disk.path)
    for name, test in disk.tests.items():
        # The attribute check keeps its own result/status
        if name == 'Disk Attributes':
            continue
        # Only overwrite the status of tests without a result yet
        if test.status in ('Pending', 'Working'):
            test.set_status('Denied')
        test.disabled = True
|
||||
|
||||
|
||||
def disk_attribute_check(state, test_objects):
|
||||
"""Disk attribute check."""
|
||||
LOG.info('Disk Attribute Check')
|
||||
|
|
@ -1168,6 +1227,10 @@ def run_diags(state, menu, quick_mode=False):
|
|||
# Skip disabled tests
|
||||
continue
|
||||
|
||||
# Run safety checks
|
||||
if name.startswith('Disk') and name != 'Disk Attributes':
|
||||
state.disk_safety_checks()
|
||||
|
||||
# Run test(s)
|
||||
function = details['Function']
|
||||
args = [details['Objects']]
|
||||
|
|
|
|||
|
|
@ -33,6 +33,9 @@ class CriticalHardwareError(RuntimeError):
|
|||
class SMARTNotSupportedError(TypeError):
    """Exception used for disks lacking SMART support."""
|
||||
|
||||
class SMARTSelfTestInProgressError(RuntimeError):
    """Exception used when a SMART self-test is in progress."""
|
||||
|
||||
|
||||
# Classes
|
||||
class BaseObj():
|
||||
|
|
@ -370,17 +373,16 @@ class Disk(BaseObj):
|
|||
self.add_note(msg, 'RED')
|
||||
LOG.error('%s %s', self.path, msg)
|
||||
|
||||
# Raise blocking exception if necessary
|
||||
if blocking_event_encountered:
|
||||
raise CriticalHardwareError(f'Critical error(s) for: {self.path}')
|
||||
|
||||
# SMART self-test status
|
||||
test_details = self.get_smart_self_test_details()
|
||||
if 'remaining_percent' in test_details.get('status', ''):
|
||||
blocking_event_encountered = True
|
||||
msg = 'SMART self-test in progress'
|
||||
self.add_note(msg, 'RED')
|
||||
LOG.error('%s %s', self.path, msg)
|
||||
|
||||
# Raise exception if necessary
|
||||
if blocking_event_encountered:
|
||||
raise CriticalHardwareError(f'Critical error(s) for: {self.path}')
|
||||
msg = f'SMART self-test in progress for: {self.path}'
|
||||
LOG.error(msg)
|
||||
raise SMARTSelfTestInProgressError(msg)
|
||||
|
||||
def run_self_test(self, log_path):
|
||||
"""Run disk self-test and check if it passed, returns bool.
|
||||
|
|
@ -508,6 +510,7 @@ class Disk(BaseObj):
|
|||
|
||||
|
||||
class Test():
|
||||
# pylint: disable=too-few-public-methods
|
||||
"""Object for tracking test specific data."""
|
||||
def __init__(self, dev, label):
|
||||
self.dev = dev
|
||||
|
|
|
|||
Loading…
Reference in a new issue