Added disk safety check before each test
* Includes possible workaround for SMART self-test TimedOut errors
This commit is contained in:
parent
7d66b723ca
commit
c09cd0c9c2
2 changed files with 74 additions and 8 deletions
|
|
@ -147,6 +147,51 @@ class State():
|
||||||
# exe.start_thread(self.fix_tmux_layout_loop)
|
# exe.start_thread(self.fix_tmux_layout_loop)
|
||||||
exe.start_thread(self.fix_tmux_layout_loop)
|
exe.start_thread(self.fix_tmux_layout_loop)
|
||||||
|
|
||||||
|
def disk_safety_checks(self, prep=False, wait_for_self_tests=True):
    """Run safety checks on every disk and react to the outcome.

    Calls safety_checks() on each disk in self.disks and handles the two
    exceptions it is expected to raise:

    * hw_obj.CriticalHardwareError: the 'Disk Attributes' test (if
      present) is marked as failed and the disk's tests are disabled.
    * hw_obj.SMARTSelfTestInProgressError:
        - prep=True: the user is asked whether to continue with all
          tests disabled for that device; declining aborts.
        - prep=False and wait_for_self_tests=True: after all disks have
          been checked, wait 60 seconds and run the checks once more
          with wait_for_self_tests=False.
        - otherwise: the tests stay enabled, but the 'Disk Self-Test'
          entry (if present) is marked as failed and a manual review
          note is added to its report.

    Args:
        prep: When True, prompt the user about in-progress self-tests
            instead of waiting for them.
        wait_for_self_tests: When True (and prep is False), pause and
            re-check once if any self-test is still in progress.

    Raises:
        SystemExit: If the user declines to continue during prep.
    """
    self_tests_in_progress = False
    for disk in self.disks:
        disable_tests = False
        try:
            disk.safety_checks()
        except hw_obj.CriticalHardwareError:
            disable_tests = True
            if 'Disk Attributes' in disk.tests:
                disk.tests['Disk Attributes'].failed = True
                disk.tests['Disk Attributes'].set_status('Failed')
        except hw_obj.SMARTSelfTestInProgressError:
            if prep:
                std.print_warning(
                    f'SMART self-test(s) in progress for {disk.path}')
                if std.ask('Continue with all tests disabled for this device?'):
                    disable_tests = True
                else:
                    std.print_standard('Diagnostics aborted.')
                    std.print_standard(' ')
                    std.pause('Press Enter to exit...')
                    raise SystemExit(1)
            elif wait_for_self_tests:
                # Handled after the loop so the pause happens only once
                self_tests_in_progress = True
            else:
                # Other tests will NOT be disabled
                LOG.warning(
                    'SMART data may not be reliable for: %s', disk.path)

                # Add note to report
                if 'Disk Self-Test' in disk.tests:
                    self_test = disk.tests['Disk Self-Test']
                    self_test.failed = True
                    note = std.color_string(
                        'Please manually review SMART data', 'YELLOW')
                    # These checks run before every disk test, so avoid
                    # stacking the same note in the report.
                    if note not in self_test.report:
                        self_test.report.append(note)

        # Disable tests if necessary
        if disable_tests:
            disable_disk_tests(disk)

    # Wait for self-test(s)
    if self_tests_in_progress:
        std.print_warning('SMART self-test(s) in progress')
        std.print_standard('Waiting 60 seconds before continuing...')
        std.sleep(60)
        # Second pass never waits again, so the recursion depth is one
        self.disk_safety_checks(wait_for_self_tests=False)
|
||||||
|
|
||||||
def fix_tmux_layout(self, forced=True, signum=None, frame=None):
|
def fix_tmux_layout(self, forced=True, signum=None, frame=None):
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
"""Fix tmux layout based on cfg.hw.TMUX_LAYOUT.
|
"""Fix tmux layout based on cfg.hw.TMUX_LAYOUT.
|
||||||
|
|
@ -224,6 +269,9 @@ class State():
|
||||||
disk.tests[name] = test_obj
|
disk.tests[name] = test_obj
|
||||||
self.tests[name]['Objects'].append(test_obj)
|
self.tests[name]['Objects'].append(test_obj)
|
||||||
|
|
||||||
|
# Run safety checks
|
||||||
|
#self.disk_safety_checks(prep=True)
|
||||||
|
|
||||||
def init_tmux(self):
|
def init_tmux(self):
|
||||||
"""Initialize tmux layout."""
|
"""Initialize tmux layout."""
|
||||||
tmux.kill_all_panes()
|
tmux.kill_all_panes()
|
||||||
|
|
@ -605,6 +653,17 @@ def cpu_mprime_test(state, test_objects):
|
||||||
tmux.kill_pane(state.panes.pop('Temps', None))
|
tmux.kill_pane(state.panes.pop('Temps', None))
|
||||||
|
|
||||||
|
|
||||||
|
def disable_disk_tests(disk):
    """Deny and disable the tests of disk, leaving 'Disk Attributes' alone.

    Tests still marked 'Pending' or 'Working' get the status 'Denied';
    every test other than 'Disk Attributes' is flagged as disabled.
    """
    LOG.warning('Disabling further tests for: %s', disk.path)
    for label, test_obj in disk.tests.items():
        if label != 'Disk Attributes':
            not_finished = test_obj.status in ('Pending', 'Working')
            if not_finished:
                test_obj.set_status('Denied')
            test_obj.disabled = True
|
||||||
|
|
||||||
|
|
||||||
def disk_attribute_check(state, test_objects):
|
def disk_attribute_check(state, test_objects):
|
||||||
"""Disk attribute check."""
|
"""Disk attribute check."""
|
||||||
LOG.info('Disk Attribute Check')
|
LOG.info('Disk Attribute Check')
|
||||||
|
|
@ -1168,6 +1227,10 @@ def run_diags(state, menu, quick_mode=False):
|
||||||
# Skip disabled tests
|
# Skip disabled tests
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Run safety checks
|
||||||
|
if name.startswith('Disk') and name != 'Disk Attributes':
|
||||||
|
state.disk_safety_checks()
|
||||||
|
|
||||||
# Run test(s)
|
# Run test(s)
|
||||||
function = details['Function']
|
function = details['Function']
|
||||||
args = [details['Objects']]
|
args = [details['Objects']]
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,9 @@ class CriticalHardwareError(RuntimeError):
|
||||||
class SMARTNotSupportedError(TypeError):
    """Raised for disks that do not support SMART."""
|
||||||
|
|
||||||
|
class SMARTSelfTestInProgressError(RuntimeError):
    """Raised while a SMART self-test is still in progress."""
|
||||||
|
|
||||||
|
|
||||||
# Classes
|
# Classes
|
||||||
class BaseObj():
|
class BaseObj():
|
||||||
|
|
@ -370,17 +373,16 @@ class Disk(BaseObj):
|
||||||
self.add_note(msg, 'RED')
|
self.add_note(msg, 'RED')
|
||||||
LOG.error('%s %s', self.path, msg)
|
LOG.error('%s %s', self.path, msg)
|
||||||
|
|
||||||
|
# Raise blocking exception if necessary
|
||||||
|
if blocking_event_encountered:
|
||||||
|
raise CriticalHardwareError(f'Critical error(s) for: {self.path}')
|
||||||
|
|
||||||
# SMART self-test status
|
# SMART self-test status
|
||||||
test_details = self.get_smart_self_test_details()
|
test_details = self.get_smart_self_test_details()
|
||||||
if 'remaining_percent' in test_details.get('status', ''):
|
if 'remaining_percent' in test_details.get('status', ''):
|
||||||
blocking_event_encountered = True
|
msg = f'SMART self-test in progress for: {self.path}'
|
||||||
msg = 'SMART self-test in progress'
|
LOG.error(msg)
|
||||||
self.add_note(msg, 'RED')
|
raise SMARTSelfTestInProgressError(msg)
|
||||||
LOG.error('%s %s', self.path, msg)
|
|
||||||
|
|
||||||
# Raise exception if necessary
|
|
||||||
if blocking_event_encountered:
|
|
||||||
raise CriticalHardwareError(f'Critical error(s) for: {self.path}')
|
|
||||||
|
|
||||||
def run_self_test(self, log_path):
|
def run_self_test(self, log_path):
|
||||||
"""Run disk self-test and check if it passed, returns bool.
|
"""Run disk self-test and check if it passed, returns bool.
|
||||||
|
|
@ -508,6 +510,7 @@ class Disk(BaseObj):
|
||||||
|
|
||||||
|
|
||||||
class Test():
|
class Test():
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
"""Object for tracking test specific data."""
|
"""Object for tracking test specific data."""
|
||||||
def __init__(self, dev, label):
|
def __init__(self, dev, label):
|
||||||
self.dev = dev
|
self.dev = dev
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue