Major overhaul of NVMe/SMART reporting/testing
* Moved all errors/warnings to new nvme_smart_notes var * Allows the notes to be appended to the attribute report much more easily * Allows the attribute report to be updated without losing notes * NVMe/SMART should only contain the SMART short self-test results * If available and requested * Updated safety_check to fix erroneous requests * Disable tests logic reworked * Updated generate_disk_report() to include notes * Addresses issue #96
This commit is contained in:
parent
95aa063a69
commit
a5852d1caa
1 changed file with 73 additions and 86 deletions
|
|
@ -76,6 +76,7 @@ class DiskObj():
|
||||||
self.lsblk = {}
|
self.lsblk = {}
|
||||||
self.name = re.sub(r'^.*/(.*)', r'\1', disk_path)
|
self.name = re.sub(r'^.*/(.*)', r'\1', disk_path)
|
||||||
self.nvme_attributes = {}
|
self.nvme_attributes = {}
|
||||||
|
self.nvme_smart_notes = {}
|
||||||
self.override_disabled = False
|
self.override_disabled = False
|
||||||
self.path = disk_path
|
self.path = disk_path
|
||||||
self.smart_attributes = {}
|
self.smart_attributes = {}
|
||||||
|
|
@ -93,6 +94,11 @@ class DiskObj():
|
||||||
self.description = '{size} ({tran}) {model} {serial}'.format(
|
self.description = '{size} ({tran}) {model} {serial}'.format(
|
||||||
**self.lsblk)
|
**self.lsblk)
|
||||||
|
|
||||||
|
def add_nvme_smart_note(self, note):
|
||||||
|
"""Add note that will be included in the NVMe / SMART report."""
|
||||||
|
# A dict is used to avoid duplicate notes
|
||||||
|
self.nvme_smart_notes[note] = None
|
||||||
|
|
||||||
def calc_io_dd_values(self):
|
def calc_io_dd_values(self):
|
||||||
"""Calcualte I/O benchmark dd values."""
|
"""Calcualte I/O benchmark dd values."""
|
||||||
# Get real disk size
|
# Get real disk size
|
||||||
|
|
@ -152,6 +158,9 @@ class DiskObj():
|
||||||
attr_type = self.attr_type
|
attr_type = self.attr_type
|
||||||
disk_ok = True
|
disk_ok = True
|
||||||
if self.nvme_attributes:
|
if self.nvme_attributes:
|
||||||
|
self.add_nvme_smart_note(
|
||||||
|
' {YELLOW}NVMe disk support is still experimental{CLEAR}'.format(
|
||||||
|
**COLORS))
|
||||||
items = self.nvme_attributes.items()
|
items = self.nvme_attributes.items()
|
||||||
elif self.smart_attributes:
|
elif self.smart_attributes:
|
||||||
items = self.smart_attributes.items()
|
items = self.smart_attributes.items()
|
||||||
|
|
@ -180,17 +189,18 @@ class DiskObj():
|
||||||
if not self.smartctl.get('smart_status', {}).get('passed', True):
|
if not self.smartctl.get('smart_status', {}).get('passed', True):
|
||||||
disk_ok = False
|
disk_ok = False
|
||||||
self.override_disabled = True
|
self.override_disabled = True
|
||||||
|
self.add_nvme_smart_note(
|
||||||
|
' {RED}SMART overall self-assessment: Failed{CLEAR}'.format(**COLORS))
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
return disk_ok
|
return disk_ok
|
||||||
|
|
||||||
def check_smart_self_test(silent=False):
|
def check_smart_self_test(self, silent=False):
|
||||||
"""Check if a SMART self-test is currently running, returns bool."""
|
"""Check if a SMART self-test is currently running, returns bool."""
|
||||||
msg = 'SMART self-test in progress, all tests disabled'
|
msg = 'SMART self-test in progress, all tests disabled'
|
||||||
running = 'remaining_percent' in self.smart_self_test.get('status', '')
|
test_running = 'remaining_percent' in self.smart_self_test.get('status', '')
|
||||||
disk_ok = not running
|
|
||||||
|
|
||||||
if running:
|
if test_running:
|
||||||
# Ask to abort
|
# Ask to abort
|
||||||
if not silent:
|
if not silent:
|
||||||
print_warning('WARNING: {}'.format(msg))
|
print_warning('WARNING: {}'.format(msg))
|
||||||
|
|
@ -198,18 +208,12 @@ class DiskObj():
|
||||||
if ask('Abort HW Diagnostics?'):
|
if ask('Abort HW Diagnostics?'):
|
||||||
exit_script()
|
exit_script()
|
||||||
|
|
||||||
# Add warning to report
|
# Add warning note
|
||||||
if 'NVMe / SMART' in self.tests:
|
self.add_nvme_smart_note(
|
||||||
self.tests['NVMe / SMART'].report = self.generate_attribute_report()
|
' {YELLOW}WARNING: {msg}{CLEAR}'.format(msg=msg, **COLORS))
|
||||||
self.tests['NVMe / SMART'].report.append(
|
|
||||||
'{YELLOW}WARNING: {msg}{CLEAR}'.format(msg=msg, **COLORS))
|
|
||||||
|
|
||||||
# Disable all tests for this disk
|
|
||||||
for t in self.tests.keys():
|
|
||||||
self.disable_test(t, 'Denied')
|
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
return disk_ok
|
return test_running
|
||||||
|
|
||||||
def disable_test(self, name, status):
|
def disable_test(self, name, status):
|
||||||
"""Disable test by name and update status."""
|
"""Disable test by name and update status."""
|
||||||
|
|
@ -220,30 +224,19 @@ class DiskObj():
|
||||||
def generate_attribute_report(
|
def generate_attribute_report(
|
||||||
self, description=False, short_test=False, timestamp=False):
|
self, description=False, short_test=False, timestamp=False):
|
||||||
"""Generate NVMe / SMART report, returns list."""
|
"""Generate NVMe / SMART report, returns list."""
|
||||||
|
attr_type = self.attr_type
|
||||||
report = []
|
report = []
|
||||||
if description:
|
if description:
|
||||||
report.append('{BLUE}Device ({name}){CLEAR}'.format(
|
report.append('{BLUE}Device ({name}){CLEAR}'.format(
|
||||||
name=self.name, **COLORS))
|
name=self.name, **COLORS))
|
||||||
report.append(' {}'.format(self.description))
|
report.append(' {}'.format(self.description))
|
||||||
|
|
||||||
# Warnings
|
# Skip attributes if they don't exist
|
||||||
if self.nvme_attributes:
|
if not (self.nvme_attributes or self.smart_attributes):
|
||||||
attr_type = 'NVMe'
|
|
||||||
report.append(
|
|
||||||
' {YELLOW}NVMe disk support is still experimental{CLEAR}'.format(
|
|
||||||
**COLORS))
|
|
||||||
elif self.smart_attributes:
|
|
||||||
attr_type = 'SMART'
|
|
||||||
else:
|
|
||||||
# No attribute data available, return short report
|
|
||||||
report.append(
|
report.append(
|
||||||
' {YELLOW}No NVMe or SMART data available{CLEAR}'.format(
|
' {YELLOW}No NVMe or SMART data available{CLEAR}'.format(
|
||||||
**COLORS))
|
**COLORS))
|
||||||
return report
|
return report
|
||||||
if not self.smartctl.get('smart_status', {}).get('passed', True):
|
|
||||||
report.append(
|
|
||||||
' {RED}SMART overall self-assessment: Failed{CLEAR}'.format(
|
|
||||||
**COLORS))
|
|
||||||
|
|
||||||
# Attributes
|
# Attributes
|
||||||
report.append('{BLUE}{a} Attributes{YELLOW}{u:>23} {t}{CLEAR}'.format(
|
report.append('{BLUE}{a} Attributes{YELLOW}{u:>23} {t}{CLEAR}'.format(
|
||||||
|
|
@ -293,30 +286,21 @@ class DiskObj():
|
||||||
# Add line to report
|
# Add line to report
|
||||||
report.append(_line)
|
report.append(_line)
|
||||||
|
|
||||||
# SMART short-test
|
|
||||||
if short_test:
|
|
||||||
report.append('{BLUE}SMART Short self-test{CLEAR}'.format(**COLORS))
|
|
||||||
report.append(' {}'.format(
|
|
||||||
self.smart_self_test['status'].get(
|
|
||||||
'string', 'UNKNOWN').capitalize()))
|
|
||||||
if self.smart_timeout:
|
|
||||||
report.append(' {YELLOW}Timed out{CLEAR}'.format(**COLORS))
|
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
return report
|
return report
|
||||||
|
|
||||||
def generate_disk_report(self):
|
def generate_disk_report(self):
|
||||||
"""Generate disk report with data from all tests."""
|
"""Generate disk report with data from all tests."""
|
||||||
report = []
|
report = []
|
||||||
report.append('{BLUE}Device ({name}){CLEAR}'.format(
|
|
||||||
name=self.name, **COLORS))
|
|
||||||
report.append(' {}'.format(self.description))
|
|
||||||
|
|
||||||
# Attributes
|
# Attributes
|
||||||
if 'NVMe / SMART' not in self.tests:
|
report.extend(self.generate_attribute_report(description=True))
|
||||||
report.extend(self.generate_attribute_report())
|
|
||||||
elif not self.tests['NVMe / SMART'].report:
|
# Notes
|
||||||
report.extend(self.generate_attribute_report())
|
if self.nvme_smart_notes:
|
||||||
|
report.append('{BLUE}{attr_type} Notes{CLEAR}'.format(
|
||||||
|
attr_type=self.attr_type, **COLORS))
|
||||||
|
report.extend(sorted(self.nvme_smart_notes.keys()))
|
||||||
|
|
||||||
# Tests
|
# Tests
|
||||||
for test in self.tests.values():
|
for test in self.tests.values():
|
||||||
|
|
@ -410,54 +394,55 @@ class DiskObj():
|
||||||
|
|
||||||
def safety_check(self, silent=False):
|
def safety_check(self, silent=False):
|
||||||
"""Run safety checks and disable tests if necessary."""
|
"""Run safety checks and disable tests if necessary."""
|
||||||
|
test_running = False
|
||||||
if self.nvme_attributes or self.smart_attributes:
|
if self.nvme_attributes or self.smart_attributes:
|
||||||
disk_ok = self.check_attributes()
|
disk_ok = self.check_attributes()
|
||||||
disk_ok &= self.check_smart_self_test(silent)
|
test_running = self.check_smart_self_test(silent)
|
||||||
|
|
||||||
|
# Show errors (unless a SMART self-test is running)
|
||||||
|
if not (silent or test_running):
|
||||||
|
if disk_ok:
|
||||||
|
# 199/C7 warning
|
||||||
|
if self.smart_attributes.get(199, {}).get('raw', 0) > 0:
|
||||||
|
print_warning('199/C7 error detected')
|
||||||
|
print_standard(' (Have you tried swapping the disk cable?)')
|
||||||
|
else:
|
||||||
|
# Override?
|
||||||
|
show_report(
|
||||||
|
self.generate_attribute_report(description=True),
|
||||||
|
log_report=True)
|
||||||
|
print_warning(' {} error(s) detected.'.format(self.attr_type))
|
||||||
|
if self.override_disabled:
|
||||||
|
print_standard('Tests disabled for this device')
|
||||||
|
pause()
|
||||||
|
elif not (len(self.tests) == 3 and OVERRIDES_LIMITED):
|
||||||
|
if OVERRIDES_FORCED or ask('Run tests on this device anyway?'):
|
||||||
|
disk_ok = True
|
||||||
|
if 'NVMe / SMART' in self.tests:
|
||||||
|
self.disable_test('NVMe / SMART', 'OVERRIDE')
|
||||||
|
if not self.nvme_attributes and self.smart_attributes:
|
||||||
|
# Re-enable for SMART short-tests
|
||||||
|
self.tests['NVMe / SMART'].disabled = False
|
||||||
|
print_standard(' ')
|
||||||
else:
|
else:
|
||||||
# No NVMe/SMART details
|
# No NVMe/SMART details
|
||||||
self.disable_test('NVMe / SMART', 'N/A')
|
self.disable_test('NVMe / SMART', 'N/A')
|
||||||
if silent:
|
if silent:
|
||||||
disk_ok = OVERRIDES_FORCED
|
disk_ok = OVERRIDES_FORCED
|
||||||
else:
|
else:
|
||||||
print_info('Device ({})'.format(self.name))
|
|
||||||
print_standard(' {}'.format(self.description))
|
|
||||||
print_warning(' No NVMe or SMART data available')
|
|
||||||
disk_ok = OVERRIDES_FORCED or ask('Run tests on this device anyway?')
|
|
||||||
print_standard(' ')
|
|
||||||
|
|
||||||
# Show errors
|
|
||||||
if not silent:
|
|
||||||
if disk_ok:
|
|
||||||
# 199/C7 warning
|
|
||||||
if self.smart_attributes.get(199, {}).get('raw', 0) > 0:
|
|
||||||
print_warning('199/C7 error detected')
|
|
||||||
print_standard(' (Have you tried swapping the disk cable?)')
|
|
||||||
else:
|
|
||||||
# Override?
|
|
||||||
show_report(
|
show_report(
|
||||||
self.generate_attribute_report(description=True),
|
self.generate_attribute_report(description=True),
|
||||||
log_report=True)
|
log_report=True)
|
||||||
print_warning(' {} error(s) detected.'.format(self.attr_type))
|
disk_ok = OVERRIDES_FORCED or ask('Run tests on this device anyway?')
|
||||||
if self.override_disabled:
|
print_standard(' ')
|
||||||
print_standard('Tests disabled for this device')
|
|
||||||
pause()
|
|
||||||
elif not (len(self.tests) == 3 and OVERRIDES_LIMITED):
|
|
||||||
if OVERRIDES_FORCED or ask('Run tests on this device anyway?'):
|
|
||||||
disk_ok = True
|
|
||||||
if 'NVMe / SMART' in self.tests:
|
|
||||||
self.disable_test('NVMe / SMART', 'OVERRIDE')
|
|
||||||
if not self.nvme_attributes and self.smart_attributes:
|
|
||||||
# Re-enable for SMART short-tests
|
|
||||||
self.tests['NVMe / SMART'].disabled = False
|
|
||||||
print_standard(' ')
|
|
||||||
|
|
||||||
# Disable tests if necessary
|
|
||||||
if not disk_ok:
|
# Disable tests if necessary (statuses won't be overwritten)
|
||||||
if 'NVMe / SMART' in self.tests:
|
if test_running:
|
||||||
# NOTE: This will not overwrite the existing status if set
|
for t in ['NVMe / SMART', 'badblocks', 'I/O Benchmark']:
|
||||||
self.disable_test('NVMe / SMART', 'NS')
|
self.disable_test(t, 'Denied')
|
||||||
if not self.tests['NVMe / SMART'].report:
|
elif not disk_ok:
|
||||||
self.tests['NVMe / SMART'].report = self.generate_attribute_report()
|
self.disable_test('NVMe / SMART', 'NS')
|
||||||
for t in ['badblocks', 'I/O Benchmark']:
|
for t in ['badblocks', 'I/O Benchmark']:
|
||||||
self.disable_test(t, 'Denied')
|
self.disable_test(t, 'Denied')
|
||||||
|
|
||||||
|
|
@ -1456,7 +1441,6 @@ def run_nvme_smart_tests(state, test):
|
||||||
test.timeout = test.dev.smart_self_test['polling_minutes'].get(
|
test.timeout = test.dev.smart_self_test['polling_minutes'].get(
|
||||||
'short', 5)
|
'short', 5)
|
||||||
test.timeout = int(test.timeout) + 5
|
test.timeout = int(test.timeout) + 5
|
||||||
_include_short_test = True
|
|
||||||
_self_test_started = False
|
_self_test_started = False
|
||||||
_self_test_finished = False
|
_self_test_finished = False
|
||||||
|
|
||||||
|
|
@ -1505,7 +1489,6 @@ def run_nvme_smart_tests(state, test):
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
test.aborted = True
|
test.aborted = True
|
||||||
test.report = test.dev.generate_attribute_report()
|
|
||||||
test.report.append('{BLUE}SMART Short self-test{CLEAR}'.format(
|
test.report.append('{BLUE}SMART Short self-test{CLEAR}'.format(
|
||||||
**COLORS))
|
**COLORS))
|
||||||
test.report.append(' {YELLOW}Aborted{CLEAR}'.format(**COLORS))
|
test.report.append(' {YELLOW}Aborted{CLEAR}'.format(**COLORS))
|
||||||
|
|
@ -1530,13 +1513,17 @@ def run_nvme_smart_tests(state, test):
|
||||||
for t in ['badblocks', 'I/O Benchmark']:
|
for t in ['badblocks', 'I/O Benchmark']:
|
||||||
test.dev.disable_test(t, 'Denied')
|
test.dev.disable_test(t, 'Denied')
|
||||||
|
|
||||||
|
# Save report
|
||||||
|
test.report.append('{BLUE}SMART Short self-test{CLEAR}'.format(**COLORS))
|
||||||
|
test.report.append(' {}'.format(
|
||||||
|
test.dev.smart_self_test['status'].get(
|
||||||
|
'string', 'UNKNOWN').capitalize()))
|
||||||
|
if test.dev.smart_timeout:
|
||||||
|
test.report.append(' {YELLOW}Timed out{CLEAR}'.format(**COLORS))
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
tmux_kill_pane(state.panes.pop('SMART', None))
|
tmux_kill_pane(state.panes.pop('SMART', None))
|
||||||
|
|
||||||
# Save report
|
|
||||||
test.report = test.dev.generate_attribute_report(
|
|
||||||
short_test=_include_short_test)
|
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
update_progress_pane(state)
|
update_progress_pane(state)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue