SMART section finished
* The script waits for the SMART self-test for the drive's polling time + 5 minutes
  * Unless it detects completion after the first 60 seconds
* Fixed missing line breaks in progress panes
* BREAKING: TESTS data structure reworked to support per-device status
This commit is contained in:
parent
579a747878
commit
24e72f2f35
1 changed file with 152 additions and 56 deletions
|
|
@ -32,9 +32,11 @@ TESTS = {
|
||||||
'NVMe/SMART': {
|
'NVMe/SMART': {
|
||||||
'Enabled': False,
|
'Enabled': False,
|
||||||
'Quick': False,
|
'Quick': False,
|
||||||
|
'Status': {},
|
||||||
},
|
},
|
||||||
'badblocks': {
|
'badblocks': {
|
||||||
'Enabled': False,
|
'Enabled': False,
|
||||||
|
'Status': {},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -49,9 +51,9 @@ def get_smart_details(dev):
|
||||||
|
|
||||||
def get_status_color(s):
|
def get_status_color(s):
|
||||||
color = COLORS['CLEAR']
|
color = COLORS['CLEAR']
|
||||||
if s in ['NS', 'Unknown']:
|
if s in ['Denied', 'NS', 'OVERRIDE', 'Unknown']:
|
||||||
color = COLORS['RED']
|
color = COLORS['RED']
|
||||||
elif s in ['Aborted', 'OVERRIDE', 'Working', 'Skipped']:
|
elif s in ['Aborted', 'Working', 'Skipped']:
|
||||||
color = COLORS['YELLOW']
|
color = COLORS['YELLOW']
|
||||||
elif s in ['CS']:
|
elif s in ['CS']:
|
||||||
color = COLORS['GREEN']
|
color = COLORS['GREEN']
|
||||||
|
|
@ -193,6 +195,7 @@ def run_mprime():
|
||||||
update_progress()
|
update_progress()
|
||||||
if TESTS['NVMe/SMART']['Enabled'] or TESTS['badblocks']['Enabled']:
|
if TESTS['NVMe/SMART']['Enabled'] or TESTS['badblocks']['Enabled']:
|
||||||
if not ask('Proceed to next test?'):
|
if not ask('Proceed to next test?'):
|
||||||
|
run_program('tmux kill-pane -a'.split())
|
||||||
raise GenericError
|
raise GenericError
|
||||||
else:
|
else:
|
||||||
if TESTS['Prime95']['NS']:
|
if TESTS['Prime95']['NS']:
|
||||||
|
|
@ -207,20 +210,98 @@ def run_mprime():
|
||||||
run_program('tmux kill-pane -a'.split())
|
run_program('tmux kill-pane -a'.split())
|
||||||
|
|
||||||
def run_smart():
|
def run_smart():
|
||||||
# Set Window layout
|
aborted = False
|
||||||
pane_worker = WINDOW.split_window(attach=False)
|
clear_screen()
|
||||||
pane_worker.set_height(10)
|
print_log('\nStart NVMe/SMART test(s)\n')
|
||||||
pane_progress = WINDOW.split_window(attach=False, vertical=False)
|
progress_file = '{}/selftest_progress.out'.format(global_vars['LogDir'])
|
||||||
pane_progress.set_width(15)
|
update_progress()
|
||||||
pane_progress.clear()
|
|
||||||
#pane_progress.send_keys('watch -c -n1 -t cat "{}"'.format(TESTS['Progress Out']))
|
|
||||||
pane_progress.send_keys(''.format(TESTS['Progress Out']))
|
|
||||||
|
|
||||||
# Start test
|
# Set Window layout and start test
|
||||||
sleep(120)
|
run_program('tmux split-window -dl 3 watch -c -n1 -t cat {}'.format(
|
||||||
|
progress_file).split())
|
||||||
|
run_program('tmux split-window -dhl 15 watch -c -n1 -t cat {}'.format(
|
||||||
|
TESTS['Progress Out']).split())
|
||||||
|
|
||||||
|
# Show disk details
|
||||||
|
for name, dev in sorted(TESTS['NVMe/SMART']['Devices'].items()):
|
||||||
|
show_disk_details(dev)
|
||||||
|
print_standard(' ')
|
||||||
|
update_progress()
|
||||||
|
|
||||||
|
# Run
|
||||||
|
for name, dev in sorted(TESTS['NVMe/SMART']['Devices'].items()):
|
||||||
|
cur_status = TESTS['NVMe/SMART']['Status'][name]
|
||||||
|
if cur_status == 'OVERRIDE':
|
||||||
|
# Skipping test per user request
|
||||||
|
continue
|
||||||
|
if TESTS['NVMe/SMART']['Quick'] or dev.get('NVMe Disk', False):
|
||||||
|
# Skip SMART self-tests for quick checks and NVMe disks
|
||||||
|
if dev['Quick Health OK']:
|
||||||
|
TESTS['NVMe/SMART']['Status'][name] = 'CS'
|
||||||
|
else:
|
||||||
|
TESTS['NVMe/SMART']['Status'][name] = 'NS'
|
||||||
|
elif not dev['Quick Health OK']:
|
||||||
|
# SMART overall == Failed or attributes bad, avoid self-test
|
||||||
|
TESTS['NVMe/SMART']['Status'][name] = 'NS'
|
||||||
|
else:
|
||||||
|
# Start SMART short self-test
|
||||||
|
test_length = dev['smartctl'].get(
|
||||||
|
'ata_smart_data', {}).get(
|
||||||
|
'self_test', {}).get(
|
||||||
|
'polling_minutes', {}).get(
|
||||||
|
'short', 5)
|
||||||
|
test_length = int(test_length) + 5
|
||||||
|
TESTS['NVMe/SMART']['Status'][name] = 'Working'
|
||||||
|
update_progress()
|
||||||
|
print_standard('Running SMART short self-test(s):')
|
||||||
|
print_standard(
|
||||||
|
' /dev/{:8}({} minutes)...'.format(name, test_length),
|
||||||
|
end='', flush=True)
|
||||||
|
run_program(
|
||||||
|
'sudo smartctl -t short /dev/{}'.format(name).split(),
|
||||||
|
check=False)
|
||||||
|
|
||||||
|
# Wait and show progress (in 10 second increments)
|
||||||
|
for iteration in range(int(test_length*60/10)):
|
||||||
|
# Update SMART data
|
||||||
|
dev['smartctl'] = get_smart_details(name)
|
||||||
|
|
||||||
|
# Check if test is complete
|
||||||
|
if iteration >= 6:
|
||||||
|
done = dev['smartctl'].get(
|
||||||
|
'ata_smart_data', {}).get(
|
||||||
|
'self_test', {}).get(
|
||||||
|
'status', {}).get(
|
||||||
|
'passed', False)
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Update progress_file
|
||||||
|
with open(progress_file, 'w') as f:
|
||||||
|
f.write('SMART self-test status:\n {}'.format(
|
||||||
|
dev['smartctl'].get(
|
||||||
|
'ata_smart_data', {}).get(
|
||||||
|
'self_test', {}).get(
|
||||||
|
'status', {}).get(
|
||||||
|
'string', 'unknown')))
|
||||||
|
sleep(10)
|
||||||
|
os.remove(progress_file)
|
||||||
|
|
||||||
|
# Check result
|
||||||
|
test_passed = dev['smartctl'].get(
|
||||||
|
'ata_smart_data', {}).get(
|
||||||
|
'self_test', {}).get(
|
||||||
|
'status', {}).get(
|
||||||
|
'passed', False)
|
||||||
|
if test_passed:
|
||||||
|
TESTS['NVMe/SMART']['Status'][name] = 'CS'
|
||||||
|
else:
|
||||||
|
TESTS['NVMe/SMART']['Status'][name] = 'NS'
|
||||||
|
update_progress()
|
||||||
|
print_standard('Done', timestamp=False)
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
run_program(['tmux kill-pane -a'.split()], check=False)
|
run_program('tmux kill-pane -a'.split(), check=False)
|
||||||
|
|
||||||
def run_tests(tests):
|
def run_tests(tests):
|
||||||
print_log('Starting Hardware Diagnostics')
|
print_log('Starting Hardware Diagnostics')
|
||||||
|
|
@ -257,12 +338,18 @@ def scan_disks():
|
||||||
# Get eligible disk list
|
# Get eligible disk list
|
||||||
result = run_program(['lsblk', '-J', '-O'])
|
result = run_program(['lsblk', '-J', '-O'])
|
||||||
json_data = json.loads(result.stdout.decode())
|
json_data = json.loads(result.stdout.decode())
|
||||||
devs = json_data.get('blockdevices', [])
|
devs = {}
|
||||||
devs = {d['name']: {'lsblk': d, 'Status': 'Pending'} for d in devs
|
for d in json_data.get('blockdevices', []):
|
||||||
if d['type'] == 'disk' and d['hotplug'] == '0'}
|
if d['type'] == 'disk' and d['hotplug'] == '0':
|
||||||
|
devs[d['name']] = {'lsblk': d}
|
||||||
|
TESTS['NVMe/SMART']['Status'][d['name']] = 'Pending'
|
||||||
|
TESTS['badblocks']['Status'][d['name']] = 'Pending'
|
||||||
|
|
||||||
for dev, data in devs.items():
|
for dev, data in devs.items():
|
||||||
# Get SMART attributes
|
# Get SMART attributes
|
||||||
|
run_program(
|
||||||
|
cmd = 'sudo smartctl -s on /dev/{}'.format(dev).split(),
|
||||||
|
check = False)
|
||||||
data['smartctl'] = get_smart_details(dev)
|
data['smartctl'] = get_smart_details(dev)
|
||||||
|
|
||||||
# Get NVMe attributes
|
# Get NVMe attributes
|
||||||
|
|
@ -300,13 +387,19 @@ def scan_disks():
|
||||||
show_disk_details(data)
|
show_disk_details(data)
|
||||||
print_warning("WARNING: Health can't be confirmed for: {}".format(
|
print_warning("WARNING: Health can't be confirmed for: {}".format(
|
||||||
'/dev/{}'.format(dev)))
|
'/dev/{}'.format(dev)))
|
||||||
|
dev_name = data['lsblk']['name']
|
||||||
|
print_standard(' ')
|
||||||
if ask('Run badblocks for this device anyway?'):
|
if ask('Run badblocks for this device anyway?'):
|
||||||
data['OVERRIDE'] = True
|
TESTS['NVMe/SMART']['Status'][dev_name] = 'OVERRIDE'
|
||||||
|
else:
|
||||||
|
TESTS['badblocks']['Status'][dev_name] = 'Denied'
|
||||||
|
print_standard(' ') # In case there's more than one "OVERRIDE" disk
|
||||||
|
|
||||||
TESTS['NVMe/SMART']['Devices'] = devs
|
TESTS['NVMe/SMART']['Devices'] = devs
|
||||||
TESTS['badblocks']['Devices'] = devs
|
TESTS['badblocks']['Devices'] = devs
|
||||||
|
|
||||||
def show_disk_details(dev):
|
def show_disk_details(dev):
|
||||||
|
dev_name = dev['lsblk']['name']
|
||||||
# Device description
|
# Device description
|
||||||
print_info('Device: /dev/{}'.format(dev['lsblk']['name']))
|
print_info('Device: /dev/{}'.format(dev['lsblk']['name']))
|
||||||
for key in ['model', 'size', 'serial']:
|
for key in ['model', 'size', 'serial']:
|
||||||
|
|
@ -330,8 +423,8 @@ def show_disk_details(dev):
|
||||||
print_error('ERROR: SMART overall-health assessment result: FAILED')
|
print_error('ERROR: SMART overall-health assessment result: FAILED')
|
||||||
|
|
||||||
# Attributes
|
# Attributes
|
||||||
print_info('Attributes:')
|
|
||||||
if dev.get('NVMe Disk', False):
|
if dev.get('NVMe Disk', False):
|
||||||
|
print_info('Attributes:')
|
||||||
for attrib, threshold in sorted(ATTRIBUTES['NVMe'].items()):
|
for attrib, threshold in sorted(ATTRIBUTES['NVMe'].items()):
|
||||||
if attrib in dev['nvme-cli']:
|
if attrib in dev['nvme-cli']:
|
||||||
print_standard(
|
print_standard(
|
||||||
|
|
@ -343,14 +436,16 @@ def show_disk_details(dev):
|
||||||
raw_num >= threshold.get('Error', -1)):
|
raw_num >= threshold.get('Error', -1)):
|
||||||
print_error(raw_str, timestamp=False)
|
print_error(raw_str, timestamp=False)
|
||||||
if not threshold.get('Ignore', False):
|
if not threshold.get('Ignore', False):
|
||||||
dev['NVMe/SMART']['Status'] = 'NS'
|
dev['Quick Health OK'] = False
|
||||||
|
TESTS['NVMe/SMART']['Status'][dev_name] = 'NS'
|
||||||
elif (threshold.get('Warning', False) and
|
elif (threshold.get('Warning', False) and
|
||||||
raw_num >= threshold.get('Warning', -1)):
|
raw_num >= threshold.get('Warning', -1)):
|
||||||
print_warning(raw_str, timestamp=False)
|
print_warning(raw_str, timestamp=False)
|
||||||
else:
|
else:
|
||||||
print_success(raw_str, timestamp=False)
|
print_success(raw_str, timestamp=False)
|
||||||
else:
|
elif dev['smartctl'].get('ata_smart_attributes', None):
|
||||||
# SMART attributes
|
# SMART attributes
|
||||||
|
print_info('Attributes:')
|
||||||
s_table = dev['smartctl'].get('ata_smart_attributes', {}).get(
|
s_table = dev['smartctl'].get('ata_smart_attributes', {}).get(
|
||||||
'table', {})
|
'table', {})
|
||||||
s_table = {a.get('id', 'Unknown'): a for a in s_table}
|
s_table = {a.get('id', 'Unknown'): a for a in s_table}
|
||||||
|
|
@ -371,20 +466,14 @@ def show_disk_details(dev):
|
||||||
raw_num >= threshold.get('Error', -1)):
|
raw_num >= threshold.get('Error', -1)):
|
||||||
print_error(raw_str, timestamp=False)
|
print_error(raw_str, timestamp=False)
|
||||||
if not threshold.get('Ignore', False):
|
if not threshold.get('Ignore', False):
|
||||||
dev['NVMe/SMART']['Status'] = 'NS'
|
dev['Quick Health OK'] = False
|
||||||
|
TESTS['SMART']['Status'][dev_name] = 'NS'
|
||||||
elif (threshold.get('Warning', False) and
|
elif (threshold.get('Warning', False) and
|
||||||
raw_num >= threshold.get('Warning', -1)):
|
raw_num >= threshold.get('Warning', -1)):
|
||||||
print_warning(raw_str, timestamp=False)
|
print_warning(raw_str, timestamp=False)
|
||||||
else:
|
else:
|
||||||
print_success(raw_str, timestamp=False)
|
print_success(raw_str, timestamp=False)
|
||||||
|
|
||||||
# Quick Health OK
|
|
||||||
print_standard('Quick health assessment: ', end='', flush=True)
|
|
||||||
if dev['Quick Health OK']:
|
|
||||||
print_success('Passed.\n', timestamp=False)
|
|
||||||
else:
|
|
||||||
print_error('Failed.\n', timestamp=False)
|
|
||||||
|
|
||||||
def show_results():
|
def show_results():
|
||||||
clear_screen()
|
clear_screen()
|
||||||
print_standard('Results')
|
print_standard('Results')
|
||||||
|
|
@ -395,26 +484,33 @@ def show_results():
|
||||||
TESTS['Progress Out']).split())
|
TESTS['Progress Out']).split())
|
||||||
|
|
||||||
# Prime95
|
# Prime95
|
||||||
print_info('\nPrime95:')
|
if TESTS['Prime95']['Enabled']:
|
||||||
for log, regex in [
|
print_info('\nPrime95:')
|
||||||
['results.txt', r'(error|fail)'],
|
for log, regex in [
|
||||||
['prime.log', r'completed.*0 errors, 0 warnings']]:
|
['results.txt', r'(error|fail)'],
|
||||||
if log in TESTS['Prime95']:
|
['prime.log', r'completed.*0 errors, 0 warnings']]:
|
||||||
#print_standard(log)
|
if log in TESTS['Prime95']:
|
||||||
lines = [line.strip() for line
|
#print_standard(log)
|
||||||
in TESTS['Prime95'][log].splitlines()
|
lines = [line.strip() for line
|
||||||
if re.search(regex, line, re.IGNORECASE)]
|
in TESTS['Prime95'][log].splitlines()
|
||||||
for line in lines[-4:]:
|
if re.search(regex, line, re.IGNORECASE)]
|
||||||
line = re.sub(r'^.*Worker #\d.*Torture Test (.*)', r'\1',
|
for line in lines[-4:]:
|
||||||
line, re.IGNORECASE)
|
line = re.sub(r'^.*Worker #\d.*Torture Test (.*)', r'\1',
|
||||||
if TESTS['Prime95'].get('NS', False):
|
line, re.IGNORECASE)
|
||||||
print_error(' {}'.format(line))
|
if TESTS['Prime95'].get('NS', False):
|
||||||
else:
|
print_error(' {}'.format(line))
|
||||||
print_standard(' {}'.format(line))
|
else:
|
||||||
|
print_standard(' {}'.format(line))
|
||||||
|
print_standard(' ')
|
||||||
|
|
||||||
# NVMe/SMART
|
# NVMe/SMART / badblocks
|
||||||
|
if TESTS['NVMe/SMART']['Enabled'] or TESTS['badblocks']['Enabled']:
|
||||||
# badblocks
|
for name, dev in sorted(TESTS['NVMe/SMART']['Devices'].items()):
|
||||||
|
show_disk_details(dev)
|
||||||
|
if TESTS['badblocks']['Enabled']:
|
||||||
|
#TODO
|
||||||
|
pass
|
||||||
|
print_standard(' ')
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
pause('Press Enter to return to main menu... ')
|
pause('Press Enter to return to main menu... ')
|
||||||
|
|
@ -427,32 +523,32 @@ def update_progress():
|
||||||
output.append('{BLUE}HW Diagnostics{CLEAR}'.format(**COLORS))
|
output.append('{BLUE}HW Diagnostics{CLEAR}'.format(**COLORS))
|
||||||
output.append('───────────────')
|
output.append('───────────────')
|
||||||
if TESTS['Prime95']['Enabled']:
|
if TESTS['Prime95']['Enabled']:
|
||||||
output.append('')
|
output.append(' ')
|
||||||
output.append('{BLUE}Prime95{s_color}{status:>8}{CLEAR}'.format(
|
output.append('{BLUE}Prime95{s_color}{status:>8}{CLEAR}'.format(
|
||||||
s_color = get_status_color(TESTS['Prime95']['Status']),
|
s_color = get_status_color(TESTS['Prime95']['Status']),
|
||||||
status = TESTS['Prime95']['Status'],
|
status = TESTS['Prime95']['Status'],
|
||||||
**COLORS))
|
**COLORS))
|
||||||
if TESTS['NVMe/SMART']['Enabled']:
|
if TESTS['NVMe/SMART']['Enabled']:
|
||||||
output.append('')
|
output.append(' ')
|
||||||
output.append('{BLUE}NVMe / SMART{CLEAR}'.format(**COLORS))
|
output.append('{BLUE}NVMe / SMART{CLEAR}'.format(**COLORS))
|
||||||
if TESTS['NVMe/SMART']['Quick']:
|
if TESTS['NVMe/SMART']['Quick']:
|
||||||
output.append('{YELLOW} (Quick Check){CLEAR}'.format(**COLORS))
|
output.append('{YELLOW} (Quick Check){CLEAR}'.format(**COLORS))
|
||||||
for dev, data in sorted(TESTS['NVMe/SMART']['Devices'].items()):
|
for dev, status in sorted(TESTS['NVMe/SMART']['Status'].items()):
|
||||||
output.append('{dev}{s_color}{status:>{pad}}{CLEAR}'.format(
|
output.append('{dev}{s_color}{status:>{pad}}{CLEAR}'.format(
|
||||||
dev = dev,
|
dev = dev,
|
||||||
pad = 15-len(dev),
|
pad = 15-len(dev),
|
||||||
s_color = get_status_color(data['Status']),
|
s_color = get_status_color(status),
|
||||||
status = data['Status'],
|
status = status,
|
||||||
**COLORS))
|
**COLORS))
|
||||||
if TESTS['badblocks']['Enabled']:
|
if TESTS['badblocks']['Enabled']:
|
||||||
output.append('')
|
output.append(' ')
|
||||||
output.append('{BLUE}badblocks{CLEAR}'.format(**COLORS))
|
output.append('{BLUE}badblocks{CLEAR}'.format(**COLORS))
|
||||||
for dev, data in sorted(TESTS['badblocks']['Devices'].items()):
|
for dev, status in sorted(TESTS['badblocks']['Status'].items()):
|
||||||
output.append('{dev}{s_color}{status:>{pad}}{CLEAR}'.format(
|
output.append('{dev}{s_color}{status:>{pad}}{CLEAR}'.format(
|
||||||
dev = dev,
|
dev = dev,
|
||||||
pad = 15-len(dev),
|
pad = 15-len(dev),
|
||||||
s_color = get_status_color(data['Status']),
|
s_color = get_status_color(status),
|
||||||
status = data['Status'],
|
status = status,
|
||||||
**COLORS))
|
**COLORS))
|
||||||
|
|
||||||
# Add line-endings
|
# Add line-endings
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue