From c42c7647896ff36eab988d2e58c704a2eda1bd78 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 26 Nov 2018 18:35:33 -0700 Subject: [PATCH] Updated hw_diags.py --- .bin/Scripts/functions/hw_diags.py | 172 ++++++++++++++++++----------- 1 file changed, 110 insertions(+), 62 deletions(-) diff --git a/.bin/Scripts/functions/hw_diags.py b/.bin/Scripts/functions/hw_diags.py index d2282692..169eb337 100644 --- a/.bin/Scripts/functions/hw_diags.py +++ b/.bin/Scripts/functions/hw_diags.py @@ -8,23 +8,23 @@ from functions.common import * # STATIC VARIABLES ATTRIBUTES = { 'NVMe': { - 'critical_warning': {'Error': 1}, - 'media_errors': {'Error': 1}, - 'power_on_hours': {'Warning': 12000, 'Error': 18000, 'Ignore': True}, + 'critical_warning': {'Error': 1}, + 'media_errors': {'Error': 1}, + 'power_on_hours': {'Warning': 12000, 'Error': 18000, 'Ignore': True}, 'unsafe_shutdowns': {'Warning': 1}, }, 'SMART': { - 5: {'Error': 1}, - 9: {'Warning': 12000, 'Error': 18000, 'Ignore': True}, - 10: {'Warning': 1}, - 184: {'Error': 1}, - 187: {'Warning': 1}, - 188: {'Warning': 1}, - 196: {'Warning': 1, 'Error': 10, 'Ignore': True}, - 197: {'Error': 1}, - 198: {'Error': 1}, - 199: {'Error': 1, 'Ignore': True}, - 201: {'Warning': 1}, + 5: {'Hex': '05', 'Error': 1}, + 9: {'Hex': '09', 'Warning': 12000, 'Error': 18000, 'Ignore': True}, + 10: {'Hex': '0A', 'Error': 1}, + 184: {'Hex': 'B8', 'Error': 1}, + 187: {'Hex': 'BB', 'Error': 1}, + 188: {'Hex': 'BC', 'Error': 1}, + 196: {'Hex': 'C4', 'Error': 1}, + 197: {'Hex': 'C5', 'Error': 1}, + 198: {'Hex': 'C6', 'Error': 1}, + 199: {'Hex': 'C7', 'Error': 1, 'Ignore': True}, + 201: {'Hex': 'C9', 'Error': 1}, }, } IO_VARS = { @@ -37,9 +37,15 @@ IO_VARS = { 'Scale 8': [2**(0.56*(x+1))+(16*(x+1)) for x in range(8)], 'Scale 16': [2**(0.56*(x+1))+(16*(x+1)) for x in range(16)], 'Scale 32': [2**(0.56*(x+1)/2)+(16*(x+1)/2) for x in range(32)], - 'Threshold Fail': 65*1024**2, - 'Threshold Warn': 135*1024**2, - 'Threshold Great': 750*1024**2, + 'Threshold Graph Fail': 65*1024**2, + 'Threshold Graph Warn': 135*1024**2, + 'Threshold Graph Great': 750*1024**2, + 'Threshold HDD Min': 50*1024**2, + 'Threshold HDD High Avg': 75*1024**2, + 'Threshold HDD Low Avg': 65*1024**2, + 'Threshold SSD Min': 90*1024**2, + 'Threshold SSD High Avg': 135*1024**2, + 'Threshold SSD Low Avg': 100*1024**2, 'Graph Horizontal': ('▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'), 'Graph Horizontal Width': 40, 'Graph Vertical': ( @@ -60,6 +66,7 @@ TESTS = { 'NVMe/SMART': { 'Enabled': False, 'Quick': False, + 'Short Test': {}, 'Status': {}, }, 'badblocks': { @@ -88,11 +95,11 @@ def generate_horizontal_graph(rates, oneline=False): # Set color r_color = COLORS['CLEAR'] - if r < IO_VARS['Threshold Fail']: + if r < IO_VARS['Threshold Graph Fail']: r_color = COLORS['RED'] - elif r < IO_VARS['Threshold Warn']: + elif r < IO_VARS['Threshold Graph Warn']: r_color = COLORS['YELLOW'] - elif r > IO_VARS['Threshold Great']: + elif r > IO_VARS['Threshold Graph Great']: r_color = COLORS['GREEN'] # Build graph @@ -225,16 +232,21 @@ def menu_diags(*args): action_entries = actions, spacer = '──────────────────────────') if selection.isnumeric(): + ticket_number = None if diag_modes[int(selection)-1]['Name'] != 'Quick drive test': - # Save log for non-quick tests + clear_screen() + print_standard(' ') ticket_number = get_ticket_number() - global_vars['LogDir'] = '{}/Logs/{}'.format( + # Save log for non-quick tests + global_vars['Date-Time'] = time.strftime("%Y-%m-%d_%H%M_%z") + global_vars['LogDir'] = '{}/Logs/{}_{}'.format( global_vars['Env']['HOME'], - ticket_number if ticket_number else global_vars['Date-Time']) + ticket_number, + global_vars['Date-Time']) os.makedirs(global_vars['LogDir'], exist_ok=True) global_vars['LogFile'] = '{}/Hardware Diagnostics.log'.format( global_vars['LogDir']) - run_tests(diag_modes[int(selection)-1]['Tests']) + run_tests(diag_modes[int(selection)-1]['Tests'], ticket_number) elif selection == 'A': run_program(['hw-diags-audio'], check=False, pipe=False) pause('Press Enter to return to main menu... ') @@ -256,7 +268,7 @@ def menu_diags(*args): elif selection == 'Q': break -def run_badblocks(): +def run_badblocks(ticket_number): """Run a read-only test for all detected disks.""" aborted = False clear_screen() @@ -318,7 +330,7 @@ def run_badblocks(): run_program('tmux kill-pane -a'.split(), check=False) pass -def run_iobenchmark(): +def run_iobenchmark(ticket_number): """Run a read-only test for all detected disks.""" aborted = False clear_screen() @@ -469,10 +481,25 @@ def run_iobenchmark(): TESTS['iobenchmark']['Results'][name] = report # Set CS/NS - if min(TESTS['iobenchmark']['Data'][name]['Read Rates']) <= IO_VARS['Threshold Fail']: + min_read = min(TESTS['iobenchmark']['Data'][name]['Read Rates']) + avg_read = sum( + TESTS['iobenchmark']['Data'][name]['Read Rates'])/len( + TESTS['iobenchmark']['Data'][name]['Read Rates']) + dev_rotational = dev['lsblk'].get('rota', None) + if dev_rotational == "0": + # Use SSD scale + thresh_min = IO_VARS['Threshold SSD Min'] + thresh_high_avg = IO_VARS['Threshold SSD High Avg'] + thresh_low_avg = IO_VARS['Threshold SSD Low Avg'] + else: + # Use HDD scale + thresh_min = IO_VARS['Threshold HDD Min'] + thresh_high_avg = IO_VARS['Threshold HDD High Avg'] + thresh_low_avg = IO_VARS['Threshold HDD Low Avg'] + if min_read <= thresh_min and avg_read <= thresh_high_avg: + TESTS['iobenchmark']['Status'][name] = 'NS' + elif avg_read <= thresh_low_avg: TESTS['iobenchmark']['Status'][name] = 'NS' - elif min(TESTS['iobenchmark']['Data'][name]['Read Rates']) <= IO_VARS['Threshold Warn']: - TESTS['iobenchmark']['Status'][name] = 'Unknown' else: TESTS['iobenchmark']['Status'][name] = 'CS' @@ -487,7 +514,7 @@ def run_iobenchmark(): run_program('tmux kill-pane -a'.split(), check=False) pass -def run_mprime(): +def run_mprime(ticket_number): """Run Prime95 for MPRIME_LIMIT minutes while showing the temps.""" aborted = False print_log('\nStart Prime95 test') @@ -501,7 +528,7 @@ def run_mprime(): TESTS['Progress Out']).split()) run_program('tmux split-window -bd watch -c -n1 -t hw-sensors'.split()) run_program('tmux resize-pane -y 3'.split()) - + # Start test run_program(['apple-fans', 'max']) try: @@ -516,6 +543,9 @@ def run_mprime(): except KeyboardInterrupt: # Catch CTRL+C aborted = True + TESTS['Prime95']['Status'] = 'Aborted' + print_warning('\nAborted.') + update_progress() # Save "final" temps run_program( @@ -563,15 +593,7 @@ def run_mprime(): TESTS['Prime95']['CS'] = bool(r) # Update status - if aborted: - TESTS['Prime95']['Status'] = 'Aborted' - print_warning('\nAborted.') - update_progress() - if TESTS['NVMe/SMART']['Enabled'] or TESTS['badblocks']['Enabled']: - if not ask('Proceed to next test?'): - run_program('tmux kill-pane -a'.split()) - raise GenericError - else: + if not aborted: if TESTS['Prime95']['NS']: TESTS['Prime95']['Status'] = 'NS' elif TESTS['Prime95']['CS']: @@ -580,10 +602,21 @@ def run_mprime(): TESTS['Prime95']['Status'] = 'Unknown' update_progress() + if aborted: + if TESTS['NVMe/SMART']['Enabled'] or TESTS['badblocks']['Enabled']: + if not ask('Proceed to next test?'): + for name in TESTS['NVMe/SMART']['Devices'].keys(): + for t in ['NVMe/SMART', 'badblocks', 'iobenchmark']: + cur_status = TESTS[t]['Status'][name] + if cur_status not in ['CS', 'Denied', 'NS']: + TESTS[t]['Status'][name] = 'Aborted' + run_program('tmux kill-pane -a'.split()) + raise GenericError + # Done run_program('tmux kill-pane -a'.split()) -def run_nvme_smart(): +def run_nvme_smart(ticket_number): """Run the built-in NVMe or SMART test for all detected disks.""" aborted = False clear_screen() @@ -605,6 +638,7 @@ def run_nvme_smart(): # Run for name, dev in sorted(TESTS['NVMe/SMART']['Devices'].items()): + TESTS['NVMe/SMART']['Short Test'][name] = None cur_status = TESTS['NVMe/SMART']['Status'][name] if cur_status == 'OVERRIDE': # Skipping test per user request @@ -635,7 +669,7 @@ def run_nvme_smart(): run_program( 'sudo smartctl -t short /dev/{}'.format(name).split(), check=False) - + # Wait and show progress (in 10 second increments) for iteration in range(int(test_length*60/10)): # Update SMART data @@ -670,18 +704,24 @@ def run_nvme_smart(): 'passed', False) if test_passed: TESTS['NVMe/SMART']['Status'][name] = 'CS' + TESTS['NVMe/SMART']['Short Test'][name] = 'CS' else: TESTS['NVMe/SMART']['Status'][name] = 'NS' + TESTS['NVMe/SMART']['Short Test'][name] = 'NS' update_progress() print_standard('Done', timestamp=False) # Done run_program('tmux kill-pane -a'.split(), check=False) -def run_tests(tests): +def run_tests(tests, ticket_number=None): """Run selected hardware test(s).""" - print_log('Starting Hardware Diagnostics') - print_log('\nRunning tests: {}'.format(', '.join(tests))) + clear_screen() + print_standard('Starting Hardware Diagnostics') + if ticket_number: + print_standard(' For Ticket #{}'.format(ticket_number)) + print_standard(' ') + print_standard('Running tests: {}'.format(', '.join(tests))) # Enable selected tests for t in ['Prime95', 'NVMe/SMART', 'badblocks', 'iobenchmark']: TESTS[t]['Enabled'] = t in tests @@ -690,7 +730,6 @@ def run_tests(tests): # Initialize if TESTS['NVMe/SMART']['Enabled'] or TESTS['badblocks']['Enabled'] or TESTS['iobenchmark']['Enabled']: print_standard(' ') - print_standard('Scanning disks...') scan_disks() update_progress() @@ -698,22 +737,22 @@ def run_tests(tests): mprime_aborted = False if TESTS['Prime95']['Enabled']: try: - run_mprime() + run_mprime(ticket_number) except GenericError: mprime_aborted = True if not mprime_aborted: if TESTS['NVMe/SMART']['Enabled']: - run_nvme_smart() + run_nvme_smart(ticket_number) if TESTS['badblocks']['Enabled']: - run_badblocks() + run_badblocks(ticket_number) if TESTS['iobenchmark']['Enabled']: - run_iobenchmark() - + run_iobenchmark(ticket_number) + # Show results show_results() # Open log - if not TESTS['NVMe/SMART']['Quick']: + if not TESTS['NVMe/SMART']['Quick'] and ENABLED_OPEN_LOGS: try: popen_program(['nohup', 'leafpad', global_vars['LogFile']], pipe=True) except Exception: @@ -723,7 +762,6 @@ def run_tests(tests): def scan_disks(full_paths=False, only_path=None): """Scan for disks eligible for hardware testing.""" - clear_screen() # Get eligible disk list cmd = ['lsblk', '-J', '-O'] @@ -743,13 +781,18 @@ def scan_disks(full_paths=False, only_path=None): TESTS['iobenchmark']['Status'][d['name']] = 'Pending' else: # Skip WizardKit devices - wk_label = '{}_LINUX'.format(KIT_NAME_SHORT) - if wk_label not in [c.get('label', '') for c in d.get('children', [])]: + skip_dev=False + wk_label_regex = r'{}_(LINUX|UFD)'.format(KIT_NAME_SHORT) + for c in d.get('children', []): + r = re.search( + wk_label_regex, c.get('label', ''), re.IGNORECASE) + skip_dev = bool(r) + if not skip_dev: devs[d['name']] = {'lsblk': d} TESTS['NVMe/SMART']['Status'][d['name']] = 'Pending' TESTS['badblocks']['Status'][d['name']] = 'Pending' TESTS['iobenchmark']['Status'][d['name']] = 'Pending' - + for dev, data in devs.items(): # Get SMART attributes run_program( @@ -758,7 +801,7 @@ def scan_disks(full_paths=False, only_path=None): dev).split(), check = False) data['smartctl'] = get_smart_details(dev) - + # Get NVMe attributes if data['lsblk']['tran'] == 'nvme': cmd = 'sudo nvme smart-log /dev/{} -o json'.format(dev).split() @@ -782,7 +825,12 @@ def scan_disks(full_paths=False, only_path=None): ] if data.get('NVMe Disk', False): crit_warn = data['nvme-cli'].get('critical_warning', 1) - data['Quick Health OK'] = True if crit_warn == 0 else False + if crit_warn == 0: + dev_name = data['lsblk']['name'] + data['Quick Health OK'] = True + TESTS['NVMe/SMART']['Status'][dev_name] = 'CS' + else: + data['Quick Health OK'] = False elif set(wanted_smart_list).issubset(data['smartctl'].keys()): data['SMART Pass'] = data['smartctl'].get('smart_status', {}).get( 'passed', False) @@ -791,7 +839,7 @@ def scan_disks(full_paths=False, only_path=None): else: data['Quick Health OK'] = False data['SMART Support'] = False - + # Ask for manual overrides if necessary if TESTS['badblocks']['Enabled'] or TESTS['iobenchmark']['Enabled']: show_disk_details(data) @@ -989,13 +1037,13 @@ def update_io_progress(percent, rate, progress_file): bar_color = COLORS['CLEAR'] rate_color = COLORS['CLEAR'] step = get_graph_step(rate, scale=32) - if rate < IO_VARS['Threshold Fail']: + if rate < IO_VARS['Threshold Graph Fail']: bar_color = COLORS['RED'] rate_color = COLORS['YELLOW'] - elif rate < IO_VARS['Threshold Warn']: + elif rate < IO_VARS['Threshold Graph Warn']: bar_color = COLORS['YELLOW'] rate_color = COLORS['YELLOW'] - elif rate > IO_VARS['Threshold Great']: + elif rate > IO_VARS['Threshold Graph Great']: bar_color = COLORS['GREEN'] rate_color = COLORS['GREEN'] line = ' {p:5.1f}% {b_color}{b:<4} {r_color}{r:6.1f} Mb/s{c}\n'.format(