From 70823d2cd80c123932fcce862fc6c3e3d3c50be6 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Tue, 4 Jun 2019 20:53:34 -0600 Subject: [PATCH] Updated HW-Diags and sensor sections --- .bin/Scripts/debug/hw_diags.py | 9 +- .bin/Scripts/functions/hw_diags.py | 294 +++++++++++++++++------------ .bin/Scripts/functions/sensors.py | 89 ++++----- .bin/Scripts/hw-diags-menu | 2 +- 4 files changed, 226 insertions(+), 168 deletions(-) diff --git a/.bin/Scripts/debug/hw_diags.py b/.bin/Scripts/debug/hw_diags.py index 87a35990..44517fb4 100644 --- a/.bin/Scripts/debug/hw_diags.py +++ b/.bin/Scripts/debug/hw_diags.py @@ -149,11 +149,14 @@ def save_debug_reports(state, global_vars): f.write('{}\n'.format(line)) -def upload_logdir(global_vars): +def upload_logdir(global_vars, reason='Crash'): """Upload compressed LogDir to CRASH_SERVER.""" source = global_vars['LogDir'] source = source[source.rfind('/')+1:] - dest = '{}.txz'.format(source) + dest = 'HW-Diags_{reason}_{Date-Time}.txz'.format( + reason=reason, + **global_vars, + ) data = None # Compress LogDir @@ -166,7 +169,7 @@ def upload_logdir(global_vars): data = f.read() # Upload data - url = '{}/Crash_{}.txz'.format(CRASH_SERVER['Url'], source) + url = '{}/{}'.format(CRASH_SERVER['Url'], dest) r = requests.put( url, data=data, diff --git a/.bin/Scripts/functions/hw_diags.py b/.bin/Scripts/functions/hw_diags.py index 36be4a30..db5d01ca 100644 --- a/.bin/Scripts/functions/hw_diags.py +++ b/.bin/Scripts/functions/hw_diags.py @@ -36,6 +36,7 @@ class CpuObj(): self.tests = OrderedDict() self.get_details() self.name = self.lscpu.get('Model name', 'Unknown CPU') + self.description = self.name def get_details(self): """Get CPU details from lscpu.""" @@ -57,6 +58,13 @@ class CpuObj(): report.append('{BLUE}Device{CLEAR}'.format(**COLORS)) report.append(' {}'.format(self.name)) + # Include RAM details + ram_details = get_ram_details() + ram_total = human_readable_size(ram_details.pop('Total', 0)).strip() + ram_dimms = ['{}x {}'.format(v, k) for k, v in sorted(ram_details.items())] + report.append('{BLUE}RAM{CLEAR}'.format(**COLORS)) + report.append(' {} ({})'.format(ram_total, ', '.join(ram_dimms))) + # Tests for test in self.tests.values(): report.extend(test.report) @@ -220,11 +228,12 @@ class DiskObj(): # Done return test_running - def disable_test(self, name, status): + def disable_test(self, name, status, test_failed=False): """Disable test by name and update status.""" if name in self.tests: self.tests[name].update_status(status) self.tests[name].disabled = True + self.tests[name].failed = test_failed def generate_attribute_report( self, description=False, timestamp=False): @@ -487,7 +496,7 @@ class DiskObj(): for t in ['badblocks', 'I/O Benchmark']: self.disable_test(t, 'Denied') elif not disk_ok: - self.disable_test('NVMe / SMART', 'NS') + self.disable_test('NVMe / SMART', 'NS', test_failed=True) for t in ['badblocks', 'I/O Benchmark']: self.disable_test(t, 'Denied') @@ -495,6 +504,7 @@ class DiskObj(): class State(): """Object to track device objects and overall state.""" def __init__(self): + self.args = None self.cpu = None self.disks = [] self.panes = {} @@ -522,6 +532,83 @@ class State(): }, }) + def build_outer_panes(self): + """Build top and side panes.""" + clear_screen() + + # Top + self.panes['Top'] = tmux_split_window( + behind=True, lines=2, vertical=True, + text=TOP_PANE_TEXT) + + # Started + self.panes['Started'] = tmux_split_window( + lines=SIDE_PANE_WIDTH, target_pane=self.panes['Top'], + text='{BLUE}Started{CLEAR}\n{s}'.format( + s=time.strftime("%Y-%m-%d %H:%M %Z"), + **COLORS)) + + # Progress + self.panes['Progress'] = tmux_split_window( + lines=SIDE_PANE_WIDTH, + watch=self.progress_out) + + def fix_tmux_panes(self): + """Fix pane sizes if the window has been resized.""" + needs_fixed = False + + # Bail? + if not self.panes: + return + + # Check layout + for k, v in self.tmux_layout.items(): + if not v.get('Check'): + # Not concerned with the size of this pane + continue + # Get target + target = None + if k != 'Current': + if k not in self.panes: + # Skip missing panes + continue + else: + target = self.panes[k] + + # Check pane size + x, y = tmux_get_pane_size(pane_id=target) + if v.get('x', False) and v['x'] != x: + needs_fixed = True + if v.get('y', False) and v['y'] != y: + needs_fixed = True + + # Bail? + if not needs_fixed: + return + + # Update layout + for k, v in self.tmux_layout.items(): + # Get target + target = None + if k != 'Current': + if k not in self.panes: + # Skip missing panes + continue + else: + target = self.panes[k] + + # Resize pane + tmux_resize_pane(pane_id=target, **v) + + def fix_tmux_panes_loop(self): + while True: + try: + self.fix_tmux_panes() + sleep(1) + except RuntimeError: + # Assuming layout definitions changes mid-run, ignoring + pass + def init(self): """Remove test objects, set log, and add devices.""" self.disks = [] @@ -529,14 +616,18 @@ class State(): v['Objects'] = [] # Update LogDir - if not self.quick_mode: + if self.quick_mode: + global_vars['LogDir'] = '{}/Logs/{}'.format( + global_vars['Env']['HOME'], + time.strftime('%Y-%m-%d_%H%M_%z')) + else: global_vars['LogDir'] = '{}/Logs/{}_{}'.format( global_vars['Env']['HOME'], get_ticket_number(), time.strftime('%Y-%m-%d_%H%M_%z')) - os.makedirs(global_vars['LogDir'], exist_ok=True) - global_vars['LogFile'] = '{}/Hardware Diagnostics.log'.format( - global_vars['LogDir']) + os.makedirs(global_vars['LogDir'], exist_ok=True) + global_vars['LogFile'] = '{}/Hardware Diagnostics.log'.format( + global_vars['LogDir']) self.progress_out = '{}/progress.out'.format(global_vars['LogDir']) # Add CPU @@ -565,7 +656,13 @@ class State(): # Start tmux thread self.tmux_layout = TMUX_LAYOUT.copy() - start_thread(fix_tmux_panes_loop, args=[self]) + start_thread(self.fix_tmux_panes_loop) + + def set_top_pane_text(self, text): + """Set top pane text using TOP_PANE_TEXT and provided text.""" + tmux_update_pane( + self.panes['Top'], + text='{}\n{}'.format(TOP_PANE_TEXT, text)) class TestObj(): @@ -600,28 +697,6 @@ class TestObj(): # Functions -def build_outer_panes(state): - """Build top and side panes.""" - clear_screen() - - # Top - state.panes['Top'] = tmux_split_window( - behind=True, lines=2, vertical=True, - text=TOP_PANE_TEXT) - - # Started - state.panes['Started'] = tmux_split_window( - lines=SIDE_PANE_WIDTH, target_pane=state.panes['Top'], - text='{BLUE}Started{CLEAR}\n{s}'.format( - s=time.strftime("%Y-%m-%d %H:%M %Z"), - **COLORS)) - - # Progress - state.panes['Progress'] = tmux_split_window( - lines=SIDE_PANE_WIDTH, - watch=state.progress_out) - - def build_status_string(label, status, info_label=False): """Build status string with appropriate colors.""" status_color = COLORS['CLEAR'] @@ -638,64 +713,6 @@ def build_status_string(label, status, info_label=False): **COLORS) -def fix_tmux_panes_loop(state): - while True: - try: - fix_tmux_panes(state) - sleep(1) - except RuntimeError: - # Assuming layout definitions changes mid-run, ignoring - pass - - -def fix_tmux_panes(state): - """Fix pane sizes if the window has been resized.""" - needs_fixed = False - - # Bail? - if not state.panes: - return - - # Check layout - for k, v in state.tmux_layout.items(): - if not v.get('Check'): - # Not concerned with the size of this pane - continue - # Get target - target = None - if k != 'Current': - if k not in state.panes: - # Skip missing panes - continue - else: - target = state.panes[k] - - # Check pane size - x, y = tmux_get_pane_size(pane_id=target) - if v.get('x', False) and v['x'] != x: - needs_fixed = True - if v.get('y', False) and v['y'] != y: - needs_fixed = True - - # Bail? - if not needs_fixed: - return - - # Update layout - for k, v in state.tmux_layout.items(): - # Get target - target = None - if k != 'Current': - if k not in state.panes: - # Skip missing panes - continue - else: - target = state.panes[k] - - # Resize pane - tmux_resize_pane(pane_id=target, **v) - - def generate_horizontal_graph(rates, oneline=False): """Generate horizontal graph from rates, returns list.""" graph = ['', '', '', ''] @@ -755,6 +772,44 @@ def get_graph_step(rate, scale=16): return step +def get_ram_details(): + """Get RAM details via dmidecode, returns dict.""" + cmd = ['sudo', 'dmidecode', '--type', 'memory'] + manufacturer = 'UNKNOWN' + ram_details = {'Total': 0} + size = 0 + + # Get DMI data + result = run_program(cmd, encoding='utf-8', errors='ignore') + dmi_data = result.stdout.splitlines() + + # Parse data + for line in dmi_data: + line = line.strip() + if line == 'Memory Device': + # Reset vars + manufacturer = 'UNKNOWN' + size = 0 + elif line.startswith('Size:'): + size = convert_to_bytes(line.replace('Size: ', '')) + elif line.startswith('Manufacturer:'): + manufacturer = line.replace('Manufacturer: ', '') + if size > 0: + # Add RAM to list if slot populated + ram_str = '{} {}'.format( + human_readable_size(size).strip(), + manufacturer, + ) + ram_details['Total'] += size + if ram_str in ram_details: + ram_details[ram_str] += 1 + else: + ram_details[ram_str] = 1 + + # Done + return ram_details + + def get_read_rate(s): """Get read rate in bytes/s from dd progress output.""" real_rate = None @@ -767,6 +822,7 @@ def get_read_rate(s): def menu_diags(state, args): """Main menu to select and run HW tests.""" args = [a.lower() for a in args] + state.args = args checkmark = '*' if 'DISPLAY' in global_vars['Env']: checkmark = '✓' @@ -908,10 +964,7 @@ def run_badblocks_test(state, test): update_progress_pane(state) # Update tmux layout - tmux_update_pane( - state.panes['Top'], - text='{}\n{}'.format( - TOP_PANE_TEXT, dev.description)) + state.set_top_pane_text(dev.description) # Create monitor pane test.badblocks_out = '{}/badblocks_{}.out'.format( @@ -994,10 +1047,11 @@ def run_hw_tests(state): """Run enabled hardware tests.""" print_standard('Scanning devices...') state.init() + tests_enabled = False # Build Panes update_progress_pane(state) - build_outer_panes(state) + state.build_outer_panes() # Show selected tests and create TestObj()s print_info('Selected Tests:') @@ -1009,6 +1063,8 @@ def run_hw_tests(state): COLORS['CLEAR'], QUICK_LABEL if state.quick_mode and 'NVMe' in k else '')) if v['Enabled']: + tests_enabled = True + # Create TestObj and track under both CpuObj/DiskObj and State if k in TESTS_CPU: test_obj = TestObj( @@ -1022,6 +1078,11 @@ def run_hw_tests(state): v['Objects'].append(test_obj) print_standard('') + # Bail if no tests selected + if not tests_enabled: + tmux_kill_pane(*state.panes.values()) + return + # Run disk safety checks (if necessary) _disk_tests_enabled = False for k in TESTS_DISK: @@ -1064,7 +1125,7 @@ def run_hw_tests(state): # Rebuild panes update_progress_pane(state) - build_outer_panes(state) + state.build_outer_panes() # Mark unfinished tests as aborted for k, v in state.tests.items(): @@ -1076,8 +1137,22 @@ def run_hw_tests(state): # Update side pane update_progress_pane(state) - # Done + # Show results show_results(state) + + # Upload for review + if ENABLED_UPLOAD_DATA and ask('Upload results for review?'): + try_and_print( + message='Saving debug reports...', + function=save_debug_reports, + state=state, global_vars=global_vars) + try_and_print( + message='Uploading Data...', + function=upload_logdir, + global_vars=global_vars, + reason='Review') + + # Done sleep(1) if state.quick_mode: pause('Press Enter to exit... ') @@ -1104,10 +1179,7 @@ def run_io_benchmark(state, test): update_progress_pane(state) # Update tmux layout - tmux_update_pane( - state.panes['Top'], - text='{}\n{}'.format( - TOP_PANE_TEXT, dev.description)) + state.set_top_pane_text(dev.description) state.tmux_layout['Current'] = {'y': 15, 'Check': True} # Create monitor pane @@ -1266,9 +1338,7 @@ def run_mprime_test(state, test): test.thermal_abort = False # Update tmux layout - tmux_update_pane( - state.panes['Top'], - text='{}\n{}'.format(TOP_PANE_TEXT, dev.name)) + state.set_top_pane_text(dev.name) # Start live sensor monitor test.sensors_out = '{}/sensors.out'.format(global_vars['TmpDir']) @@ -1431,7 +1501,7 @@ def run_mprime_test(state, test): # Add temps to report test.report.append('{BLUE}Temps{CLEAR}'.format(**COLORS)) for line in generate_sensor_report( - test.sensor_data, 'Idle', 'Max', 'Cooldown', core_only=True): + test.sensor_data, 'Idle', 'Max', 'Cooldown', cpu_only=True): test.report.append(' {}'.format(line)) # Add abort message(s) @@ -1481,10 +1551,7 @@ def run_nvme_smart_tests(state, test, update_mode=False): update_progress_pane(state) # Update tmux layout - tmux_update_pane( - state.panes['Top'], - text='{}\n{}'.format( - TOP_PANE_TEXT, dev.description)) + state.set_top_pane_text(dev.description) # SMART short self-test if dev.smart_attributes and not (state.quick_mode or update_mode): @@ -1629,9 +1696,7 @@ def show_report(report, log_report=False): def show_results(state): """Show results for all tests.""" clear_screen() - tmux_update_pane( - state.panes['Top'], - text='{}\nResults'.format(TOP_PANE_TEXT)) + state.set_top_pane_text('Results') # CPU tests _enabled = False @@ -1661,17 +1726,6 @@ def show_results(state): # Update progress update_progress_pane(state) - # Ask for review - if ENABLED_UPLOAD_DATA and ask('Upload results for review?'): - try_and_print( - message='Saving debug reports...', - function=save_debug_reports, - state=state, global_vars=global_vars) - try_and_print( - message='Uploading Data...', - function=upload_logdir, - global_vars=global_vars) - def update_main_options(state, selection, main_options): """Update menu and state based on selection.""" diff --git a/.bin/Scripts/functions/sensors.py b/.bin/Scripts/functions/sensors.py index 993306bd..49a7472c 100644 --- a/.bin/Scripts/functions/sensors.py +++ b/.bin/Scripts/functions/sensors.py @@ -1,4 +1,6 @@ -# Wizard Kit: Functions - Sensors +'''Wizard Kit: Functions - Sensors''' +# pylint: disable=no-name-in-module,wildcard-import +# vim: sts=2 sw=2 ts=2 import json import re @@ -9,7 +11,7 @@ from settings.sensors import * # Error Classes class ThermalLimitReachedError(Exception): - pass + '''Thermal limit reached error.''' def clear_temps(sensor_data): @@ -20,28 +22,30 @@ def clear_temps(sensor_data): _data['Temps'] = [] -def fix_sensor_str(s): +def fix_sensor_str(_s): """Cleanup string and return str.""" - s = re.sub(r'^(\w+)-(\w+)-(\w+)', r'\1 (\2 \3)', s, re.IGNORECASE) - s = s.title() - s = s.replace('Coretemp', 'CoreTemp') - s = s.replace('Acpi', 'ACPI') - s = s.replace('ACPItz', 'ACPI TZ') - s = s.replace('Isa ', 'ISA ') - s = s.replace('Id ', 'ID ') - s = re.sub(r'(\D+)(\d+)', r'\1 \2', s, re.IGNORECASE) - s = s.replace(' ', ' ') - return s + _s = re.sub(r'^(\w+)-(\w+)-(\w+)', r'\1 (\2 \3)', _s, re.IGNORECASE) + _s = _s.title() + _s = _s.replace('Coretemp', 'CPUTemp') + _s = _s.replace('Acpi', 'ACPI') + _s = _s.replace('ACPItz', 'ACPI TZ') + _s = _s.replace('Isa ', 'ISA ') + _s = _s.replace('Pci ', 'PCI ') + _s = _s.replace('Id ', 'ID ') + _s = re.sub(r'(\D+)(\d+)', r'\1 \2', _s, re.IGNORECASE) + _s = re.sub(r'^K (\d+)Temp', r'AMD K\1 Temps', _s, re.IGNORECASE) + _s = re.sub(r'T(ctl|die)', r'CPU (T\1)', _s, re.IGNORECASE) + return _s def generate_sensor_report( sensor_data, *temp_labels, - colors=True, core_only=False): + colors=True, cpu_only=False): """Generate report based on temp_labels, returns list if str.""" report = [] for _section, _adapters in sorted(sensor_data.items()): - # CoreTemps then Other temps - if core_only and 'Core' not in _section: + # CPU temps then Other temps + if cpu_only and 'CPU' not in _section: continue for _adapter, _sources in sorted(_adapters.items()): # Adapter @@ -56,7 +60,7 @@ def generate_sensor_report( ': ' if _label != 'Current' else '', get_temp_str(_data.get(_label, '???'), colors=colors)) report.append(_line) - if not core_only: + if not cpu_only: report.append(' ') # Handle empty reports (i.e. no sensors detected) @@ -91,17 +95,17 @@ def get_colored_temp_str(temp): else: color = COLORS['CLEAR'] return '{color}{prefix}{temp:2.0f}°C{CLEAR}'.format( - color = color, - prefix = '-' if temp < 0 else '', - temp = temp, + color=color, + prefix='-' if temp < 0 else '', + temp=temp, **COLORS) def get_raw_sensor_data(): """Read sensor data and return dict.""" - data = {} + json_data = {} cmd = ['sensors', '-j'] - + # Get raw data try: result = run_program(cmd) @@ -122,8 +126,8 @@ def get_raw_sensor_data(): try: json_data = json.loads('\n'.join(raw_data)) except json.JSONDecodeError: - # Still broken, just set to empty dict - json_data = {} + # Still broken, just return the empty dict + pass # Done return json_data @@ -132,10 +136,10 @@ def get_raw_sensor_data(): def get_sensor_data(): """Parse raw sensor data and return new dict.""" json_data = get_raw_sensor_data() - sensor_data = {'CoreTemps': {}, 'Other': {}} + sensor_data = {'CPUTemps': {}, 'Other': {}} for _adapter, _sources in json_data.items(): - if 'coretemp' in _adapter: - _section = 'CoreTemps' + if is_cpu_adapter(_adapter): + _section = 'CPUTemps' else: _section = 'Other' sensor_data[_section][_adapter] = {} @@ -157,8 +161,8 @@ def get_sensor_data(): } # Remove empty sections - for k, v in sensor_data.items(): - v = {k2: v2 for k2, v2 in v.items() if v2} + for _k, _v in sensor_data.items(): + _v = {_k2: _v2 for _k2, _v2 in _v.items() if _v2} # Done return sensor_data @@ -178,14 +182,20 @@ def get_temp_str(temp, colors=True): temp) +def is_cpu_adapter(adapter): + """Checks if adapter is a known CPU adapter, returns bool.""" + is_cpu = re.search(r'(core|k\d+)temp', adapter, re.IGNORECASE) + return bool(is_cpu) + + def monitor_sensors(monitor_pane, monitor_file): """Continually update sensor data and report to screen.""" sensor_data = get_sensor_data() while True: update_sensor_data(sensor_data) - with open(monitor_file, 'w') as f: + with open(monitor_file, 'w') as _f: report = generate_sensor_report(sensor_data, 'Current', 'Max') - f.write('\n'.join(report)) + _f.write('\n'.join(report)) sleep(1) if monitor_pane and not tmux_poll_pane(monitor_pane): break @@ -196,7 +206,7 @@ def save_average_temp(sensor_data, temp_label, seconds=10): clear_temps(sensor_data) # Get temps - for i in range(seconds): + for _i in range(seconds): # pylint: disable=unused-variable update_sensor_data(sensor_data) sleep(1) @@ -219,24 +229,15 @@ def update_sensor_data(sensor_data, thermal_limit=None): _data['Current'] = _temp _data['Max'] = max(_temp, _data['Max']) _data['Temps'].append(_temp) - except Exception: + except Exception: # pylint: disable=broad-except # Dumb workound for Dell sensors with changing source names pass # Check if thermal limit reached - if thermal_limit and _section == 'CoreTemps': + if thermal_limit and _section == 'CPUTemps': if max(_data['Current'], _data['Max']) >= thermal_limit: - raise ThermalLimitReachedError('CoreTemps reached limit') - - -def join_columns(column1, column2, width=55): - return '{:<{}}{}'.format( - column1, - 55+len(column1)-len(REGEX_COLORS.sub('', column1)), - column2) + raise ThermalLimitReachedError('CPU temps reached limit') if __name__ == '__main__': print("This file is not meant to be called directly.") - -# vim: sts=2 sw=2 ts=2 diff --git a/.bin/Scripts/hw-diags-menu b/.bin/Scripts/hw-diags-menu index 1c241bf9..fc95e04a 100755 --- a/.bin/Scripts/hw-diags-menu +++ b/.bin/Scripts/hw-diags-menu @@ -49,7 +49,7 @@ if __name__ == '__main__': global_vars=global_vars) # Done - sleep(10) + sleep(1) pause('Press Enter to exit...') exit_script(1)