From 172cb398bac2f5139402ce2a4064bc8ecc9e9360 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 4 Apr 2022 18:30:21 -0600 Subject: [PATCH 01/18] Convert hardware objects to dataclasses --- scripts/wk/cfg/hw.py | 6 + scripts/wk/hw/__init__.py | 4 +- scripts/wk/hw/{obj.py => disk.py} | 413 +++++++++--------------------- scripts/wk/hw/system.py | 183 +++++++++++++ scripts/wk/hw/test.py | 27 ++ 5 files changed, 339 insertions(+), 294 deletions(-) rename scripts/wk/hw/{obj.py => disk.py} (64%) create mode 100644 scripts/wk/hw/system.py create mode 100644 scripts/wk/hw/test.py diff --git a/scripts/wk/cfg/hw.py b/scripts/wk/cfg/hw.py index ed040437..65fa156a 100644 --- a/scripts/wk/cfg/hw.py +++ b/scripts/wk/cfg/hw.py @@ -60,9 +60,15 @@ KNOWN_RAM_VENDOR_IDS = { '0xAD00': 'Hynix', '0xCE00': 'Samsung', } +NVME_WARNING_KEYS = ( + 'spare_below_threshold', + 'reliability_degraded', + 'volatile_memory_backup_failed', + ) REGEX_POWER_ON_TIME = re.compile( r'^(\d+)([Hh].*|\s+\(\d+\s+\d+\s+\d+\).*)' ) +SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS = 120 SMC_IDS = { # Sources: https://github.com/beltex/SMCKit/blob/master/SMCKit/SMC.swift # http://www.opensource.apple.com/source/net_snmp/ diff --git a/scripts/wk/hw/__init__.py b/scripts/wk/hw/__init__.py index 741d3151..092a7428 100644 --- a/scripts/wk/hw/__init__.py +++ b/scripts/wk/hw/__init__.py @@ -2,5 +2,7 @@ from . import ddrescue from . import diags -from . import obj +from . import disk from . import sensors +from . import system +from . 
import test diff --git a/scripts/wk/hw/obj.py b/scripts/wk/hw/disk.py similarity index 64% rename from scripts/wk/hw/obj.py rename to scripts/wk/hw/disk.py index d6923432..5703e1d2 100644 --- a/scripts/wk/hw/obj.py +++ b/scripts/wk/hw/disk.py @@ -1,4 +1,4 @@ -"""WizardKit: Hardware objects (mostly)""" +"""WizardKit: Disk object and functions""" # vim: sts=2 sw=2 ts=2 import logging @@ -6,7 +6,8 @@ import pathlib import plistlib import re -from collections import OrderedDict +from dataclasses import dataclass, field +from typing import Any, Union from wk.cfg.hw import ( ATTRIBUTE_COLORS, @@ -14,28 +15,23 @@ from wk.cfg.hw import ( KEY_SMART, KNOWN_DISK_ATTRIBUTES, KNOWN_DISK_MODELS, - KNOWN_RAM_VENDOR_IDS, + NVME_WARNING_KEYS, REGEX_POWER_ON_TIME, + SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS, ) from wk.cfg.main import KIT_NAME_SHORT from wk.exe import get_json_from_command, run_program +from wk.hw.test import Test from wk.std import ( PLATFORM, bytes_to_string, color_string, sleep, - string_to_bytes, ) # STATIC VARIABLES LOG = logging.getLogger(__name__) -NVME_WARNING_KEYS = ( - 'spare_below_threshold', - 'reliability_degraded', - 'volatile_memory_backup_failed', - ) -SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS = 120 WK_LABEL_REGEX = re.compile( fr'{KIT_NAME_SHORT}_(LINUX|UFD)', re.IGNORECASE, @@ -54,130 +50,50 @@ class SMARTSelfTestInProgressError(RuntimeError): # Classes -class BaseObj(): - """Base object for tracking device data.""" - def __init__(self): - self.tests = OrderedDict() - - def all_tests_passed(self): - """Check if all tests passed, returns bool.""" - return all(results.passed for results in self.tests.values()) - - def any_test_failed(self): - """Check if any test failed, returns bool.""" - return any(results.failed for results in self.tests.values()) - - -class CpuRam(BaseObj): - """Object for tracking CPU & RAM specific data.""" - def __init__(self): - super().__init__() - self.description = 'Unknown' - self.details = {} - self.ram_total = 'Unknown' - 
self.ram_dimms = [] - self.tests = OrderedDict() - - # Update details - self.get_cpu_details() - self.get_ram_details() - - def generate_report(self): - """Generate CPU & RAM report, returns list.""" - report = [] - report.append(color_string('Device', 'BLUE')) - report.append(f' {self.description}') - - # Include RAM details - report.append(color_string('RAM', 'BLUE')) - report.append(f' {self.ram_total} ({", ".join(self.ram_dimms)})') - - # Tests - for test in self.tests.values(): - report.extend(test.report) - - return report - - def get_cpu_details(self): - """Get CPU details using OS specific methods.""" - if PLATFORM == 'Darwin': - cmd = 'sysctl -n machdep.cpu.brand_string'.split() - proc = run_program(cmd, check=False) - self.description = re.sub(r'\s+', ' ', proc.stdout.strip()) - elif PLATFORM == 'Linux': - cmd = ['lscpu', '--json'] - json_data = get_json_from_command(cmd) - for line in json_data.get('lscpu', [{}]): - _field = line.get('field', '').replace(':', '') - _data = line.get('data', '') - if not (_field or _data): - # Skip - continue - self.details[_field] = _data - - self.description = self.details.get('Model name', '') - - # Replace empty description - if not self.description: - self.description = 'Unknown CPU' - - def get_ram_details(self): - """Get RAM details using OS specific methods.""" - if PLATFORM == 'Darwin': - dimm_list = get_ram_list_macos() - elif PLATFORM == 'Linux': - dimm_list = get_ram_list_linux() - - details = {'Total': 0} - for dimm_details in dimm_list: - size, manufacturer = dimm_details - if size <= 0: - # Skip empty DIMMs - continue - description = f'{bytes_to_string(size)} {manufacturer}' - details['Total'] += size - if description in details: - details[description] += 1 - else: - details[description] = 1 - - # Save details - self.ram_total = bytes_to_string(details.pop('Total', 0)) - self.ram_dimms = [ - f'{count}x {desc}' for desc, count in sorted(details.items()) - ] - - -class Disk(BaseObj): +@dataclass(slots=True) 
+class Disk: + # pylint: disable=too-many-instance-attributes """Object for tracking disk specific data.""" - def __init__(self, path): - super().__init__() - self.attributes = {} - self.description = 'Unknown' - self.details = {} - self.notes = [] - self.path = pathlib.Path(path).resolve() - self.smartctl = {} - self.tests = OrderedDict() + attributes: dict[Any, dict] = field(init=False, default_factory=dict) + bus: str = field(init=False) + description: str = field(init=False) + filesystem: str = field(init=False) + log_sec: int = field(init=False) + model: str = field(init=False) + name: str = field(init=False) + notes: list[str] = field(init=False, default_factory=list) + path: Union[pathlib.Path, str] + phy_sec: int = field(init=False) + raw_details: dict[str, Any] = field(init=False) + raw_smartctl: dict[str, Any] = field(init=False) + serial: str = field(init=False) + size: int = field(init=False) + ssd: bool = field(init=False) + tests: list[Test] = field(init=False, default_factory=list) + use_sat: bool = field(init=False, default=False) - # Update details + def __post_init__(self) -> None: + self.path = pathlib.Path(self.path).resolve() self.get_details() + self.set_description() self.enable_smart() self.update_smart_details() - if self.details['bus'] == 'USB' and not self.attributes: + if not self.attributes and self.bus == 'USB': # Try using SAT LOG.warning('Using SAT for smartctl for %s', self.path) - self.enable_smart(use_sat=True) - self.update_smart_details(use_sat=True) + self.notes = [] + self.use_sat = True + self.enable_smart() + self.update_smart_details() if not self.is_4k_aligned(): self.add_note('One or more partitions are not 4K aligned', 'YELLOW') - def abort_self_test(self): + def abort_self_test(self) -> None: """Abort currently running non-captive self-test.""" cmd = ['sudo', 'smartctl', '--abort', self.path] run_program(cmd, check=False) - def add_note(self, note, color=None): + def add_note(self, note, color=None) -> None: """Add note 
that will be included in the disk report.""" if color: note = color_string(note, color) @@ -185,10 +101,10 @@ class Disk(BaseObj): self.notes.append(note) self.notes.sort() - def check_attributes(self, only_blocking=False): + def check_attributes(self, only_blocking=False) -> bool: """Check if any known attributes are failing, returns bool.""" attributes_ok = True - known_attributes = get_known_disk_attributes(self.details['model']) + known_attributes = get_known_disk_attributes(self.model) for attr, value in self.attributes.items(): # Skip unknown attributes if attr not in known_attributes: @@ -219,29 +135,29 @@ class Disk(BaseObj): # Done return attributes_ok - def disable_disk_tests(self): + def disable_disk_tests(self) -> None: """Disable all tests.""" LOG.warning('Disabling all tests for: %s', self.path) - for test in self.tests.values(): + for test in self.tests: if test.status in ('Pending', 'Working'): test.set_status('Denied') test.disabled = True - def enable_smart(self, use_sat=False): + def enable_smart(self) -> None: """Try enabling SMART for this disk.""" cmd = [ 'sudo', 'smartctl', - f'--device={"sat,auto" if use_sat else "auto"}', + f'--device={"sat,auto" if self.use_sat else "auto"}', '--tolerance=permissive', '--smart=on', self.path, ] run_program(cmd, check=False) - def generate_attribute_report(self): + def generate_attribute_report(self) -> list[str]: """Generate attribute report, returns list.""" - known_attributes = get_known_disk_attributes(self.details['model']) + known_attributes = get_known_disk_attributes(self.model) report = [] for attr, value in sorted(self.attributes.items()): note = '' @@ -294,7 +210,7 @@ class Disk(BaseObj): # Done return report - def generate_report(self, header=True): + def generate_report(self, header=True) -> list[str]: """Generate Disk report, returns list.""" report = [] if header: @@ -314,63 +230,63 @@ class Disk(BaseObj): report.append(f' {note}') # Tests - for test in self.tests.values(): + for test in 
self.tests: report.extend(test.report) return report - def get_details(self): + def get_details(self) -> None: """Get disk details using OS specific methods. Required details default to generic descriptions and are converted to the correct type. """ if PLATFORM == 'Darwin': - self.details = get_disk_details_macos(self.path) + self.raw_details = get_disk_details_macos(self.path) elif PLATFORM == 'Linux': - self.details = get_disk_details_linux(self.path) + self.raw_details = get_disk_details_linux(self.path) # Set necessary details - self.details['bus'] = str(self.details.get('bus', '???')).upper() - self.details['bus'] = self.details['bus'].replace('IMAGE', 'Image') - self.details['bus'] = self.details['bus'].replace('NVME', 'NVMe') - self.details['fstype'] = self.details.get('fstype', 'Unknown') - self.details['log-sec'] = self.details.get('log-sec', 512) - self.details['model'] = self.details.get('model', 'Unknown Model') - self.details['name'] = self.details.get('name', self.path) - self.details['phy-sec'] = self.details.get('phy-sec', 512) - self.details['serial'] = self.details.get('serial', 'Unknown Serial') - self.details['size'] = self.details.get('size', -1) - self.details['ssd'] = self.details.get('ssd', False) + self.bus = str(self.raw_details.get('bus', '???')).upper() + self.bus = self.bus.replace('IMAGE', 'Image') + self.bus = self.bus.replace('NVME', 'NVMe') + self.filesystem = self.raw_details.get('fstype', 'Unknown') + self.log_sec = self.raw_details.get('log-sec', 512) + self.model = self.raw_details.get('model', 'Unknown Model') + self.name = self.raw_details.get('name', self.path) + self.phy_sec = self.raw_details.get('phy-sec', 512) + self.serial = self.raw_details.get('serial', 'Unknown Serial') + self.size = self.raw_details.get('size', -1) + self.ssd = self.raw_details.get('ssd', False) # Ensure certain attributes types + ## NOTE: This is ugly, deal. 
for attr in ['bus', 'model', 'name', 'serial']: - if not isinstance(self.details[attr], str): - self.details[attr] = str(self.details[attr]) - for attr in ['phy-sec', 'size']: - if not isinstance(self.details[attr], int): - try: - self.details[attr] = int(self.details[attr]) - except (TypeError, ValueError): - LOG.error('Invalid disk %s: %s', attr, self.details[attr]) - self.details[attr] = -1 + setattr(self, attr, str(getattr(self, attr))) + for attr in ['log_sec', 'phy_sec', 'size']: + try: + setattr(self, attr, int(getattr(self, attr))) + except (TypeError, ValueError): + LOG.error('Invalid disk %s: %s', attr, getattr(self, attr)) + if attr == 'size': + setattr(self, attr, -1) # Set description self.description = ( - f'{bytes_to_string(self.details["size"], use_binary=False)}' - f' ({self.details["bus"]})' - f' {self.details["model"]}' - f' {self.details["serial"]}' + f'{bytes_to_string(self.size, use_binary=False)}' + f' ({self.bus})' + f' {self.model}' + f' {self.serial}' ) - def get_labels(self): + def get_labels(self) -> list[str]: """Build list of labels for this disk, returns list.""" labels = [] - # Add all labels from lsblk - for disk in [self.details, *self.details.get('children', [])]: - labels.append(disk.get('label', '')) - labels.append(disk.get('partlabel', '')) + # Add all labels from raw_details + for details in [self.raw_details, *self.raw_details.get('children', [])]: + labels.append(details.get('label', '')) + labels.append(details.get('partlabel', '')) # Remove empty labels labels = [str(label) for label in labels if label] @@ -378,11 +294,11 @@ class Disk(BaseObj): # Done return labels - def get_smart_self_test_details(self): + def get_smart_self_test_details(self) -> dict[Any, Any]: """Shorthand to get deeply nested self-test details, returns dict.""" details = {} try: - details = self.smartctl['ata_smart_data']['self_test'] + details = self.raw_smartctl['ata_smart_data']['self_test'] except (KeyError, TypeError): # Assuming disk lacks 
SMART support, ignore and return empty dict. pass @@ -390,17 +306,17 @@ class Disk(BaseObj): # Done return details - def is_4k_aligned(self): + def is_4k_aligned(self) -> bool: """Check that all disk partitions are aligned, returns bool.""" aligned = True if PLATFORM == 'Darwin': - aligned = is_4k_aligned_macos(self.details) + aligned = is_4k_aligned_macos(self.raw_details) elif PLATFORM == 'Linux': - aligned = is_4k_aligned_linux(self.path, self.details['phy-sec']) + aligned = is_4k_aligned_linux(self.path, self.phy_sec) return aligned - def safety_checks(self): + def safety_checks(self) -> None: """Run safety checks and raise an exception if necessary.""" blocking_event_encountered = False self.update_smart_details() @@ -411,7 +327,7 @@ class Disk(BaseObj): LOG.error('%s: Blocked for failing attribute(s)', self.path) # NVMe status - nvme_status = self.smartctl.get('smart_status', {}).get('nvme', {}) + nvme_status = self.raw_smartctl.get('smart_status', {}).get('nvme', {}) if nvme_status.get('media_read_only', False): blocking_event_encountered = True msg = 'Media has been placed in read-only mode' @@ -426,7 +342,7 @@ class Disk(BaseObj): # SMART overall assessment smart_passed = True try: - smart_passed = self.smartctl['smart_status']['passed'] + smart_passed = self.raw_smartctl['smart_status']['passed'] except (KeyError, TypeError): # Assuming disk doesn't support SMART overall assessment pass @@ -447,7 +363,7 @@ class Disk(BaseObj): LOG.error(msg) raise SMARTSelfTestInProgressError(msg) - def run_self_test(self, log_path): + def run_self_test(self, log_path) -> bool: """Run disk self-test and check if it passed, returns bool. NOTE: This function is here to reserve a place for future @@ -456,7 +372,7 @@ class Disk(BaseObj): result = self.run_smart_self_test(log_path) return result - def run_smart_self_test(self, log_path): + def run_smart_self_test(self, log_path) -> bool: """Run SMART self-test and check if it passed, returns bool. 
NOTE: An exception will be raised if the disk lacks SMART support. @@ -467,7 +383,7 @@ class Disk(BaseObj): status_str = 'Starting self-test...' test_details = self.get_smart_self_test_details() test_minutes = 15 - size_str = bytes_to_string(self.details["size"], use_binary=False) + size_str = bytes_to_string(self.size, use_binary=False) header_str = color_string( ['[', self.path.name, ' ', size_str, ']'], [None, 'BLUE', None, 'CYAN', None], @@ -532,35 +448,34 @@ class Disk(BaseObj): # Done return result - def update_smart_details(self, use_sat=False): - """Update SMART details via smartctl.""" - self.attributes = {} + def set_description(self) -> None: + """Set disk description from details.""" + self.description = ( + f'{bytes_to_string(self.size, use_binary=False)}' + f' ({self.bus}) {self.model} {self.serial}' + ) - # Check if SAT is needed - if not use_sat: - # use_sat not set, check previous run (if possible) - for arg in self.smartctl.get('smartctl', {}).get('argv', []): - if arg == '--device=sat,auto': - use_sat = True - break + def update_smart_details(self) -> None: + """Update SMART details via smartctl.""" + updated_attributes = {} # Get SMART data cmd = [ 'sudo', 'smartctl', - f'--device={"sat,auto" if use_sat else "auto"}', + f'--device={"sat,auto" if self.use_sat else "auto"}', '--tolerance=verypermissive', '--all', '--json', self.path, ] - self.smartctl = get_json_from_command(cmd, check=False) + self.raw_smartctl = get_json_from_command(cmd, check=False) # Check for attributes - if KEY_NVME in self.smartctl: - for name, value in self.smartctl[KEY_NVME].items(): + if KEY_NVME in self.raw_smartctl: + for name, value in self.raw_smartctl[KEY_NVME].items(): try: - self.attributes[name] = { + updated_attributes[name] = { 'name': name, 'raw': int(value), 'raw_str': str(value), @@ -568,8 +483,8 @@ class Disk(BaseObj): except (TypeError, ValueError): # Ignoring invalid attribute LOG.error('Invalid NVMe attribute: %s %s', name, value) - elif KEY_SMART in 
self.smartctl: - for attribute in self.smartctl[KEY_SMART].get('table', {}): + elif KEY_SMART in self.raw_smartctl: + for attribute in self.raw_smartctl[KEY_SMART].get('table', {}): try: _id = int(attribute['id']) except (KeyError, ValueError): @@ -586,37 +501,19 @@ class Disk(BaseObj): raw = int(match.group(1)) # Add to dict - self.attributes[_id] = { + updated_attributes[_id] = { 'name': name, 'raw': raw, 'raw_str': raw_str} # Add note if necessary - if not self.attributes: + if not updated_attributes: self.add_note('No NVMe or SMART data available', 'YELLOW') - -class Test(): - # pylint: disable=too-few-public-methods - """Object for tracking test specific data.""" - def __init__(self, dev, label): - self.dev = dev - self.disabled = False - self.failed = False - self.label = label - self.passed = False - self.report = [] - self.status = 'Pending' - - def set_status(self, status): - """Update status string.""" - if self.disabled: - # Don't change status if disabled - return - - self.status = status + # Done + self.attributes.update(updated_attributes) # Functions -def get_disk_details_linux(path): +def get_disk_details_linux(path) -> dict[Any, Any]: """Get disk details using lsblk, returns dict.""" cmd = ['lsblk', '--bytes', '--json', '--output-all', '--paths', path] json_data = get_json_from_command(cmd, check=False) @@ -636,7 +533,7 @@ def get_disk_details_linux(path): return details -def get_disk_details_macos(path): +def get_disk_details_macos(path) -> dict[Any, Any]: """Get disk details using diskutil, returns dict.""" details = {} @@ -696,14 +593,14 @@ def get_disk_details_macos(path): return details -def get_disk_serial_macos(path): +def get_disk_serial_macos(path) -> str: """Get disk serial using system_profiler, returns str.""" cmd = ['sudo', 'smartctl', '--info', '--json', path] smart_info = get_json_from_command(cmd) return smart_info.get('serial_number', 'Unknown Serial') -def get_disks(skip_kits=False): +def get_disks(skip_kits=False) -> list[Disk]: 
"""Get disks using OS-specific methods, returns list.""" disks = [] if PLATFORM == 'Darwin': @@ -724,7 +621,7 @@ def get_disks(skip_kits=False): return disks -def get_disks_linux(): +def get_disks_linux() -> list[Disk]: """Get disks via lsblk, returns list.""" cmd = ['lsblk', '--json', '--nodeps', '--paths'] disks = [] @@ -735,7 +632,7 @@ def get_disks_linux(): disk_obj = Disk(disk['name']) # Skip loopback devices, optical devices, etc - if disk_obj.details['type'] != 'disk': + if disk_obj.raw_details.get('type', '???') != 'disk': continue # Add disk @@ -745,7 +642,7 @@ def get_disks_linux(): return disks -def get_disks_macos(): +def get_disks_macos() -> list[Disk]: """Get disks via diskutil, returns list.""" cmd = ['diskutil', 'list', '-plist', 'physical'] disks = [] @@ -779,8 +676,8 @@ def get_disks_macos(): return disks -def get_known_disk_attributes(model): - """Get known NVMe/SMART attributes (model specific), returns str.""" +def get_known_disk_attributes(model) -> dict[Any, dict]: + """Get known NVMe/SMART attributes (model specific), returns dict.""" known_attributes = KNOWN_DISK_ATTRIBUTES.copy() # Apply model-specific data @@ -796,77 +693,7 @@ def get_known_disk_attributes(model): return known_attributes -def get_ram_list_linux(): - """Get RAM list using dmidecode.""" - cmd = ['sudo', 'dmidecode', '--type', 'memory'] - dimm_list = [] - manufacturer = 'Unknown' - size = 0 - - # Get DMI data - proc = run_program(cmd) - dmi_data = proc.stdout.splitlines() - - # Parse data - for line in dmi_data: - line = line.strip() - if line == 'Memory Device': - # Reset vars - manufacturer = 'Unknown' - size = 0 - elif line.startswith('Size:'): - size = line.replace('Size: ', '') - try: - size = string_to_bytes(size, assume_binary=True) - except ValueError: - # Assuming empty module - size = 0 - elif line.startswith('Manufacturer:'): - manufacturer = line.replace('Manufacturer: ', '') - dimm_list.append([size, manufacturer]) - - # Save details - return dimm_list - - -def 
get_ram_list_macos(): - """Get RAM list using system_profiler.""" - dimm_list = [] - - # Get and parse plist data - cmd = [ - 'system_profiler', - '-xml', - 'SPMemoryDataType', - ] - proc = run_program(cmd, check=False, encoding=None, errors=None) - try: - plist_data = plistlib.loads(proc.stdout) - except (TypeError, ValueError): - # Ignore and return an empty list - return dimm_list - - # Check DIMM data - dimm_details = plist_data[0].get('_items', [{}])[0].get('_items', []) - for dimm in dimm_details: - manufacturer = dimm.get('dimm_manufacturer', None) - manufacturer = KNOWN_RAM_VENDOR_IDS.get( - manufacturer, - f'Unknown ({manufacturer})') - size = dimm.get('dimm_size', '0 GB') - try: - size = string_to_bytes(size, assume_binary=True) - except ValueError: - # Empty DIMM? - LOG.error('Invalid DIMM size: %s', size) - continue - dimm_list.append([size, manufacturer]) - - # Save details - return dimm_list - - -def is_4k_aligned_macos(disk_details): +def is_4k_aligned_macos(disk_details) -> bool: """Check partition alignment using diskutil info, returns bool.""" aligned = True @@ -883,7 +710,7 @@ def is_4k_aligned_macos(disk_details): return aligned -def is_4k_aligned_linux(dev_path, physical_sector_size): +def is_4k_aligned_linux(dev_path, physical_sector_size) -> bool: """Check partition alignment using lsblk, returns bool.""" aligned = True cmd = [ diff --git a/scripts/wk/hw/system.py b/scripts/wk/hw/system.py new file mode 100644 index 00000000..00abf736 --- /dev/null +++ b/scripts/wk/hw/system.py @@ -0,0 +1,183 @@ +"""WizardKit: System object and functions""" +# vim: sts=2 sw=2 ts=2 + +import logging +import plistlib +import re + +from dataclasses import dataclass, field +from typing import Any + +from wk.cfg.hw import KNOWN_RAM_VENDOR_IDS +from wk.exe import get_json_from_command, run_program +from wk.hw.test import Test +from wk.std import ( + PLATFORM, + bytes_to_string, + color_string, + string_to_bytes, + ) + + +# STATIC VARIABLES +LOG = 
logging.getLogger(__name__) + + +@dataclass(slots=True) +class System: + """Object for tracking system specific hardware data.""" + cpu_description: str = field(init=False) + ram_dimms: list[str] = field(init=False, default_factory=list) + ram_total: str = field(init=False, default='Unknown') + raw_details: dict[Any, Any] = field(init=False, default_factory=dict) + tests: list[Test] = field(init=False, default_factory=list) + + def __post_init__(self) -> None: + self.get_cpu_details() + self.set_cpu_description() + self.get_ram_details() + + def generate_report(self) -> list[str]: + """Generate CPU & RAM report, returns list.""" + report = [] + report.append(color_string('Device', 'BLUE')) + report.append(f' {self.cpu_description}') + + # Include RAM details + report.append(color_string('RAM', 'BLUE')) + report.append(f' {self.ram_total} ({", ".join(self.ram_dimms)})') + + # Tests + for test in self.tests: + report.extend(test.report) + + return report + + def get_cpu_details(self) -> None: + """Get CPU details using OS specific methods.""" + cmd = ['lscpu', '--json'] + + # Bail early + if PLATFORM != 'Linux': + # Only Linux is supported ATM + return + + # Parse details + json_data = get_json_from_command(cmd) + for line in json_data.get('lscpu', [{}]): + _field = line.get('field', '').replace(':', '') + _data = line.get('data', '') + if not (_field or _data): + # Skip + continue + self.raw_details[_field] = _data + + def get_ram_details(self) -> None: + """Get RAM details using OS specific methods.""" + if PLATFORM == 'Darwin': + dimm_list = get_ram_list_macos() + elif PLATFORM == 'Linux': + dimm_list = get_ram_list_linux() + + details = {'Total': 0} + for dimm_details in dimm_list: + size, manufacturer = dimm_details + if size <= 0: + # Skip empty DIMMs + continue + description = f'{bytes_to_string(size)} {manufacturer}' + details['Total'] += size + if description in details: + details[description] += 1 + else: + details[description] = 1 + + # Save details + 
self.ram_total = bytes_to_string(details.pop('Total', 0)) + self.ram_dimms = [ + f'{count}x {desc}' for desc, count in sorted(details.items()) + ] + + def set_cpu_description(self) -> None: + """Set CPU description.""" + self.cpu_description = self.raw_details.get('Model name', 'Unknown CPU') + + # macOS + if PLATFORM == 'Darwin': + cmd = 'sysctl -n machdep.cpu.brand_string'.split() + proc = run_program(cmd, check=False) + self.cpu_description = re.sub(r'\s+', ' ', proc.stdout.strip()) + + +def get_ram_list_linux() -> list[list]: + """Get RAM list using dmidecode.""" + cmd = ['sudo', 'dmidecode', '--type', 'memory'] + dimm_list = [] + manufacturer = 'Unknown' + size = 0 + + # Get DMI data + proc = run_program(cmd) + dmi_data = proc.stdout.splitlines() + + # Parse data + for line in dmi_data: + line = line.strip() + if line == 'Memory Device': + # Reset vars + manufacturer = 'Unknown' + size = 0 + elif line.startswith('Size:'): + size = line.replace('Size: ', '') + try: + size = string_to_bytes(size, assume_binary=True) + except ValueError: + # Assuming empty module + size = 0 + elif line.startswith('Manufacturer:'): + manufacturer = line.replace('Manufacturer: ', '') + dimm_list.append([size, manufacturer]) + + # Save details + return dimm_list + + +def get_ram_list_macos() -> list[list]: + """Get RAM list using system_profiler.""" + dimm_list = [] + + # Get and parse plist data + cmd = [ + 'system_profiler', + '-xml', + 'SPMemoryDataType', + ] + proc = run_program(cmd, check=False, encoding=None, errors=None) + try: + plist_data = plistlib.loads(proc.stdout) + except (TypeError, ValueError): + # Ignore and return an empty list + return dimm_list + + # Check DIMM data + dimm_details = plist_data[0].get('_items', [{}])[0].get('_items', []) + for dimm in dimm_details: + manufacturer = dimm.get('dimm_manufacturer', None) + manufacturer = KNOWN_RAM_VENDOR_IDS.get( + manufacturer, + f'Unknown ({manufacturer})') + size = dimm.get('dimm_size', '0 GB') + try: + size = 
string_to_bytes(size, assume_binary=True) + except ValueError: + # Empty DIMM? + LOG.error('Invalid DIMM size: %s', size) + continue + dimm_list.append([size, manufacturer]) + + # Save details + return dimm_list + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") diff --git a/scripts/wk/hw/test.py b/scripts/wk/hw/test.py new file mode 100644 index 00000000..0f2305df --- /dev/null +++ b/scripts/wk/hw/test.py @@ -0,0 +1,27 @@ +"""WizardKit: Test object and functions""" +# vim: sts=2 sw=2 ts=2 + +from dataclasses import dataclass, field +from typing import Any + +@dataclass(slots=True) +class Test: + # pylint: disable=too-many-instance-attributes + """Object for tracking test specific data.""" + dev: Any + label: str + name: str + disabled: bool = field(init=False, default=False) + failed: bool = field(init=False, default=False) + hidden: bool = False + passed: bool = field(init=False, default=False) + report: list[str] = field(init=False, default_factory=list) + status: str = field(init=False, default='Pending') + + def set_status(self, status): + """Update status string.""" + if self.disabled: + # Don't change status if disabled + return + + self.status = status From 56e145942a7de07ff3984648310f9dc7ea803be3 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 4 Apr 2022 18:31:15 -0600 Subject: [PATCH 02/18] Update generate_object_report() to handle slots --- scripts/wk/debug.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/wk/debug.py b/scripts/wk/debug.py index 437ab0f8..e0fe75bf 100644 --- a/scripts/wk/debug.py +++ b/scripts/wk/debug.py @@ -20,13 +20,20 @@ METHOD_TYPE = type(DEBUG_CLASS.method) def generate_object_report(obj, indent=0): """Generate debug report for obj, returns list.""" report = [] + attr_list = [] + + # Get attribute list + if hasattr(obj, '__slots__'): + attr_list = list(obj.__slots__) + else: + attr_list = [name for name in dir(obj) if not 
name.startswith('_')] # Dump object data - for name in dir(obj): + for name in attr_list: attr = getattr(obj, name) - # Skip methods and private attributes - if isinstance(attr, METHOD_TYPE) or name.startswith('_'): + # Skip methods + if isinstance(attr, METHOD_TYPE): continue # Add attribute to report (expanded if necessary) From 8582046948c8e3f5da249a25f3f200d44246ce9f Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 4 Apr 2022 18:31:52 -0600 Subject: [PATCH 03/18] Update HW diags and ddrescue to use new HW classes --- scripts/wk/hw/ddrescue.py | 39 ++++++++++++++------------ scripts/wk/hw/diags.py | 59 ++++++++++++++++++++------------------- 2 files changed, 52 insertions(+), 46 deletions(-) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index e457dfe2..1184d583 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -27,7 +27,10 @@ from wk.cfg.ddrescue import ( DDRESCUE_SETTINGS, DDRESCUE_SPECIFIC_PASS_SETTINGS, ) -from wk.hw import obj as hw_obj +from wk.hw import disk as hw_disk +from wk.hw import sensors as hw_sensors +from wk.hw import system as hw_system +from wk.hw.test import Test # STATIC VARIABLES @@ -272,7 +275,7 @@ class BlockPair(): """Run safety check and abort if necessary.""" dest_size = -1 if self.destination.exists(): - dest_obj = hw_obj.Disk(self.destination) + dest_obj = hw_disk.Disk(self.destination) dest_size = dest_obj.details['size'] del dest_obj @@ -488,7 +491,7 @@ class State(): if settings['Partition Mapping']: # Resume previous run, load pairs from settings file for part_map in settings['Partition Mapping']: - bp_source = hw_obj.Disk( + bp_source = hw_disk.Disk( f'{self.source.path}{source_sep}{part_map[0]}', ) bp_dest = pathlib.Path( @@ -948,7 +951,7 @@ class State(): """Run safety checks for destination and abort if necessary.""" try: self.destination.safety_checks() - except hw_obj.CriticalHardwareError as err: + except hw_disk.CriticalHardwareError as err: std.print_error( 
f'Critical error(s) detected for: {self.destination.path}', ) @@ -1097,7 +1100,7 @@ class State(): string = '' # Build base string - if isinstance(obj, hw_obj.Disk): + if isinstance(obj, hw_disk.Disk): string = f'{obj.path} {obj.description}' elif obj.is_dir(): string = f'{obj}/' @@ -1122,7 +1125,7 @@ class State(): if self.source: source_exists = self.source.path.exists() if self.destination: - if isinstance(self.destination, hw_obj.Disk): + if isinstance(self.destination, hw_disk.Disk): dest_exists = self.destination.path.exists() else: dest_exists = self.destination.exists() @@ -1485,18 +1488,18 @@ def check_destination_health(destination): result = '' # Bail early - if not isinstance(destination, hw_obj.Disk): + if not isinstance(destination, hw_disk.Disk): # Return empty string return result # Run safety checks try: destination.safety_checks() - except hw_obj.CriticalHardwareError: + except hw_disk.CriticalHardwareError: result = 'Critical hardware error detected on destination' - except hw_obj.SMARTSelfTestInProgressError: + except hw_disk.SMARTSelfTestInProgressError: result = 'SMART self-test in progress on destination' - except hw_obj.SMARTNotSupportedError: + except hw_disk.SMARTNotSupportedError: pass # Done @@ -1668,20 +1671,20 @@ def get_object(path): # Check path path = pathlib.Path(path).resolve() if path.is_block_device() or path.is_char_device(): - obj = hw_obj.Disk(path) + obj = hw_disk.Disk(path) # Child/Parent check parent = obj.details['parent'] if parent: std.print_warning(f'"{obj.path}" is a child device') if std.ask(f'Use parent device "{parent}" instead?'): - obj = hw_obj.Disk(parent) + obj = hw_disk.Disk(parent) elif path.is_dir(): obj = path elif path.is_file(): # Assuming file is a raw image, mounting loop_path = mount_raw_image(path) - obj = hw_obj.Disk(loop_path) + obj = hw_disk.Disk(loop_path) # Abort if obj not set if not obj: @@ -1830,8 +1833,8 @@ def source_or_destination_changed(state): elif hasattr(obj, 'exists'): # Assuming dest 
path changed = changed or not obj.exists() - elif isinstance(obj, hw_obj.Disk): - compare_dev = hw_obj.Disk(obj.path) + elif isinstance(obj, hw_disk.Disk): + compare_dev = hw_disk.Disk(obj.path) for key in ('model', 'serial'): changed = changed or obj.details[key] != compare_dev.details[key] @@ -2217,7 +2220,7 @@ def run_recovery(state, main_menu, settings_menu, dry_run=True): def select_disk(prompt, skip_disk=None): """Select disk from list, returns Disk().""" std.print_info('Scanning disks...') - disks = hw_obj.get_disks() + disks = hw_disk.get_disks() menu = std.Menu( title=std.color_string(f'ddrescue TUI: {prompt} Selection', 'GREEN'), ) @@ -2327,10 +2330,10 @@ def select_disk_parts(prompt, disk): # Replace part list with whole disk obj object_list = [disk.path] - # Convert object_list to hw_obj.Disk() objects + # Convert object_list to hw_disk.Disk() objects print(' ') std.print_info('Getting disk/partition details...') - object_list = [hw_obj.Disk(path) for path in object_list] + object_list = [hw_disk.Disk(path) for path in object_list] # Done return object_list diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index f380239e..457d0116 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -15,8 +15,10 @@ from docopt import docopt from wk import cfg, debug, exe, graph, log, net, std, tmux from wk import os as wk_os -from wk.hw import obj as hw_obj +from wk.hw import disk as hw_disk from wk.hw import sensors as hw_sensors +from wk.hw import system as hw_system +from wk.hw.test import Test # STATIC VARIABLES @@ -100,11 +102,11 @@ class DeviceTooSmallError(RuntimeError): class State(): """Object for tracking hardware diagnostic data.""" def __init__(self): - self.cpu = None self.disks = [] self.layout = cfg.hw.TMUX_LAYOUT.copy() self.log_dir = None self.panes = {} + self.system = None self.tests = OrderedDict({ 'CPU & Cooling': { 'Enabled': False, @@ -165,12 +167,12 @@ class State(): disable_tests = False # Skip already disabled devices 
- if all(test.disabled for test in disk.tests.values()): + if all(test.disabled for test in disk.tests): continue try: disk.safety_checks() - except hw_obj.CriticalHardwareError: + except hw_disk.CriticalHardwareError: disable_tests = True disk.add_note('Critical hardware error detected.', 'RED') if 'Disk Attributes' in disk.tests: @@ -183,7 +185,7 @@ class State(): 'Critical hardware error detected during diagnostics', 'YELLOW', ) - except hw_obj.SMARTSelfTestInProgressError as err: + except hw_disk.SMARTSelfTestInProgressError as err: if prep: std.print_warning(f'SMART self-test(s) in progress for {disk.path}') if std.ask('Continue with all tests disabled for this device?'): @@ -293,8 +295,8 @@ class State(): ) # Add HW Objects - self.cpu = hw_obj.CpuRam() - self.disks = hw_obj.get_disks(skip_kits=True) + self.system = hw_system.System() + self.disks = hw_disk.get_disks(skip_kits=True) # Add test objects for name, details in menu.options.items(): @@ -304,16 +306,17 @@ class State(): if 'CPU' in name: # Create two Test objects which will both be used by cpu_stress_tests # NOTE: Prime95 should be added first - test_mprime_obj = hw_obj.Test(dev=self.cpu, label='Prime95') - test_cooling_obj = hw_obj.Test(dev=self.cpu, label='Cooling') - self.cpu.tests[test_mprime_obj.label] = test_mprime_obj - self.cpu.tests[test_cooling_obj.label] = test_cooling_obj - self.tests[name]['Objects'].append(test_mprime_obj) - self.tests[name]['Objects'].append(test_cooling_obj) + self.system.tests.append( + Test(dev=self.system, label='Prime95', name=name), + ) + self.system.tests.append( + Test(dev=self.system, label='Cooling', name=name), + ) + self.tests[name]['Objects'].extend(self.system.tests) elif 'Disk' in name: for disk in self.disks: - test_obj = hw_obj.Test(dev=disk, label=disk.path.name) - disk.tests[name] = test_obj + test_obj = Test(dev=disk, label=disk.path.name, name=name) + disk.test.append(test_obj) self.tests[name]['Objects'].append(test_obj) # Run safety checks @@ 
-360,14 +363,6 @@ class State(): with open(f'{debug_dir}/state.report', 'a', encoding='utf-8') as _f: _f.write('\n'.join(debug.generate_object_report(self))) - # CPU/RAM - with open(f'{debug_dir}/cpu.report', 'a', encoding='utf-8') as _f: - _f.write('\n'.join(debug.generate_object_report(self.cpu))) - _f.write('\n\n[Tests]') - for name, test in self.cpu.tests.items(): - _f.write(f'\n{name}:\n') - _f.write('\n'.join(debug.generate_object_report(test, indent=1))) - # Disks for disk in self.disks: with open( @@ -375,8 +370,8 @@ class State(): encoding='utf-8') as _f: _f.write('\n'.join(debug.generate_object_report(disk))) _f.write('\n\n[Tests]') - for name, test in disk.tests.items(): - _f.write(f'\n{name}:\n') + for test in disk.tests: + _f.write(f'\n{test.name}:\n') _f.write('\n'.join(debug.generate_object_report(test, indent=1))) # SMC @@ -394,6 +389,14 @@ class State(): with open(f'{debug_dir}/smc.data', 'a', encoding='utf-8') as _f: _f.write('\n'.join(data)) + # System + with open(f'{debug_dir}/system.report', 'a', encoding='utf-8') as _f: + _f.write('\n'.join(debug.generate_object_report(self.system))) + _f.write('\n\n[Tests]') + for test in self.system.tests: + _f.write(f'\n{test.name}:\n') + _f.write('\n'.join(debug.generate_object_report(test, indent=1))) + def update_clock(self): """Update 'Started' pane following clock sync.""" tmux.respawn_pane( @@ -725,7 +728,7 @@ def cpu_stress_tests(state, test_objects): return # Prep - state.update_top_pane(test_mprime_obj.dev.description) + state.update_top_pane(test_mprime_obj.dev.cpu_description) test_cooling_obj.set_status('Working') test_mprime_obj.set_status('Working') @@ -1008,7 +1011,7 @@ def disk_self_test(state, test_objects): except TimeoutError: test_obj.failed = True result = 'TimedOut' - except hw_obj.SMARTNotSupportedError: + except hw_disk.SMARTNotSupportedError: # Pass test since it doesn't apply test_obj.passed = True result = 'N/A' @@ -1463,7 +1466,7 @@ def show_results(state): if 
name.startswith('CPU')] if any(cpu_tests_enabled): std.print_success('CPU:') - std.print_report(state.cpu.generate_report()) + std.print_report(state.system.generate_report()) std.print_standard(' ') # Disk Tests From 4647efb971d92f7eb5871179b4cba2e86b59639e Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 4 Apr 2022 18:33:03 -0600 Subject: [PATCH 04/18] Fix broken import --- scripts/wk/os/linux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/wk/os/linux.py b/scripts/wk/os/linux.py index f3325fb7..df254e3f 100644 --- a/scripts/wk/os/linux.py +++ b/scripts/wk/os/linux.py @@ -9,7 +9,7 @@ import subprocess from wk import std from wk.exe import popen_program, run_program -from wk.hw.obj import Disk +from wk.hw.disk import Disk from wk.log import format_log_path From 5ffa6d8261c35f1092f3523d71d968ee6f517559 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 4 Apr 2022 18:58:45 -0600 Subject: [PATCH 05/18] Replace references to details with attributes --- scripts/wk/hw/ddrescue.py | 9 +++------ scripts/wk/hw/diags.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index 1184d583..9267e83f 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -28,9 +28,6 @@ from wk.cfg.ddrescue import ( DDRESCUE_SPECIFIC_PASS_SETTINGS, ) from wk.hw import disk as hw_disk -from wk.hw import sensors as hw_sensors -from wk.hw import system as hw_system -from wk.hw.test import Test # STATIC VARIABLES @@ -276,7 +273,7 @@ class BlockPair(): dest_size = -1 if self.destination.exists(): dest_obj = hw_disk.Disk(self.destination) - dest_size = dest_obj.details['size'] + dest_size = dest_obj.size del dest_obj # Check destination size if cloning @@ -1674,7 +1671,7 @@ def get_object(path): obj = hw_disk.Disk(path) # Child/Parent check - parent = obj.details['parent'] + parent = obj.raw_details['parent'] if parent: 
std.print_warning(f'"{obj.path}" is a child device') if std.ask(f'Use parent device "{parent}" instead?'): @@ -1836,7 +1833,7 @@ def source_or_destination_changed(state): elif isinstance(obj, hw_disk.Disk): compare_dev = hw_disk.Disk(obj.path) for key in ('model', 'serial'): - changed = changed or obj.details[key] != compare_dev.details[key] + changed = changed or getattr(obj, key) != getattr(compare_dev, key) # Update top panes state.update_top_panes() diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index 457d0116..22b8dac3 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -316,7 +316,7 @@ class State(): elif 'Disk' in name: for disk in self.disks: test_obj = Test(dev=disk, label=disk.path.name, name=name) - disk.test.append(test_obj) + disk.tests.append(test_obj) self.tests[name]['Objects'].append(test_obj) # Run safety checks @@ -592,7 +592,7 @@ def check_io_benchmark_results(test_obj, rate_list, graph_width): avg_read = sum(rate_list) / len(rate_list) min_read = min(rate_list) max_read = max(rate_list) - if test_obj.dev.details['ssd']: + if test_obj.dev.ssd: thresh_min = cfg.hw.THRESH_SSD_MIN thresh_avg_high = cfg.hw.THRESH_SSD_AVG_HIGH thresh_avg_low = cfg.hw.THRESH_SSD_AVG_LOW @@ -878,7 +878,7 @@ def disk_io_benchmark(state, test_objects, skip_usb=True): # Get dd values or bail try: - dd_values = calc_io_dd_values(test_obj.dev.details['size']) + dd_values = calc_io_dd_values(test_obj.dev.size) except DeviceTooSmallError: test_obj.set_status('N/A') test_obj.report.append( @@ -951,7 +951,7 @@ def disk_io_benchmark(state, test_objects, skip_usb=True): continue # Skip USB devices if requested - if skip_usb and test.dev.details['bus'] == 'USB': + if skip_usb and test.dev.bus == 'USB': test.set_status('Skipped') continue @@ -1098,14 +1098,14 @@ def disk_surface_scan(state, test_objects): test_obj.set_status('Working') # Increase block size if necessary - if (dev.details['phy-sec'] == 4096 - or dev.details['size'] >= 
cfg.hw.BADBLOCKS_LARGE_DISK): + if (dev.phy_sec == 4096 + or dev.size >= cfg.hw.BADBLOCKS_LARGE_DISK): block_size = '4096' # Start scan cmd = ['sudo', 'badblocks', '-sv', '-b', block_size, '-e', '1', dev_path] with open(log_path, 'a', encoding='utf-8') as _f: - size_str = std.bytes_to_string(dev.details["size"], use_binary=False) + size_str = std.bytes_to_string(dev.size, use_binary=False) _f.write( std.color_string( ['[', dev.path.name, ' ', size_str, ']\n'], @@ -1156,7 +1156,7 @@ def disk_surface_scan(state, test_objects): line for line in disk.generate_attribute_report() if 'failed' in line ] if failed_attributes: - size_str = std.bytes_to_string(disk.details["size"], use_binary=False) + size_str = std.bytes_to_string(disk.size, use_binary=False) std.print_colored( ['[', disk.path.name, ' ', size_str, ']'], [None, 'BLUE', None, 'CYAN', None], From fc2bb07d11ae971b24e2075970516e4421df257d Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Tue, 5 Apr 2022 14:45:14 -0600 Subject: [PATCH 06/18] Track test groups as list instead of a dict By only including selected tests we can skip checking for the enabled/disabled status. This will also simplify the process of disabling future tests for a disk if a failure is detected. --- scripts/wk/cfg/hw.py | 24 ++++++ scripts/wk/hw/diags.py | 168 +++++++++++++++++------------------------ scripts/wk/hw/test.py | 10 ++- 3 files changed, 101 insertions(+), 101 deletions(-) diff --git a/scripts/wk/cfg/hw.py b/scripts/wk/cfg/hw.py index 65fa156a..ae39da69 100644 --- a/scripts/wk/cfg/hw.py +++ b/scripts/wk/cfg/hw.py @@ -16,9 +16,21 @@ ATTRIBUTE_COLORS = ( ) # NOTE: Force 4K read block size for disks >= 3TB BADBLOCKS_LARGE_DISK = 3 * 1024**4 +BADBLOCKS_REGEX = re.compile( + r'^Pass completed, (\d+) bad blocks found. 
.(\d+)/(\d+)/(\d+) errors', + re.IGNORECASE, + ) CPU_CRITICAL_TEMP = 99 CPU_FAILURE_TEMP = 90 CPU_TEST_MINUTES = 7 +IO_GRAPH_WIDTH = 40 +IO_ALT_TEST_SIZE_FACTOR = 0.01 +IO_BLOCK_SIZE = 512 * 1024 +IO_CHUNK_SIZE = 32 * 1024**2 +IO_MINIMUM_TEST_SIZE = 10 * 1024**3 +IO_RATE_REGEX = re.compile( + r'(?P<bytes>\d+) bytes.* (?P<seconds>\S+) s(?:,|ecs )', + ) KEY_NVME = 'nvme_smart_health_information_log' KEY_SMART = 'ata_smart_attributes' KNOWN_DISK_ATTRIBUTES = { @@ -116,6 +128,18 @@ SMC_IDS = { 'Tp5P': {'CPU Temp': False, 'Source': 'PSU2 Secondary Component'}, 'TS0C': {'CPU Temp': False, 'Source': 'CPU B DIMM Exit Ambient'}, } +STATUS_COLORS = { + 'Passed': 'GREEN', + 'Aborted': 'YELLOW', + 'N/A': 'YELLOW', + 'Skipped': 'YELLOW', + 'Unknown': 'YELLOW', + 'Working': 'YELLOW', + 'Denied': 'RED', + 'ERROR': 'RED', + 'Failed': 'RED', + 'TimedOut': 'RED', + } TEMP_COLORS = { float('-inf'): 'CYAN', 00: 'BLUE', diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index 22b8dac3..fdfaba55 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -10,15 +10,24 @@ import re import subprocess import time -from collections import OrderedDict from docopt import docopt from wk import cfg, debug, exe, graph, log, net, std, tmux from wk import os as wk_os +from wk.cfg.hw import ( + BADBLOCKS_REGEX, + IO_GRAPH_WIDTH, + IO_ALT_TEST_SIZE_FACTOR, + IO_BLOCK_SIZE, + IO_CHUNK_SIZE, + IO_MINIMUM_TEST_SIZE, + IO_RATE_REGEX, + STATUS_COLORS, + ) from wk.hw import disk as hw_disk from wk.hw import sensors as hw_sensors from wk.hw import system as hw_system -from wk.hw.test import Test +from wk.hw.test import Test, TestGroup # STATIC VARIABLES @@ -34,18 +43,15 @@ Options: -q --quick Skip menu and perform a quick check ''' LOG = logging.getLogger(__name__) -BADBLOCKS_REGEX = re.compile( - r'^Pass completed, (\d+) bad blocks found.
.(\d+)/(\d+)/(\d+) errors', - re.IGNORECASE, - ) -IO_GRAPH_WIDTH = 40 -IO_ALT_TEST_SIZE_FACTOR = 0.01 -IO_BLOCK_SIZE = 512 * 1024 -IO_CHUNK_SIZE = 32 * 1024**2 -IO_MINIMUM_TEST_SIZE = 10 * 1024**3 -IO_RATE_REGEX = re.compile( - r'(?P<bytes>\d+) bytes.* (?P<seconds>\S+) s(?:,|ecs )', - ) +TEST_GROUPS = { + # Also used to build the menu options + ## NOTE: This needs to be above MENU_SETS + 'CPU & Cooling': 'cpu_stress_tests', + 'Disk Attributes': 'disk_attribute_check', + 'Disk Self-Test': 'disk_self_test', + 'Disk Surface Scan': 'disk_surface_scan', + 'Disk I/O Benchmark': 'disk_io_benchmark', + } MENU_ACTIONS = ( 'Audio Test', 'Keyboard Test', @@ -57,16 +63,9 @@ MENU_ACTIONS_SECRET = ( 'Matrix', 'Tubes', ) -MENU_OPTIONS = ( - 'CPU & Cooling', - 'Disk Attributes', - 'Disk Self-Test', - 'Disk Surface Scan', - 'Disk I/O Benchmark', -) MENU_OPTIONS_QUICK = ('Disk Attributes',) MENU_SETS = { - 'Full Diagnostic': (*MENU_OPTIONS,), + 'Full Diagnostic': (*TEST_GROUPS,), 'Disk Diagnostic': ( 'Disk Attributes', 'Disk Self-Test', @@ -79,19 +78,6 @@ MENU_TOGGLES = ( 'Skip USB Benchmarks', ) PLATFORM = std.PLATFORM -STATUS_COLORS = { - 'Passed': 'GREEN', - 'Aborted': 'YELLOW', - 'N/A': 'YELLOW', - 'Skipped': 'YELLOW', - 'Unknown': 'YELLOW', - 'Working': 'YELLOW', - 'Denied': 'RED', - 'ERROR': 'RED', - 'Failed': 'RED', - 'TimedOut': 'RED', - } - # Error Classes class DeviceTooSmallError(RuntimeError): @@ -107,33 +93,7 @@ class State(): self.log_dir = None self.panes = {} self.system = None - self.tests = OrderedDict({ - 'CPU & Cooling': { - 'Enabled': False, - 'Function': cpu_stress_tests, - 'Objects': [], - }, - 'Disk Attributes': { - 'Enabled': False, - 'Function': disk_attribute_check, - 'Objects': [], - }, - 'Disk Self-Test': { - 'Enabled': False, - 'Function': disk_self_test, - 'Objects': [], - }, - 'Disk Surface Scan': { - 'Enabled': False, - 'Function': disk_surface_scan, - 'Objects': [], - }, - 'Disk I/O Benchmark': { - 'Enabled': False, - 'Function': disk_io_benchmark, - 'Objects': [],
- }, - }) + self.test_groups = [] self.top_text = std.color_string('Hardware Diagnostics', 'GREEN') # Init tmux and start a background process to maintain layout @@ -142,8 +102,8 @@ class State(): def abort_testing(self): """Set unfinished tests as aborted and cleanup tmux panes.""" - for details in self.tests.values(): - for test in details['Objects']: + for group in self.test_groups: + for test in group.test_objects: if test.status in ('Pending', 'Working'): test.set_status('Aborted') @@ -269,8 +229,7 @@ class State(): self.disks.clear() self.layout.clear() self.layout.update(cfg.hw.TMUX_LAYOUT) - for test_data in self.tests.values(): - test_data['Objects'].clear() + self.test_groups.clear() # Set log self.log_dir = log.format_log_path() @@ -300,9 +259,10 @@ class State(): # Add test objects for name, details in menu.options.items(): - self.tests[name]['Enabled'] = details['Selected'] if not details['Selected']: + # Only add selected options continue + if 'CPU' in name: # Create two Test objects which will both be used by cpu_stress_tests # NOTE: Prime95 should be added first @@ -312,12 +272,23 @@ class State(): self.system.tests.append( Test(dev=self.system, label='Cooling', name=name), ) - self.tests[name]['Objects'].extend(self.system.tests) - elif 'Disk' in name: + self.test_groups.append( + TestGroup( + name=name, + function=globals()[TEST_GROUPS[name]], + test_objects=self.system.tests, + ), + ) + + if 'Disk' in name: + test_group = TestGroup( + name=name, function=globals()[TEST_GROUPS[name]], + ) for disk in self.disks: test_obj = Test(dev=disk, label=disk.path.name, name=name) disk.tests.append(test_obj) - self.tests[name]['Objects'].append(test_obj) + test_group.test_objects.append(test_obj) + self.test_groups.append(test_group) # Run safety checks self.disk_safety_checks(prep=True) @@ -413,16 +384,12 @@ class State(): report = [] width = cfg.hw.TMUX_SIDE_WIDTH - for name, details in self.tests.items(): - if not details['Enabled']: - continue - - # Add 
test details - report.append(std.color_string(name, 'BLUE')) - for test_obj in details['Objects']: + for group in self.test_groups: + report.append(std.color_string(group.name, 'BLUE')) + for test in group.test_objects: report.append(std.color_string( - [test_obj.label, f'{test_obj.status:>{width-len(test_obj.label)}}'], - [None, STATUS_COLORS.get(test_obj.status, None)], + [test.label, f'{test.status:>{width-len(test.label)}}'], + [None, STATUS_COLORS.get(test.status, None)], sep='', )) @@ -471,7 +438,7 @@ def build_menu(cli_mode=False, quick_mode=False): menu.add_action(action) for action in MENU_ACTIONS_SECRET: menu.add_action(action, {'Hidden': True}) - for option in MENU_OPTIONS: + for option in TEST_GROUPS: menu.add_option(option, {'Selected': True}) for toggle in MENU_TOGGLES: menu.add_toggle(toggle, {'Selected': True}) @@ -1362,21 +1329,18 @@ def run_diags(state, menu, quick_mode=False): state.init_diags(menu) # Just return if no tests were selected - if not any(details['Enabled'] for details in state.tests.values()): + if not state.test_groups: std.print_warning('No tests selected?') std.pause() return # Run tests - for name, details in state.tests.items(): - if not details['Enabled']: - # Skip disabled tests - continue + for group in state.test_groups: # Run test(s) - function = details['Function'] - args = [details['Objects']] - if name == 'Disk I/O Benchmark': + function = group.function + args = [group.test_objects] + if group.name == 'Disk I/O Benchmark': args.append(menu.toggles['Skip USB Benchmarks']['Selected']) std.clear_screen() try: @@ -1388,15 +1352,17 @@ def run_diags(state, menu, quick_mode=False): break # Run safety checks - if name.startswith('Disk'): - state.disk_safety_checks(wait_for_self_tests=name != 'Disk Attributes') + if group.name.startswith('Disk'): + state.disk_safety_checks( + wait_for_self_tests=group.name != 'Disk Attributes', + ) # Handle aborts if aborted: - for details in state.tests.values(): - for test_obj in 
details['Objects']: - if test_obj.status == 'Pending': - test_obj.set_status('Aborted') + for group in state.test_groups: + for test in group.test_objects: + if test.status == 'Pending': + test.set_status('Aborted') # Show results show_results(state) @@ -1462,17 +1428,19 @@ def show_results(state): state.update_top_pane('Results') # CPU Tests - cpu_tests_enabled = [data['Enabled'] for name, data in state.tests.items() - if name.startswith('CPU')] - if any(cpu_tests_enabled): + cpu_tests_enabled = [ + group.name for group in state.test_groups if 'CPU' in group.name + ] + if cpu_tests_enabled: std.print_success('CPU:') std.print_report(state.system.generate_report()) std.print_standard(' ') # Disk Tests - disk_tests_enabled = [data['Enabled'] for name, data in state.tests.items() - if name.startswith('Disk')] - if any(disk_tests_enabled): + disk_tests_enabled = [ + group.name for group in state.test_groups if 'Disk' in group.name + ] + if disk_tests_enabled: std.print_success(f'Disk{"s" if len(state.disks) > 1 else ""}:') for disk in state.disks: std.print_report(disk.generate_report()) diff --git a/scripts/wk/hw/test.py b/scripts/wk/hw/test.py index 0f2305df..e9653d9a 100644 --- a/scripts/wk/hw/test.py +++ b/scripts/wk/hw/test.py @@ -2,7 +2,7 @@ # vim: sts=2 sw=2 ts=2 from dataclasses import dataclass, field -from typing import Any +from typing import Any, Callable @dataclass(slots=True) class Test: @@ -25,3 +25,11 @@ class Test: return self.status = status + + +@dataclass(slots=True) +class TestGroup: + """Object for tracking groups of tests.""" + name: str + function: Callable + test_objects: list[Test] = field(default_factory=list) From 99dd7661d4a660811fdb7ad950cca3a65f96f7f0 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Tue, 5 Apr 2022 18:11:06 -0600 Subject: [PATCH 07/18] Split hardware diagnostics into multiple files --- scripts/wk/hw/__init__.py | 7 + scripts/wk/hw/audio.py | 37 +++ scripts/wk/hw/benchmark.py | 212 ++++++++++++++ 
scripts/wk/hw/cpu.py | 205 +++++++++++++ scripts/wk/hw/diags.py | 538 ++-------------------------------- scripts/wk/hw/keyboard.py | 31 ++ scripts/wk/hw/network.py | 58 ++++ scripts/wk/hw/screensavers.py | 40 +++ scripts/wk/hw/surface_scan.py | 85 ++++++ 9 files changed, 697 insertions(+), 516 deletions(-) create mode 100644 scripts/wk/hw/audio.py create mode 100644 scripts/wk/hw/benchmark.py create mode 100644 scripts/wk/hw/cpu.py create mode 100644 scripts/wk/hw/keyboard.py create mode 100644 scripts/wk/hw/network.py create mode 100644 scripts/wk/hw/screensavers.py create mode 100644 scripts/wk/hw/surface_scan.py diff --git a/scripts/wk/hw/__init__.py b/scripts/wk/hw/__init__.py index 092a7428..821c9761 100644 --- a/scripts/wk/hw/__init__.py +++ b/scripts/wk/hw/__init__.py @@ -1,8 +1,15 @@ """WizardKit: hw module init""" +from . import audio +from . import benchmark +from . import cpu from . import ddrescue from . import diags from . import disk +from . import keyboard +from . import network +from . import screensavers from . import sensors +from . import surface_scan from . import system from . 
import test diff --git a/scripts/wk/hw/audio.py b/scripts/wk/hw/audio.py new file mode 100644 index 00000000..f6a984b5 --- /dev/null +++ b/scripts/wk/hw/audio.py @@ -0,0 +1,37 @@ +"""WizardKit: Audio test functions""" +# vim: sts=2 sw=2 ts=2 + +import logging + +from wk.exe import run_program +from wk.std import PLATFORM + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Functions +def audio_test(): + """Run an OS-specific audio test.""" + if PLATFORM == 'Linux': + audio_test_linux() + + +def audio_test_linux(): + """Run an audio test using amixer and speaker-test.""" + LOG.info('Audio Test') + + # Set volume + for source in ('Master', 'PCM'): + cmd = f'amixer -q set "{source}" 80% unmute'.split() + run_program(cmd, check=False) + + # Run audio tests + for mode in ('pink', 'wav'): + cmd = f'speaker-test -c 2 -l 1 -t {mode}'.split() + run_program(cmd, check=False, pipe=False) + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") diff --git a/scripts/wk/hw/benchmark.py b/scripts/wk/hw/benchmark.py new file mode 100644 index 00000000..78664192 --- /dev/null +++ b/scripts/wk/hw/benchmark.py @@ -0,0 +1,212 @@ +"""WizardKit: Benchmark test functions""" +# vim: sts=2 sw=2 ts=2 + +import logging + +from subprocess import PIPE, STDOUT + +from wk import graph +from wk.cfg.hw import ( + IO_ALT_TEST_SIZE_FACTOR, + IO_BLOCK_SIZE, + IO_CHUNK_SIZE, + IO_GRAPH_WIDTH, + IO_MINIMUM_TEST_SIZE, + IO_RATE_REGEX, + THRESH_HDD_AVG_HIGH, + THRESH_HDD_AVG_LOW, + THRESH_HDD_MIN, + THRESH_SSD_AVG_HIGH, + THRESH_SSD_AVG_LOW, + THRESH_SSD_MIN, + ) +from wk.exe import run_program +from wk.std import ( + PLATFORM, + strip_colors, + color_string, + ) + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Error Classes +class DeviceTooSmallError(RuntimeError): + """Raised when a device is too small to test.""" + + +# Functions +def calc_io_dd_values(dev_size): + """Calculate I/O benchmark dd values, returns dict. 
+ + Calculations: + The minimum dev size is IO_GRAPH_WIDTH * IO_CHUNK_SIZE + (e.g. 1.25 GB for a width of 40 and a chunk size of 32MB) + + read_total is the area to be read in bytes + If the dev is < IO_MINIMUM_TEST_SIZE then it's the whole dev + Else it's the larger of IO_MINIMUM_TEST_SIZE or the alt test size + (determined by dev * IO_ALT_TEST_SIZE_FACTOR) + + read_chunks is the number of groups of IO_CHUNK_SIZE in test_obj.dev + This number is reduced to a multiple of IO_GRAPH_WIDTH in order + to allow for the data to be condensed cleanly + + read_blocks is the chunk size in number of blocks + (e.g. 64 if block size is 512KB and chunk size is 32MB) + + skip_total is the number of IO_BLOCK_SIZE groups not tested + skip_blocks is the number of blocks to skip per IO_CHUNK_SIZE + skip_extra_rate is how often to add an additional skip block + This is needed to ensure an even testing across the dev + This is calculated by using the fractional amount left off + of the skip_blocks variable + """ + read_total = min(IO_MINIMUM_TEST_SIZE, dev_size) + read_total = max(read_total, dev_size*IO_ALT_TEST_SIZE_FACTOR) + read_chunks = int(read_total // IO_CHUNK_SIZE) + read_chunks -= read_chunks % IO_GRAPH_WIDTH + if read_chunks < IO_GRAPH_WIDTH: + raise DeviceTooSmallError + read_blocks = int(IO_CHUNK_SIZE / IO_BLOCK_SIZE) + read_total = read_chunks * IO_CHUNK_SIZE + skip_total = int((dev_size - read_total) // IO_BLOCK_SIZE) + skip_blocks = int((skip_total / read_chunks) // 1) + skip_extra_rate = 0 + try: + skip_extra_rate = 1 + int(1 / ((skip_total / read_chunks) % 1)) + except ZeroDivisionError: + # skip_extra_rate == 0 is fine + pass + + # Done + return { + 'Read Chunks': read_chunks, + 'Read Blocks': read_blocks, + 'Skip Blocks': skip_blocks, + 'Skip Extra': skip_extra_rate, + } + + +def check_io_results(test_obj, rate_list, graph_width): + """Add colored report lines built from rate_list to test_obj and set its status.""" + avg_read = sum(rate_list) / len(rate_list) + min_read =
min(rate_list) + max_read = max(rate_list) + if test_obj.dev.ssd: + thresh_min = THRESH_SSD_MIN + thresh_avg_high = THRESH_SSD_AVG_HIGH + thresh_avg_low = THRESH_SSD_AVG_LOW + else: + thresh_min = THRESH_HDD_MIN + thresh_avg_high = THRESH_HDD_AVG_HIGH + thresh_avg_low = THRESH_HDD_AVG_LOW + + # Add horizontal graph to report + for line in graph.generate_horizontal_graph(rate_list, graph_width): + if not strip_colors(line).strip(): + # Skip empty lines + continue + test_obj.report.append(line) + + # Add read rates to report + test_obj.report.append( + f'Read speeds avg: {avg_read/(1000**2):3.1f}' + f' min: {min_read/(1000**2):3.1f}' + f' max: {max_read/(1000**2):3.1f}' + ) + + # Compare against thresholds + if min_read <= thresh_min and avg_read <= thresh_avg_high: + test_obj.failed = True + elif avg_read <= thresh_avg_low: + test_obj.failed = True + else: + test_obj.passed = True + + # Set status + if test_obj.failed: + test_obj.set_status('Failed') + elif test_obj.passed: + test_obj.set_status('Passed') + else: + test_obj.set_status('Unknown') + + +def run_io_test(test_obj, log_path): + """Run I/O benchmark and handle exceptions.""" + dev_path = test_obj.dev.path + if PLATFORM == 'Darwin': + # Use "RAW" disks under macOS + dev_path = dev_path.with_name(f'r{dev_path.name}') + LOG.info('Using %s for better performance', dev_path) + offset = 0 + read_rates = [] + test_obj.report.append(color_string('I/O Benchmark', 'BLUE')) + + # Get dd values or bail + try: + dd_values = calc_io_dd_values(test_obj.dev.size) + except DeviceTooSmallError: + test_obj.set_status('N/A') + test_obj.report.append( + color_string('Disk too small to test', 'YELLOW'), + ) + return + + # Run dd read tests + for _i in range(dd_values['Read Chunks']): + _i += 1 + + # Build cmd + skip = dd_values['Skip Blocks'] + if dd_values['Skip Extra'] and _i % dd_values['Skip Extra'] == 0: + skip += 1 + cmd = [ + 'sudo', 'dd', + f'bs={IO_BLOCK_SIZE}', + f'skip={offset+skip}', + f'count={dd_values["Read 
Blocks"]}', + f'if={dev_path}', + 'of=/dev/null', + ] + if PLATFORM == 'Linux': + cmd.append('iflag=direct') + + # Run and get read rate + try: + proc = run_program( + cmd, + pipe=False, + stdout=PIPE, + stderr=STDOUT, + ) + except PermissionError as err: + # Since we're using sudo we can't kill dd + # Assuming this happened during a CTRL+c + raise KeyboardInterrupt from err + match = IO_RATE_REGEX.search(proc.stdout) + if match: + read_rates.append( + int(match.group('bytes')) / float(match.group('seconds')), + ) + match.group(1) + + # Show progress + with open(log_path, 'a', encoding='utf-8') as _f: + if _i % 5 == 0: + percent = (_i / dd_values['Read Chunks']) * 100 + _f.write(f' {graph.vertical_graph_line(percent, read_rates[-1])}\n') + + # Update offset + offset += dd_values['Read Blocks'] + skip + + # Check results + check_io_results(test_obj, read_rates, IO_GRAPH_WIDTH) + + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") diff --git a/scripts/wk/hw/cpu.py b/scripts/wk/hw/cpu.py new file mode 100644 index 00000000..44329e7a --- /dev/null +++ b/scripts/wk/hw/cpu.py @@ -0,0 +1,205 @@ +"""WizardKit: CPU test functions""" +# vim: sts=2 sw=2 ts=2 + +import logging +import re +import subprocess + +from wk import exe +from wk.cfg.hw import CPU_FAILURE_TEMP +from wk.os.mac import set_fans as macos_set_fans +from wk.std import ( + PLATFORM, + color_string, + print_error, + print_warning, + ) +from wk.tmux import respawn_pane as tmux_respawn_pane + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Functions +def check_cooling_results(test_obj, sensors, run_sysbench=False): + """Check cooling results and update test_obj.""" + max_temp = sensors.cpu_max_temp() + temp_labels = ['Idle', 'Max', 'Cooldown'] + if run_sysbench: + temp_labels.append('Sysbench') + + # Check temps + if not max_temp: + test_obj.set_status('Unknown') + elif max_temp >= CPU_FAILURE_TEMP: + test_obj.failed = True + test_obj.set_status('Failed') + 
elif 'Aborted' not in test_obj.status: + test_obj.passed = True + test_obj.set_status('Passed') + + # Add temps to report + for line in sensors.generate_report(*temp_labels, only_cpu=True): + test_obj.report.append(f' {line}') + + +def check_mprime_results(test_obj, working_dir): + """Check mprime log files and update test_obj.""" + passing_lines = {} + warning_lines = {} + + def _read_file(log_name): + """Read file and split into lines, returns list.""" + lines = [] + try: + with open(f'{working_dir}/{log_name}', 'r', encoding='utf-8') as _f: + lines = _f.readlines() + except FileNotFoundError: + # File may be missing on older systems + lines = [] + + return lines + + # results.txt (check if failed) + for line in _read_file('results.txt'): + line = line.strip() + if re.search(r'(error|fail)', line, re.IGNORECASE): + warning_lines[line] = None + + # prime.log (check if passed) + for line in _read_file('prime.log'): + line = line.strip() + match = re.search( + r'(completed.*(\d+) errors, (\d+) warnings)', line, re.IGNORECASE) + if match: + if int(match.group(2)) + int(match.group(3)) > 0: + # Errors and/or warnings encountered + warning_lines[match.group(1).capitalize()] = None + else: + # No errors/warnings + passing_lines[match.group(1).capitalize()] = None + + # Update status + if warning_lines: + test_obj.failed = True + test_obj.set_status('Failed') + elif passing_lines and 'Aborted' not in test_obj.status: + test_obj.passed = True + test_obj.set_status('Passed') + else: + test_obj.set_status('Unknown') + + # Update report + for line in passing_lines: + test_obj.report.append(f' {line}') + for line in warning_lines: + test_obj.report.append(color_string(f' {line}', 'YELLOW')) + if not (passing_lines or warning_lines): + test_obj.report.append(color_string(' Unknown result', 'YELLOW')) + + +def start_mprime(working_dir, log_path): + """Start mprime and save filtered output to log, returns Popen object.""" + set_apple_fan_speed('max') + proc_mprime =
subprocess.Popen( # pylint: disable=consider-using-with + ['mprime', '-t'], + cwd=working_dir, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + proc_grep = subprocess.Popen( # pylint: disable=consider-using-with + 'grep --ignore-case --invert-match --line-buffered stress.txt'.split(), + stdin=proc_mprime.stdout, + stdout=subprocess.PIPE, + ) + proc_mprime.stdout.close() + save_nsbr = exe.NonBlockingStreamReader(proc_grep.stdout) + exe.start_thread( + save_nsbr.save_to_file, + args=(proc_grep, log_path), + ) + + # Return objects + return proc_mprime + + +def start_sysbench(sensors, sensors_out, log_path, pane): + """Start sysbench, returns tuple with Popen object and file handle.""" + set_apple_fan_speed('max') + sysbench_cmd = [ + 'sysbench', + f'--threads={exe.psutil.cpu_count()}', + '--cpu-max-prime=1000000000', + 'cpu', + 'run', + ] + + # Restart background monitor for Sysbench + sensors.stop_background_monitor() + sensors.start_background_monitor( + sensors_out, + alt_max='Sysbench', + thermal_action=('killall', 'sysbench', '-INT'), + ) + + # Update bottom pane + tmux_respawn_pane(pane, watch_file=log_path, watch_cmd='tail') + + # Start sysbench + filehandle_sysbench = open( # pylint: disable=consider-using-with + log_path, 'a', encoding='utf-8', + ) + proc_sysbench = exe.popen_program(sysbench_cmd, stdout=filehandle_sysbench) + + # Done + return (proc_sysbench, filehandle_sysbench) + + +def set_apple_fan_speed(speed): + """Set Apple fan speed.""" + cmd = None + + # Check + if speed not in ('auto', 'max'): + raise RuntimeError(f'Invalid speed {speed}') + + # Set cmd + if PLATFORM == 'Darwin': + try: + macos_set_fans(speed) + except (RuntimeError, ValueError, subprocess.CalledProcessError) as err: + LOG.error('Failed to set fans to %s', speed) + LOG.error('Error: %s', err) + print_error(f'Failed to set fans to {speed}') + for line in str(err).splitlines(): + print_warning(f' {line.strip()}') + elif PLATFORM == 'Linux': + cmd = ['apple-fans', speed] + 
exe.run_program(cmd, check=False) + + +def stop_mprime(proc_mprime): + """Stop mprime gracefully, then forcefully as needed.""" + proc_mprime.terminate() + try: + proc_mprime.wait(timeout=5) + except subprocess.TimeoutExpired: + proc_mprime.kill() + set_apple_fan_speed('auto') + + +def stop_sysbench(proc_sysbench, filehandle_sysbench): + """Stop sysbench.""" + proc_sysbench.terminate() + try: + proc_sysbench.wait(timeout=5) + except subprocess.TimeoutExpired: + proc_sysbench.kill() + filehandle_sysbench.flush() + filehandle_sysbench.close() + set_apple_fan_speed('auto') + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index fdfaba55..64fcabf9 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -1,32 +1,27 @@ """WizardKit: Hardware diagnostics""" -# pylint: disable=too-many-lines # vim: sts=2 sw=2 ts=2 import atexit import logging import os import pathlib -import re import subprocess import time from docopt import docopt -from wk import cfg, debug, exe, graph, log, net, std, tmux -from wk import os as wk_os -from wk.cfg.hw import ( - BADBLOCKS_REGEX, - IO_GRAPH_WIDTH, - IO_ALT_TEST_SIZE_FACTOR, - IO_BLOCK_SIZE, - IO_CHUNK_SIZE, - IO_MINIMUM_TEST_SIZE, - IO_RATE_REGEX, - STATUS_COLORS, - ) +from wk import cfg, debug, exe, log, std, tmux +from wk.cfg.hw import STATUS_COLORS +from wk.hw import benchmark as hw_benchmark +from wk.hw import cpu as hw_cpu from wk.hw import disk as hw_disk from wk.hw import sensors as hw_sensors +from wk.hw import surface_scan as hw_surface_scan from wk.hw import system as hw_system +from wk.hw.audio import audio_test +from wk.hw.keyboard import keyboard_test +from wk.hw.network import network_test +from wk.hw.screensavers import screensaver from wk.hw.test import Test, TestGroup @@ -79,11 +74,6 @@ MENU_TOGGLES = ( ) PLATFORM = std.PLATFORM -# Error Classes -class DeviceTooSmallError(RuntimeError): - """Raised when a device 
is too small to test.""" - - # Classes class State(): """Object for tracking hardware diagnostic data.""" @@ -407,27 +397,6 @@ class State(): # Functions -def audio_test(): - """Run an OS-specific audio test.""" - if PLATFORM == 'Linux': - audio_test_linux() - - -def audio_test_linux(): - """Run an audio test using amixer and speaker-test.""" - LOG.info('Audio Test') - - # Set volume - for source in ('Master', 'PCM'): - cmd = f'amixer -q set "{source}" 80% unmute'.split() - exe.run_program(cmd, check=False) - - # Run audio tests - for mode in ('pink', 'wav'): - cmd = f'speaker-test -c 2 -l 1 -t {mode}'.split() - exe.run_program(cmd, check=False, pipe=False) - - def build_menu(cli_mode=False, quick_mode=False): # pylint: disable=too-many-branches """Build main menu, returns wk.std.Menu.""" @@ -480,180 +449,6 @@ def build_menu(cli_mode=False, quick_mode=False): return menu -def calc_io_dd_values(dev_size): - """Calculate I/O benchmark dd values, returns dict. - - Calculations: - The minimum dev size is IO_GRAPH_WIDTH * IO_CHUNK_SIZE - (e.g. 1.25 GB for a width of 40 and a chunk size of 32MB) - - read_total is the area to be read in bytes - If the dev is < IO_MINIMUM_TEST_SIZE then it's the whole dev - Else it's the larger of IO_MINIMUM_TEST_SIZE or the alt test size - (determined by dev * IO_ALT_TEST_SIZE_FACTOR) - - read_chunks is the number of groups of IO_CHUNK_SIZE in test_obj.dev - This number is reduced to a multiple of IO_GRAPH_WIDTH in order - to allow for the data to be condensed cleanly - - read_blocks is the chunk size in number of blocks - (e.g. 
64 if block size is 512KB and chunk size is 32MB - - skip_total is the number of IO_BLOCK_SIZE groups not tested - skip_blocks is the number of blocks to skip per IO_CHUNK_SIZE - skip_extra_rate is how often to add an additional skip block - This is needed to ensure an even testing across the dev - This is calculated by using the fractional amount left off - of the skip_blocks variable - """ - read_total = min(IO_MINIMUM_TEST_SIZE, dev_size) - read_total = max(read_total, dev_size*IO_ALT_TEST_SIZE_FACTOR) - read_chunks = int(read_total // IO_CHUNK_SIZE) - read_chunks -= read_chunks % IO_GRAPH_WIDTH - if read_chunks < IO_GRAPH_WIDTH: - raise DeviceTooSmallError - read_blocks = int(IO_CHUNK_SIZE / IO_BLOCK_SIZE) - read_total = read_chunks * IO_CHUNK_SIZE - skip_total = int((dev_size - read_total) // IO_BLOCK_SIZE) - skip_blocks = int((skip_total / read_chunks) // 1) - skip_extra_rate = 0 - try: - skip_extra_rate = 1 + int(1 / ((skip_total / read_chunks) % 1)) - except ZeroDivisionError: - # skip_extra_rate == 0 is fine - pass - - # Done - return { - 'Read Chunks': read_chunks, - 'Read Blocks': read_blocks, - 'Skip Blocks': skip_blocks, - 'Skip Extra': skip_extra_rate, - } - - -def check_cooling_results(test_obj, sensors, run_sysbench=False): - """Check cooling results and update test_obj.""" - max_temp = sensors.cpu_max_temp() - temp_labels = ['Idle', 'Max', 'Cooldown'] - if run_sysbench: - temp_labels.append('Sysbench') - - # Check temps - if not max_temp: - test_obj.set_status('Unknown') - elif max_temp >= cfg.hw.CPU_FAILURE_TEMP: - test_obj.failed = True - test_obj.set_status('Failed') - elif 'Aborted' not in test_obj.status: - test_obj.passed = True - test_obj.set_status('Passed') - - # Add temps to report - for line in sensors.generate_report(*temp_labels, only_cpu=True): - test_obj.report.append(f' {line}') - - -def check_io_benchmark_results(test_obj, rate_list, graph_width): - """Generate colored report using rate_list, returns list of str.""" - avg_read = 
sum(rate_list) / len(rate_list) - min_read = min(rate_list) - max_read = max(rate_list) - if test_obj.dev.ssd: - thresh_min = cfg.hw.THRESH_SSD_MIN - thresh_avg_high = cfg.hw.THRESH_SSD_AVG_HIGH - thresh_avg_low = cfg.hw.THRESH_SSD_AVG_LOW - else: - thresh_min = cfg.hw.THRESH_HDD_MIN - thresh_avg_high = cfg.hw.THRESH_HDD_AVG_HIGH - thresh_avg_low = cfg.hw.THRESH_HDD_AVG_LOW - - # Add horizontal graph to report - for line in graph.generate_horizontal_graph(rate_list, graph_width): - if not std.strip_colors(line).strip(): - # Skip empty lines - continue - test_obj.report.append(line) - - # Add read rates to report - test_obj.report.append( - f'Read speeds avg: {avg_read/(1000**2):3.1f}' - f' min: {min_read/(1000**2):3.1f}' - f' max: {max_read/(1000**2):3.1f}' - ) - - # Compare against thresholds - if min_read <= thresh_min and avg_read <= thresh_avg_high: - test_obj.failed = True - elif avg_read <= thresh_avg_low: - test_obj.failed = True - else: - test_obj.passed = True - - # Set status - if test_obj.failed: - test_obj.set_status('Failed') - elif test_obj.passed: - test_obj.set_status('Passed') - else: - test_obj.set_status('Unknown') - - -def check_mprime_results(test_obj, working_dir): - """Check mprime log files and update test_obj.""" - passing_lines = {} - warning_lines = {} - - def _read_file(log_name): - """Read file and split into lines, returns list.""" - lines = [] - try: - with open(f'{working_dir}/{log_name}', 'r', encoding='utf-8') as _f: - lines = _f.readlines() - except FileNotFoundError: - # File may be missing on older systems - lines = [] - - return lines - - # results.txt (check if failed) - for line in _read_file('results.txt'): - line = line.strip() - if re.search(r'(error|fail)', line, re.IGNORECASE): - warning_lines[line] = None - - # print.log (check if passed) - for line in _read_file('prime.log'): - line = line.strip() - match = re.search( - r'(completed.*(\d+) errors, (\d+) warnings)', line, re.IGNORECASE) - if match: - if 
int(match.group(2)) + int(match.group(3)) > 0: - # Errors and/or warnings encountered - warning_lines[match.group(1).capitalize()] = None - else: - # No errors/warnings - passing_lines[match.group(1).capitalize()] = None - - # Update status - if warning_lines: - test_obj.failed = True - test_obj.set_status('Failed') - elif passing_lines and 'Aborted' not in test_obj.status: - test_obj.passed = True - test_obj.set_status('Passed') - else: - test_obj.set_status('Unknown') - - # Update report - for line in passing_lines: - test_obj.report.append(f' {line}') - for line in warning_lines: - test_obj.report.append(std.color_string(f' {line}', 'YELLOW')) - if not (passing_lines or warning_lines): - test_obj.report.append(std.color_string(' Unknown result', 'YELLOW')) - - def check_self_test_results(test_obj, aborted=False): """Check SMART self-test results.""" test_obj.report.append(std.color_string('Self-Test', 'BLUE')) @@ -726,8 +521,8 @@ def cpu_stress_tests(state, test_objects): # Stress CPU std.print_info('Running stress test') - set_apple_fan_speed('max') - proc_mprime = start_mprime(state.log_dir, prime_log) + hw_cpu.set_apple_fan_speed('max') + proc_mprime = hw_cpu.start_mprime(state.log_dir, prime_log) # Show countdown print('') @@ -737,7 +532,7 @@ def cpu_stress_tests(state, test_objects): aborted = True # Stop Prime95 - stop_mprime(proc_mprime) + hw_cpu.stop_mprime(proc_mprime) # Update progress if necessary if sensors.cpu_reached_critical_temp() or aborted: @@ -754,7 +549,9 @@ def cpu_stress_tests(state, test_objects): # Check Prime95 results test_mprime_obj.report.append(std.color_string('Prime95', 'BLUE')) - check_mprime_results(test_obj=test_mprime_obj, working_dir=state.log_dir) + hw_cpu.check_mprime_results( + test_obj=test_mprime_obj, working_dir=state.log_dir, + ) # Run Sysbench test if necessary run_sysbench = ( @@ -767,7 +564,7 @@ def cpu_stress_tests(state, test_objects): std.clear_screen() std.print_info('Running alternate stress test') print('') - 
proc_sysbench, filehandle_sysbench = start_sysbench( + proc_sysbench, filehandle_sysbench = hw_cpu.start_sysbench( sensors, sensors_out, log_path=prime_log.with_name('sysbench.log'), @@ -780,7 +577,7 @@ def cpu_stress_tests(state, test_objects): LOG.error('Failed to find sysbench process', exc_info=True) except KeyboardInterrupt: aborted = True - stop_sysbench(proc_sysbench, filehandle_sysbench) + hw_cpu.stop_sysbench(proc_sysbench, filehandle_sysbench) # Update progress # NOTE: CPU critical temp check isn't really necessary @@ -792,7 +589,7 @@ def cpu_stress_tests(state, test_objects): # Check Cooling results test_cooling_obj.report.append(std.color_string('Temps', 'BLUE')) - check_cooling_results(test_cooling_obj, sensors, run_sysbench) + hw_cpu.check_cooling_results(test_cooling_obj, sensors, run_sysbench) # Cleanup state.update_progress_pane() @@ -832,77 +629,6 @@ def disk_io_benchmark(state, test_objects, skip_usb=True): LOG.info('Disk I/O Benchmark (dd)') aborted = False - def _run_io_benchmark(test_obj, log_path): - """Run I/O benchmark and handle exceptions.""" - dev_path = test_obj.dev.path - if PLATFORM == 'Darwin': - # Use "RAW" disks under macOS - dev_path = dev_path.with_name(f'r{dev_path.name}') - LOG.info('Using %s for better performance', dev_path) - offset = 0 - read_rates = [] - test_obj.report.append(std.color_string('I/O Benchmark', 'BLUE')) - - # Get dd values or bail - try: - dd_values = calc_io_dd_values(test_obj.dev.size) - except DeviceTooSmallError: - test_obj.set_status('N/A') - test_obj.report.append( - std.color_string('Disk too small to test', 'YELLOW'), - ) - return - - # Run dd read tests - for _i in range(dd_values['Read Chunks']): - _i += 1 - - # Build cmd - skip = dd_values['Skip Blocks'] - if dd_values['Skip Extra'] and _i % dd_values['Skip Extra'] == 0: - skip += 1 - cmd = [ - 'sudo', 'dd', - f'bs={IO_BLOCK_SIZE}', - f'skip={offset+skip}', - f'count={dd_values["Read Blocks"]}', - f'if={dev_path}', - 'of=/dev/null', - ] - if 
PLATFORM == 'Linux': - cmd.append('iflag=direct') - - # Run and get read rate - try: - proc = exe.run_program( - cmd, - pipe=False, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - except PermissionError as err: - # Since we're using sudo we can't kill dd - # Assuming this happened during a CTRL+c - raise KeyboardInterrupt from err - match = IO_RATE_REGEX.search(proc.stdout) - if match: - read_rates.append( - int(match.group('bytes')) / float(match.group('seconds')), - ) - match.group(1) - - # Show progress - with open(log_path, 'a', encoding='utf-8') as _f: - if _i % 5 == 0: - percent = (_i / dd_values['Read Chunks']) * 100 - _f.write(f' {graph.vertical_graph_line(percent, read_rates[-1])}\n') - - # Update offset - offset += dd_values['Read Blocks'] + skip - - # Check results - check_io_benchmark_results(test_obj, read_rates, IO_GRAPH_WIDTH) - # Run benchmarks state.update_top_pane( f'Disk I/O Benchmark{"s" if len(test_objects) > 1 else ""}', @@ -935,7 +661,7 @@ def disk_io_benchmark(state, test_objects, skip_usb=True): ) state.update_progress_pane() try: - _run_io_benchmark(test, test_log) + hw_benchmark.run_io_test(test, test_log) except KeyboardInterrupt: aborted = True except (subprocess.CalledProcessError, TypeError, ValueError) as err: @@ -1052,65 +778,6 @@ def disk_surface_scan(state, test_objects): threads = [] state.panes['badblocks'] = [] - def _run_surface_scan(test_obj, log_path): - """Run surface scan and handle exceptions.""" - block_size = '1024' - dev = test_obj.dev - dev_path = test_obj.dev.path - if PLATFORM == 'Darwin': - # Use "RAW" disks under macOS - dev_path = dev_path.with_name(f'r{dev_path.name}') - LOG.info('Using %s for better performance', dev_path) - test_obj.report.append(std.color_string('badblocks', 'BLUE')) - test_obj.set_status('Working') - - # Increase block size if necessary - if (dev.phy_sec == 4096 - or dev.size >= cfg.hw.BADBLOCKS_LARGE_DISK): - block_size = '4096' - - # Start scan - cmd = ['sudo', 'badblocks', 
'-sv', '-b', block_size, '-e', '1', dev_path] - with open(log_path, 'a', encoding='utf-8') as _f: - size_str = std.bytes_to_string(dev.size, use_binary=False) - _f.write( - std.color_string( - ['[', dev.path.name, ' ', size_str, ']\n'], - [None, 'BLUE', None, 'CYAN', None], - sep='', - ), - ) - _f.flush() - exe.run_program( - cmd, - check=False, - pipe=False, - stderr=subprocess.STDOUT, - stdout=_f, - ) - - # Check results - with open(log_path, 'r', encoding='utf-8') as _f: - for line in _f.readlines(): - line = std.strip_colors(line.strip()) - if not line or line.startswith('Checking') or line.startswith('['): - # Skip - continue - match = BADBLOCKS_REGEX.search(line) - if match: - if all(s == '0' for s in match.groups()): - test_obj.passed = True - test_obj.report.append(f' {line}') - test_obj.set_status('Passed') - else: - test_obj.failed = True - test_obj.report.append(f' {std.color_string(line, "YELLOW")}') - test_obj.set_status('Failed') - else: - test_obj.report.append(f' {std.color_string(line, "YELLOW")}') - if not (test_obj.passed or test_obj.failed): - test_obj.set_status('Unknown') - # Update panes state.update_top_pane( f'Disk Surface Scan{"s" if len(test_objects) > 1 else ""}', @@ -1141,7 +808,9 @@ def disk_surface_scan(state, test_objects): # Start thread test_log = f'{state.log_dir}/{test.dev.path.name}_badblocks.log' - threads.append(exe.start_thread(_run_surface_scan, args=(test, test_log))) + threads.append(exe.start_thread( + hw_surface_scan.run_scan, args=(test, test_log), + )) # Show progress if threads[-1].is_alive(): @@ -1182,13 +851,6 @@ def disk_surface_scan(state, test_objects): raise std.GenericAbort('Aborted') -def keyboard_test(): - """Test keyboard using xev.""" - LOG.info('Keyboard Test (xev)') - cmd = ['xev', '-event', 'keyboard'] - exe.run_program(cmd, check=False, pipe=False) - - def main(): # pylint: disable=too-many-branches """Main function for hardware diagnostics.""" @@ -1262,39 +924,6 @@ def main(): 
state.update_top_pane('Main Menu') -def network_test(): - """Run network tests.""" - LOG.info('Network Test') - try_and_print = std.TryAndPrint() - result = try_and_print.run( - message='Network connection...', - function=net.connected_to_private_network, - msg_good='OK', - raise_on_error=True, - ) - - # Bail if not connected - if result['Failed']: - std.print_warning('Please connect to a network and try again') - std.pause('Press Enter to return to main menu...') - return - - # Show IP address(es) - net.show_valid_addresses() - - # Ping tests - try_and_print.run( - 'Internet connection...', net.ping, msg_good='OK', addr='8.8.8.8') - try_and_print.run( - 'DNS resolution...', net.ping, msg_good='OK', addr='google.com') - - # Speedtest - try_and_print.run('Speedtest...', net.speedtest) - - # Done - std.pause('Press Enter to return to main menu...') - - def print_countdown(proc, seconds): """Print countdown to screen while proc is alive.""" for i in range(seconds): @@ -1376,51 +1005,6 @@ def run_diags(state, menu, quick_mode=False): std.pause('Press Enter to return to main menu...') -def screensaver(name): - """Show screensaver""" - LOG.info('Screensaver (%s)', name) - if name == 'matrix': - cmd = ['cmatrix', '-abs'] - elif name == 'pipes': - cmd = [ - 'pipes.sh', - '-t', '0', - '-t', '1', - '-t', '2', - '-t', '3', - '-t', '5', - '-R', '-r', '4000', - ] - - # Switch pane to fullscreen and start screensaver - tmux.zoom_pane() - exe.run_program(cmd, check=False, pipe=False, stderr=subprocess.PIPE) - tmux.zoom_pane() - - -def set_apple_fan_speed(speed): - """Set Apple fan speed.""" - cmd = None - - # Check - if speed not in ('auto', 'max'): - raise RuntimeError(f'Invalid speed {speed}') - - # Set cmd - if PLATFORM == 'Darwin': - try: - wk_os.mac.set_fans(speed) - except (RuntimeError, ValueError, subprocess.CalledProcessError) as err: - LOG.error('Failed to set fans to %s', speed) - LOG.error('Error: %s', err) - std.print_error(f'Failed to set fans to {speed}') - for 
line in str(err).splitlines(): - std.print_warning(f' {line.strip()}') - elif PLATFORM == 'Linux': - cmd = ['apple-fans', speed] - exe.run_program(cmd, check=False) - - def show_results(state): """Show test results by device.""" std.sleep(0.5) @@ -1450,84 +1034,6 @@ def show_results(state): std.print_standard(' ') -def start_mprime(working_dir, log_path): - """Start mprime and save filtered output to log, returns Popen object.""" - set_apple_fan_speed('max') - proc_mprime = subprocess.Popen( # pylint: disable=consider-using-with - ['mprime', '-t'], - cwd=working_dir, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - proc_grep = subprocess.Popen( # pylint: disable=consider-using-with - 'grep --ignore-case --invert-match --line-buffered stress.txt'.split(), - stdin=proc_mprime.stdout, - stdout=subprocess.PIPE, - ) - proc_mprime.stdout.close() - save_nsbr = exe.NonBlockingStreamReader(proc_grep.stdout) - exe.start_thread( - save_nsbr.save_to_file, - args=(proc_grep, log_path), - ) - - # Return objects - return proc_mprime - - -def start_sysbench(sensors, sensors_out, log_path, pane): - """Start sysbench, returns tuple with Popen object and file handle.""" - set_apple_fan_speed('max') - sysbench_cmd = [ - 'sysbench', - f'--threads={exe.psutil.cpu_count()}', - '--cpu-max-prime=1000000000', - 'cpu', - 'run', - ] - - # Restart background monitor for Sysbench - sensors.stop_background_monitor() - sensors.start_background_monitor( - sensors_out, - alt_max='Sysbench', - thermal_action=('killall', 'sysbench', '-INT'), - ) - - # Update bottom pane - tmux.respawn_pane(pane, watch_file=log_path, watch_cmd='tail') - - # Start sysbench - filehandle_sysbench = open( # pylint: disable=consider-using-with - log_path, 'a', encoding='utf-8', - ) - proc_sysbench = exe.popen_program(sysbench_cmd, stdout=filehandle_sysbench) - - # Done - return (proc_sysbench, filehandle_sysbench) - -def stop_mprime(proc_mprime): - """Stop mprime gracefully, then forcefully as needed.""" - 
proc_mprime.terminate() - try: - proc_mprime.wait(timeout=5) - except subprocess.TimeoutExpired: - proc_mprime.kill() - set_apple_fan_speed('auto') - - -def stop_sysbench(proc_sysbench, filehandle_sysbench): - """Stop sysbench.""" - proc_sysbench.terminate() - try: - proc_sysbench.wait(timeout=5) - except subprocess.TimeoutExpired: - proc_sysbench.kill() - filehandle_sysbench.flush() - filehandle_sysbench.close() - set_apple_fan_speed('auto') - - def sync_clock(): """Sync clock under macOS using sntp.""" cmd = ['sudo', 'sntp', '-Ss', 'us.pool.ntp.org'] diff --git a/scripts/wk/hw/keyboard.py b/scripts/wk/hw/keyboard.py new file mode 100644 index 00000000..20a3db0a --- /dev/null +++ b/scripts/wk/hw/keyboard.py @@ -0,0 +1,31 @@ +"""WizardKit: Keyboard test functions""" +# vim: sts=2 sw=2 ts=2 + +import logging + +from wk.exe import run_program +from wk.std import PLATFORM, print_warning + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Functions +def keyboard_test(): + """Test keyboard using OS specific functions.""" + if PLATFORM == 'Linux': + run_xev() + else: + print_warning(f'Not supported under this OS: {PLATFORM}') + + +def run_xev(): + """Test keyboard using xev.""" + LOG.info('Keyboard Test (xev)') + cmd = ['xev', '-event', 'keyboard'] + run_program(cmd, check=False, pipe=False) + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") diff --git a/scripts/wk/hw/network.py b/scripts/wk/hw/network.py new file mode 100644 index 00000000..68ab30d9 --- /dev/null +++ b/scripts/wk/hw/network.py @@ -0,0 +1,58 @@ +"""WizardKit: Network test functions""" +# vim: sts=2 sw=2 ts=2 + +import logging + +from wk.net import ( + connected_to_private_network, + ping, + show_valid_addresses, + speedtest, + ) +from wk.std import ( + TryAndPrint, + pause, + print_warning, + ) + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Functions +def network_test(): + """Run network tests.""" + LOG.info('Network Test') + 
try_and_print = TryAndPrint() + result = try_and_print.run( + message='Network connection...', + function=connected_to_private_network, + msg_good='OK', + raise_on_error=True, + ) + + # Bail if not connected + if result['Failed']: + print_warning('Please connect to a network and try again') + pause('Press Enter to return to main menu...') + return + + # Show IP address(es) + show_valid_addresses() + + # Ping tests + try_and_print.run( + 'Internet connection...', ping, msg_good='OK', addr='8.8.8.8') + try_and_print.run( + 'DNS resolution...', ping, msg_good='OK', addr='google.com') + + # Speedtest + try_and_print.run('Speedtest...', speedtest) + + # Done + pause('Press Enter to return to main menu...') + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") diff --git a/scripts/wk/hw/screensavers.py b/scripts/wk/hw/screensavers.py new file mode 100644 index 00000000..9bdfb719 --- /dev/null +++ b/scripts/wk/hw/screensavers.py @@ -0,0 +1,40 @@ +"""WizardKit: Screensaver functions""" +# vim: sts=2 sw=2 ts=2 + +import logging + +from subprocess import PIPE + +from wk.exe import run_program +from wk.tmux import zoom_pane as tmux_zoom_pane + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Functions +def screensaver(name): + """Show screensaver""" + LOG.info('Screensaver (%s)', name) + if name == 'matrix': + cmd = ['cmatrix', '-abs'] + elif name == 'pipes': + cmd = [ + 'pipes.sh', + '-t', '0', + '-t', '1', + '-t', '2', + '-t', '3', + '-t', '5', + '-R', '-r', '4000', + ] + + # Switch pane to fullscreen and start screensaver + tmux_zoom_pane() + run_program(cmd, check=False, pipe=False, stderr=PIPE) + tmux_zoom_pane() + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") diff --git a/scripts/wk/hw/surface_scan.py b/scripts/wk/hw/surface_scan.py new file mode 100644 index 00000000..085bc52c --- /dev/null +++ b/scripts/wk/hw/surface_scan.py @@ -0,0 +1,85 @@ +"""WizardKit: Surface scan test 
functions""" +# vim: sts=2 sw=2 ts=2 + +import logging + +from subprocess import STDOUT + +from wk.cfg.hw import BADBLOCKS_LARGE_DISK, BADBLOCKS_REGEX +from wk.exe import run_program +from wk.std import ( + PLATFORM, + bytes_to_string, + color_string, + strip_colors, + ) + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Functions +def run_scan(test_obj, log_path): + """Run surface scan and handle exceptions.""" + block_size = '1024' + dev = test_obj.dev + dev_path = test_obj.dev.path + if PLATFORM == 'Darwin': + # Use "RAW" disks under macOS + dev_path = dev_path.with_name(f'r{dev_path.name}') + LOG.info('Using %s for better performance', dev_path) + test_obj.report.append(color_string('badblocks', 'BLUE')) + test_obj.set_status('Working') + + # Increase block size if necessary + if (dev.phy_sec == 4096 + or dev.size >= BADBLOCKS_LARGE_DISK): + block_size = '4096' + + # Start scan + cmd = ['sudo', 'badblocks', '-sv', '-b', block_size, '-e', '1', dev_path] + with open(log_path, 'a', encoding='utf-8') as _f: + size_str = bytes_to_string(dev.size, use_binary=False) + _f.write( + color_string( + ['[', dev.path.name, ' ', size_str, ']\n'], + [None, 'BLUE', None, 'CYAN', None], + sep='', + ), + ) + _f.flush() + run_program( + cmd, + check=False, + pipe=False, + stderr=STDOUT, + stdout=_f, + ) + + # Check results + with open(log_path, 'r', encoding='utf-8') as _f: + for line in _f.readlines(): + line = strip_colors(line.strip()) + if not line or line.startswith('Checking') or line.startswith('['): + # Skip + continue + match = BADBLOCKS_REGEX.search(line) + if match: + if all(s == '0' for s in match.groups()): + test_obj.passed = True + test_obj.report.append(f' {line}') + test_obj.set_status('Passed') + else: + test_obj.failed = True + test_obj.report.append(f' {color_string(line, "YELLOW")}') + test_obj.set_status('Failed') + else: + test_obj.report.append(f' {color_string(line, "YELLOW")}') + if not (test_obj.passed or test_obj.failed): + 
test_obj.set_status('Unknown') + + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") From 46eb737dc8dd2305569e10a86076647fb08c5811 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Wed, 6 Apr 2022 16:22:58 -0600 Subject: [PATCH 08/18] Move SMART functions to their own file --- scripts/wk/hw/__init__.py | 1 + scripts/wk/hw/ddrescue.py | 21 +- scripts/wk/hw/diags.py | 77 ++----- scripts/wk/hw/disk.py | 364 +-------------------------------- scripts/wk/hw/smart.py | 414 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 457 insertions(+), 420 deletions(-) create mode 100644 scripts/wk/hw/smart.py diff --git a/scripts/wk/hw/__init__.py b/scripts/wk/hw/__init__.py index 821c9761..fca26622 100644 --- a/scripts/wk/hw/__init__.py +++ b/scripts/wk/hw/__init__.py @@ -10,6 +10,7 @@ from . import keyboard from . import network from . import screensavers from . import sensors +from . import smart from . import surface_scan from . import system from . 
import test diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index 9267e83f..4d757c27 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -28,6 +28,13 @@ from wk.cfg.ddrescue import ( DDRESCUE_SPECIFIC_PASS_SETTINGS, ) from wk.hw import disk as hw_disk +from wk.hw.smart import ( + CriticalHardwareError, + SMARTNotSupportedError, + SMARTSelfTestInProgressError, + safety_checks, + update_smart_details, + ) # STATIC VARIABLES @@ -947,8 +954,8 @@ class State(): def safety_check_destination(self): """Run safety checks for destination and abort if necessary.""" try: - self.destination.safety_checks() - except hw_disk.CriticalHardwareError as err: + safety_checks(self.destination) + except CriticalHardwareError as err: std.print_error( f'Critical error(s) detected for: {self.destination.path}', ) @@ -1491,12 +1498,12 @@ def check_destination_health(destination): # Run safety checks try: - destination.safety_checks() - except hw_disk.CriticalHardwareError: + safety_checks(destination) + except CriticalHardwareError: result = 'Critical hardware error detected on destination' - except hw_disk.SMARTSelfTestInProgressError: + except SMARTSelfTestInProgressError: result = 'SMART self-test in progress on destination' - except hw_disk.SMARTNotSupportedError: + except SMARTNotSupportedError: pass # Done @@ -2031,7 +2038,7 @@ def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True): def _update_smart_pane(): """Update SMART pane every 30 seconds.""" - state.source.update_smart_details() + update_smart_details(state.source) now = datetime.datetime.now(tz=TIMEZONE).strftime('%Y-%m-%d %H:%M %Z') with open(f'{state.log_dir}/smart.out', 'w', encoding='utf-8') as _f: _f.write( diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index 64fcabf9..051ab228 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -21,6 +21,16 @@ from wk.hw import system as hw_system from wk.hw.audio import audio_test from wk.hw.keyboard 
import keyboard_test from wk.hw.network import network_test +from wk.hw.smart import ( + CriticalHardwareError, + SMARTSelfTestInProgressError, + abort_self_test, + check_attributes, + check_self_test_results, + generate_attribute_report, + run_self_test, + safety_checks, + ) from wk.hw.screensavers import screensaver from wk.hw.test import Test, TestGroup @@ -121,8 +131,8 @@ class State(): continue try: - disk.safety_checks() - except hw_disk.CriticalHardwareError: + safety_checks(disk) + except CriticalHardwareError: disable_tests = True disk.add_note('Critical hardware error detected.', 'RED') if 'Disk Attributes' in disk.tests: @@ -135,7 +145,7 @@ class State(): 'Critical hardware error detected during diagnostics', 'YELLOW', ) - except hw_disk.SMARTSelfTestInProgressError as err: + except SMARTSelfTestInProgressError as err: if prep: std.print_warning(f'SMART self-test(s) in progress for {disk.path}') if std.ask('Continue with all tests disabled for this device?'): @@ -160,7 +170,7 @@ class State(): if ( 'Disk Attributes' in disk.tests and not disk.tests['Disk Attributes'].failed - and not disk.check_attributes(only_blocking=False) + and not check_attributes(disk, only_blocking=False) ): # No blocking errors encountered, but found minor attribute failures if not prep: @@ -449,32 +459,6 @@ def build_menu(cli_mode=False, quick_mode=False): return menu -def check_self_test_results(test_obj, aborted=False): - """Check SMART self-test results.""" - test_obj.report.append(std.color_string('Self-Test', 'BLUE')) - if test_obj.disabled or test_obj.status == 'Denied': - test_obj.report.append(std.color_string(f' {test_obj.status}', 'RED')) - elif test_obj.status == 'N/A' or not test_obj.dev.attributes: - test_obj.report.append(std.color_string(f' {test_obj.status}', 'YELLOW')) - else: - # Not updating SMART data here to preserve the test status for the report - # For instance if the test was aborted the report should inlcude the last - # known progress instead of just 
"was aborted buy host" - test_details = test_obj.dev.get_smart_self_test_details() - test_result = test_details.get('status', {}).get('string', 'Unknown') - test_obj.report.append(f' {test_result.capitalize()}') - if aborted and not (test_obj.passed or test_obj.failed): - test_obj.report.append(std.color_string(' Aborted', 'YELLOW')) - test_obj.set_status('Aborted') - elif test_obj.status == 'TimedOut': - test_obj.report.append(std.color_string(' TimedOut', 'YELLOW')) - test_obj.set_status('TimedOut') - else: - test_obj.failed = not test_obj.passed - if test_obj.failed: - test_obj.set_status('Failed') - - def cpu_stress_tests(state, test_objects): # pylint: disable=too-many-statements """CPU & cooling check using Prime95 and Sysbench.""" @@ -612,7 +596,7 @@ def disk_attribute_check(state, test_objects): test.set_status('N/A') continue - if test.dev.check_attributes(): + if check_attributes(test.dev): test.passed = True test.set_status('Passed') else: @@ -695,31 +679,6 @@ def disk_self_test(state, test_objects): threads = [] state.panes['SMART'] = [] - def _run_self_test(test_obj, log_path): - """Run self-test and handle exceptions.""" - result = None - - try: - test_obj.passed = test_obj.dev.run_self_test(log_path) - except TimeoutError: - test_obj.failed = True - result = 'TimedOut' - except hw_disk.SMARTNotSupportedError: - # Pass test since it doesn't apply - test_obj.passed = True - result = 'N/A' - - # Set status - if result: - test_obj.set_status(result) - else: - if test_obj.failed: - test_obj.set_status('Failed') - elif test_obj.passed: - test_obj.set_status('Passed') - else: - test_obj.set_status('Unknown') - # Run self-tests state.update_top_pane( f'Disk self-test{"s" if len(test_objects) > 1 else ""}', @@ -733,7 +692,7 @@ def disk_self_test(state, test_objects): # Start thread test.set_status('Working') test_log = f'{state.log_dir}/{test.dev.path.name}_selftest.log' - threads.append(exe.start_thread(_run_self_test, args=(test, test_log))) + 
threads.append(exe.start_thread(run_self_test, args=(test, test_log))) # Show progress if threads[-1].is_alive(): @@ -752,7 +711,7 @@ def disk_self_test(state, test_objects): except KeyboardInterrupt: aborted = True for test in test_objects: - test.dev.abort_self_test() + abort_self_test(test.dev) std.sleep(0.5) # Save report(s) @@ -787,7 +746,7 @@ def disk_surface_scan(state, test_objects): ) for disk in state.disks: failed_attributes = [ - line for line in disk.generate_attribute_report() if 'failed' in line + line for line in generate_attribute_report(disk) if 'failed' in line ] if failed_attributes: size_str = std.bytes_to_string(disk.size, use_binary=False) diff --git a/scripts/wk/hw/disk.py b/scripts/wk/hw/disk.py index 5703e1d2..471edc31 100644 --- a/scripts/wk/hw/disk.py +++ b/scripts/wk/hw/disk.py @@ -9,25 +9,15 @@ import re from dataclasses import dataclass, field from typing import Any, Union -from wk.cfg.hw import ( - ATTRIBUTE_COLORS, - KEY_NVME, - KEY_SMART, - KNOWN_DISK_ATTRIBUTES, - KNOWN_DISK_MODELS, - NVME_WARNING_KEYS, - REGEX_POWER_ON_TIME, - SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS, - ) from wk.cfg.main import KIT_NAME_SHORT from wk.exe import get_json_from_command, run_program from wk.hw.test import Test -from wk.std import ( - PLATFORM, - bytes_to_string, - color_string, - sleep, +from wk.hw.smart import ( + enable_smart, + generate_attribute_report, + update_smart_details, ) +from wk.std import PLATFORM, bytes_to_string, color_string # STATIC VARIABLES @@ -38,17 +28,6 @@ WK_LABEL_REGEX = re.compile( ) -# Exception Classes -class CriticalHardwareError(RuntimeError): - """Exception used for critical hardware failures.""" - -class SMARTNotSupportedError(TypeError): - """Exception used for disks lacking SMART support.""" - -class SMARTSelfTestInProgressError(RuntimeError): - """Exception used when a SMART self-test is in progress.""" - - # Classes @dataclass(slots=True) class Disk: @@ -76,23 +55,18 @@ class Disk: self.path = 
pathlib.Path(self.path).resolve() self.get_details() self.set_description() - self.enable_smart() - self.update_smart_details() + enable_smart(self) + update_smart_details(self) if not self.attributes and self.bus == 'USB': # Try using SAT LOG.warning('Using SAT for smartctl for %s', self.path) self.notes = [] self.use_sat = True - self.enable_smart() - self.update_smart_details() + enable_smart(self) + update_smart_details(self) if not self.is_4k_aligned(): self.add_note('One or more partitions are not 4K aligned', 'YELLOW') - def abort_self_test(self) -> None: - """Abort currently running non-captive self-test.""" - cmd = ['sudo', 'smartctl', '--abort', self.path] - run_program(cmd, check=False) - def add_note(self, note, color=None) -> None: """Add note that will be included in the disk report.""" if color: @@ -101,40 +75,6 @@ class Disk: self.notes.append(note) self.notes.sort() - def check_attributes(self, only_blocking=False) -> bool: - """Check if any known attributes are failing, returns bool.""" - attributes_ok = True - known_attributes = get_known_disk_attributes(self.model) - for attr, value in self.attributes.items(): - # Skip unknown attributes - if attr not in known_attributes: - continue - - # Get thresholds - blocking_attribute = known_attributes[attr].get('Blocking', False) - err_thresh = known_attributes[attr].get('Error', None) - max_thresh = known_attributes[attr].get('Maximum', None) - if not max_thresh: - max_thresh = float('inf') - - # Skip non-blocking attributes if necessary - if only_blocking and not blocking_attribute: - continue - - # Skip informational attributes - if not err_thresh: - continue - - # Check attribute - if known_attributes[attr].get('PercentageLife', False): - if 0 <= value['raw'] <= err_thresh: - attributes_ok = False - elif err_thresh <= value['raw'] < max_thresh: - attributes_ok = False - - # Done - return attributes_ok - def disable_disk_tests(self) -> None: """Disable all tests.""" LOG.warning('Disabling all tests 
for: %s', self.path) @@ -143,73 +83,6 @@ class Disk: test.set_status('Denied') test.disabled = True - def enable_smart(self) -> None: - """Try enabling SMART for this disk.""" - cmd = [ - 'sudo', - 'smartctl', - f'--device={"sat,auto" if self.use_sat else "auto"}', - '--tolerance=permissive', - '--smart=on', - self.path, - ] - run_program(cmd, check=False) - - def generate_attribute_report(self) -> list[str]: - """Generate attribute report, returns list.""" - known_attributes = get_known_disk_attributes(self.model) - report = [] - for attr, value in sorted(self.attributes.items()): - note = '' - value_color = 'GREEN' - - # Skip attributes not in our list - if attr not in known_attributes: - continue - - # Check for attribute note - note = known_attributes[attr].get('Note', '') - - # ID / Name - label = f'{attr:>3}' - if isinstance(attr, int): - # Assuming SMART, include hex ID and name - label += f' / {str(hex(attr))[2:].upper():0>2}: {value["name"]}' - label = f' {label.replace("_", " "):38}' - - # Value color - if known_attributes[attr].get('PercentageLife', False): - # PercentageLife values - if 0 <= value['raw'] <= known_attributes[attr]['Error']: - value_color = 'RED' - note = '(failed, % life remaining)' - elif value['raw'] < 0 or value['raw'] > 100: - value_color = 'PURPLE' - note = '(invalid?)' - else: - for threshold, color in ATTRIBUTE_COLORS: - threshold_val = known_attributes[attr].get(threshold, None) - if threshold_val and value['raw'] >= threshold_val: - value_color = color - if threshold == 'Error': - note = '(failed)' - elif threshold == 'Maximum': - note = '(invalid?)' - - # 199/C7 warning - if str(attr) == '199' and value['raw'] > 0: - note = '(bad cable?)' - - # Build colored string and append to report - line = color_string( - [label, value['raw_str'], note], - [None, value_color, 'YELLOW'], - ) - report.append(line) - - # Done - return report - def generate_report(self, header=True) -> list[str]: """Generate Disk report, returns list.""" 
report = [] @@ -221,7 +94,7 @@ class Disk: if self.attributes: if header: report.append(color_string('Attributes', 'BLUE')) - report.extend(self.generate_attribute_report()) + report.extend(generate_attribute_report(self)) # Notes if self.notes: @@ -294,18 +167,6 @@ class Disk: # Done return labels - def get_smart_self_test_details(self) -> dict[Any, Any]: - """Shorthand to get deeply nested self-test details, returns dict.""" - details = {} - try: - details = self.raw_smartctl['ata_smart_data']['self_test'] - except (KeyError, TypeError): - # Assuming disk lacks SMART support, ignore and return empty dict. - pass - - # Done - return details - def is_4k_aligned(self) -> bool: """Check that all disk partitions are aligned, returns bool.""" aligned = True @@ -316,138 +177,6 @@ class Disk: return aligned - def safety_checks(self) -> None: - """Run safety checks and raise an exception if necessary.""" - blocking_event_encountered = False - self.update_smart_details() - - # Attributes - if not self.check_attributes(only_blocking=True): - blocking_event_encountered = True - LOG.error('%s: Blocked for failing attribute(s)', self.path) - - # NVMe status - nvme_status = self.raw_smartctl.get('smart_status', {}).get('nvme', {}) - if nvme_status.get('media_read_only', False): - blocking_event_encountered = True - msg = 'Media has been placed in read-only mode' - self.add_note(msg, 'RED') - LOG.error('%s %s', self.path, msg) - for key in NVME_WARNING_KEYS: - if nvme_status.get(key, False): - msg = key.replace('_', ' ') - self.add_note(msg, 'YELLOW') - LOG.warning('%s %s', self.path, msg) - - # SMART overall assessment - smart_passed = True - try: - smart_passed = self.raw_smartctl['smart_status']['passed'] - except (KeyError, TypeError): - # Assuming disk doesn't support SMART overall assessment - pass - if not smart_passed: - blocking_event_encountered = True - msg = 'SMART overall self-assessment: Failed' - self.add_note(msg, 'RED') - LOG.error('%s %s', self.path, msg) - - # 
Raise blocking exception if necessary - if blocking_event_encountered: - raise CriticalHardwareError(f'Critical error(s) for: {self.path}') - - # SMART self-test status - test_details = self.get_smart_self_test_details() - if 'remaining_percent' in test_details.get('status', ''): - msg = f'SMART self-test in progress for: {self.path}' - LOG.error(msg) - raise SMARTSelfTestInProgressError(msg) - - def run_self_test(self, log_path) -> bool: - """Run disk self-test and check if it passed, returns bool. - - NOTE: This function is here to reserve a place for future - NVMe self-tests announced in NVMe spec v1.3. - """ - result = self.run_smart_self_test(log_path) - return result - - def run_smart_self_test(self, log_path) -> bool: - """Run SMART self-test and check if it passed, returns bool. - - NOTE: An exception will be raised if the disk lacks SMART support. - """ - finished = False - result = None - started = False - status_str = 'Starting self-test...' - test_details = self.get_smart_self_test_details() - test_minutes = 15 - size_str = bytes_to_string(self.size, use_binary=False) - header_str = color_string( - ['[', self.path.name, ' ', size_str, ']'], - [None, 'BLUE', None, 'CYAN', None], - sep='', - ) - - # Check if disk supports self-tests - if not test_details: - raise SMARTNotSupportedError( - f'SMART self-test not supported for {self.path}') - - # Get real test length - test_minutes = test_details.get('polling_minutes', {}).get('short', 5) - test_minutes = int(test_minutes) + 10 - - # Start test - with open(log_path, 'w', encoding='utf-8') as _f: - _f.write(f'{header_str}\nInitializing...') - cmd = [ - 'sudo', - 'smartctl', - '--tolerance=normal', - '--test=short', - self.path, - ] - run_program(cmd, check=False) - - # Monitor progress (in five second intervals) - for _i in range(int(test_minutes*60/5)): - sleep(5) - - # Update status - self.update_smart_details() - test_details = self.get_smart_self_test_details() - - # Check test progress - if started: - 
status_str = test_details.get('status', {}).get('string', 'Unknown') - status_str = status_str.capitalize() - - # Update log - with open(log_path, 'w', encoding='utf-8') as _f: - _f.write(f'{header_str}\nSMART self-test status:\n {status_str}') - - # Check if finished - if 'remaining_percent' not in test_details.get('status', {}): - finished = True - break - - elif 'remaining_percent' in test_details.get('status', {}): - started = True - elif _i * 5 >= SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS: - # Test didn't start within limit, stop waiting - break - - # Check result - if finished: - result = test_details.get('status', {}).get('passed', False) - elif started: - raise TimeoutError(f'SMART self-test timed out for {self.path}') - - # Done - return result - def set_description(self) -> None: """Set disk description from details.""" self.description = ( @@ -455,62 +184,6 @@ class Disk: f' ({self.bus}) {self.model} {self.serial}' ) - def update_smart_details(self) -> None: - """Update SMART details via smartctl.""" - updated_attributes = {} - - # Get SMART data - cmd = [ - 'sudo', - 'smartctl', - f'--device={"sat,auto" if self.use_sat else "auto"}', - '--tolerance=verypermissive', - '--all', - '--json', - self.path, - ] - self.raw_smartctl = get_json_from_command(cmd, check=False) - - # Check for attributes - if KEY_NVME in self.raw_smartctl: - for name, value in self.raw_smartctl[KEY_NVME].items(): - try: - updated_attributes[name] = { - 'name': name, - 'raw': int(value), - 'raw_str': str(value), - } - except (TypeError, ValueError): - # Ignoring invalid attribute - LOG.error('Invalid NVMe attribute: %s %s', name, value) - elif KEY_SMART in self.raw_smartctl: - for attribute in self.raw_smartctl[KEY_SMART].get('table', {}): - try: - _id = int(attribute['id']) - except (KeyError, ValueError): - # Ignoring invalid attribute - LOG.error('Invalid SMART attribute: %s', attribute) - continue - name = str(attribute.get('name', 'Unknown')).replace('_', ' ').title() - raw = 
int(attribute.get('raw', {}).get('value', -1)) - raw_str = attribute.get('raw', {}).get('string', 'Unknown') - - # Fix power-on time - match = REGEX_POWER_ON_TIME.match(raw_str) - if _id == 9 and match: - raw = int(match.group(1)) - - # Add to dict - updated_attributes[_id] = { - 'name': name, 'raw': raw, 'raw_str': raw_str} - - # Add note if necessary - if not updated_attributes: - self.add_note('No NVMe or SMART data available', 'YELLOW') - - # Done - self.attributes.update(updated_attributes) - # Functions def get_disk_details_linux(path) -> dict[Any, Any]: @@ -676,23 +349,6 @@ def get_disks_macos() -> list[Disk]: return disks -def get_known_disk_attributes(model) -> dict[Any, dict]: - """Get known NVMe/SMART attributes (model specific), returns dict.""" - known_attributes = KNOWN_DISK_ATTRIBUTES.copy() - - # Apply model-specific data - for regex, data in KNOWN_DISK_MODELS.items(): - if re.search(regex, model): - for attr, thresholds in data.items(): - if attr in known_attributes: - known_attributes[attr].update(thresholds) - else: - known_attributes[attr] = thresholds - - # Done - return known_attributes - - def is_4k_aligned_macos(disk_details) -> bool: """Check partition alignment using diskutil info, returns bool.""" aligned = True diff --git a/scripts/wk/hw/smart.py b/scripts/wk/hw/smart.py new file mode 100644 index 00000000..6e1a1ab5 --- /dev/null +++ b/scripts/wk/hw/smart.py @@ -0,0 +1,414 @@ +"""WizardKit: SMART test functions""" +# vim: sts=2 sw=2 ts=2 + +import logging +import re + +from typing import Any + +from wk.cfg.hw import ( + ATTRIBUTE_COLORS, + KEY_NVME, + KEY_SMART, + KNOWN_DISK_ATTRIBUTES, + KNOWN_DISK_MODELS, + NVME_WARNING_KEYS, + REGEX_POWER_ON_TIME, + SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS, + ) +from wk.exe import get_json_from_command, run_program +from wk.std import bytes_to_string, color_string, sleep + + +# STATIC VARIABLES +LOG = logging.getLogger(__name__) + + +# Exception Classes +class CriticalHardwareError(RuntimeError): + 
"""Exception used for critical hardware failures.""" + +class SMARTNotSupportedError(TypeError): + """Exception used for disks lacking SMART support.""" + +class SMARTSelfTestInProgressError(RuntimeError): + """Exception used when a SMART self-test is in progress.""" + + +# Functions +def abort_self_test(dev) -> None: + """Abort currently running non-captive self-test.""" + cmd = ['sudo', 'smartctl', '--abort', dev.path] + run_program(cmd, check=False) + + +def check_attributes(dev, only_blocking=False) -> bool: + """Check if any known attributes are failing, returns bool.""" + attributes_ok = True + known_attributes = get_known_disk_attributes(dev.model) + for attr, value in dev.attributes.items(): + # Skip unknown attributes + if attr not in known_attributes: + continue + + # Get thresholds + blocking_attribute = known_attributes[attr].get('Blocking', False) + err_thresh = known_attributes[attr].get('Error', None) + max_thresh = known_attributes[attr].get('Maximum', None) + if not max_thresh: + max_thresh = float('inf') + + # Skip non-blocking attributes if necessary + if only_blocking and not blocking_attribute: + continue + + # Skip informational attributes + if not err_thresh: + continue + + # Check attribute + if known_attributes[attr].get('PercentageLife', False): + if 0 <= value['raw'] <= err_thresh: + attributes_ok = False + elif err_thresh <= value['raw'] < max_thresh: + attributes_ok = False + + # Done + return attributes_ok + + +def check_self_test_results(test_obj, aborted=False): + """Check SMART self-test results.""" + test_obj.report.append(color_string('Self-Test', 'BLUE')) + if test_obj.disabled or test_obj.status == 'Denied': + test_obj.report.append(color_string(f' {test_obj.status}', 'RED')) + elif test_obj.status == 'N/A' or not test_obj.dev.attributes: + test_obj.report.append(color_string(f' {test_obj.status}', 'YELLOW')) + else: + # Not updating SMART data here to preserve the test status for the report + # For instance if the test was 
aborted the report should inlcude the last + # known progress instead of just "was aborted by host" + test_details = get_smart_self_test_details(test_obj.dev) + test_result = test_details.get('status', {}).get('string', 'Unknown') + test_obj.report.append(f' {test_result.capitalize()}') + if aborted and not (test_obj.passed or test_obj.failed): + test_obj.report.append(color_string(' Aborted', 'YELLOW')) + test_obj.set_status('Aborted') + elif test_obj.status == 'TimedOut': + test_obj.report.append(color_string(' TimedOut', 'YELLOW')) + test_obj.set_status('TimedOut') + else: + test_obj.failed = not test_obj.passed + if test_obj.failed: + test_obj.set_status('Failed') + + +def enable_smart(dev) -> None: + """Try enabling SMART for this disk.""" + cmd = [ + 'sudo', + 'smartctl', + f'--device={"sat,auto" if dev.use_sat else "auto"}', + '--tolerance=permissive', + '--smart=on', + dev.path, + ] + run_program(cmd, check=False) + + +def generate_attribute_report(dev) -> list[str]: + """Generate attribute report, returns list.""" + known_attributes = get_known_disk_attributes(dev.model) + report = [] + for attr, value in sorted(dev.attributes.items()): + note = '' + value_color = 'GREEN' + + # Skip attributes not in our list + if attr not in known_attributes: + continue + + # Check for attribute note + note = known_attributes[attr].get('Note', '') + + # ID / Name + label = f'{attr:>3}' + if isinstance(attr, int): + # Assuming SMART, include hex ID and name + label += f' / {str(hex(attr))[2:].upper():0>2}: {value["name"]}' + label = f' {label.replace("_", " "):38}' + + # Value color + if known_attributes[attr].get('PercentageLife', False): + # PercentageLife values + if 0 <= value['raw'] <= known_attributes[attr]['Error']: + value_color = 'RED' + note = '(failed, % life remaining)' + elif value['raw'] < 0 or value['raw'] > 100: + value_color = 'PURPLE' + note = '(invalid?)' + else: + for threshold, color in ATTRIBUTE_COLORS: + threshold_val = 
known_attributes[attr].get(threshold, None) + if threshold_val and value['raw'] >= threshold_val: + value_color = color + if threshold == 'Error': + note = '(failed)' + elif threshold == 'Maximum': + note = '(invalid?)' + + # 199/C7 warning + if str(attr) == '199' and value['raw'] > 0: + note = '(bad cable?)' + + # Build colored string and append to report + line = color_string( + [label, value['raw_str'], note], + [None, value_color, 'YELLOW'], + ) + report.append(line) + + # Done + return report + + +def get_known_disk_attributes(model) -> dict[Any, dict]: + """Get known NVMe/SMART attributes (model specific), returns dict.""" + known_attributes = KNOWN_DISK_ATTRIBUTES.copy() + + # Apply model-specific data + for regex, data in KNOWN_DISK_MODELS.items(): + if re.search(regex, model): + for attr, thresholds in data.items(): + if attr in known_attributes: + known_attributes[attr].update(thresholds) + else: + known_attributes[attr] = thresholds + + # Done + return known_attributes + + +def get_smart_self_test_details(dev) -> dict[Any, Any]: + """Shorthand to get deeply nested self-test details, returns dict.""" + details = {} + try: + details = dev.raw_smartctl['ata_smart_data']['self_test'] + except (KeyError, TypeError): + # Assuming disk lacks SMART support, ignore and return empty dict. 
+ pass + + # Done + return details + + +def safety_checks(dev) -> None: + """Run safety checks and raise an exception if necessary.""" + blocking_event_encountered = False + update_smart_details(dev) + + # Attributes + if not check_attributes(dev, only_blocking=True): + blocking_event_encountered = True + LOG.error('%s: Blocked for failing attribute(s)', dev.path) + + # NVMe status + nvme_status = dev.raw_smartctl.get('smart_status', {}).get('nvme', {}) + if nvme_status.get('media_read_only', False): + blocking_event_encountered = True + msg = 'Media has been placed in read-only mode' + dev.add_note(msg, 'RED') + LOG.error('%s %s', dev.path, msg) + for key in NVME_WARNING_KEYS: + if nvme_status.get(key, False): + msg = key.replace('_', ' ') + dev.add_note(msg, 'YELLOW') + LOG.warning('%s %s', dev.path, msg) + + # SMART overall assessment + smart_passed = True + try: + smart_passed = dev.raw_smartctl['smart_status']['passed'] + except (KeyError, TypeError): + # Assuming disk doesn't support SMART overall assessment + pass + if not smart_passed: + blocking_event_encountered = True + msg = 'SMART overall self-assessment: Failed' + dev.add_note(msg, 'RED') + LOG.error('%s %s', dev.path, msg) + + # Raise blocking exception if necessary + if blocking_event_encountered: + raise CriticalHardwareError(f'Critical error(s) for: {dev.path}') + + # SMART self-test status + test_details = get_smart_self_test_details(dev) + if 'remaining_percent' in test_details.get('status', ''): + msg = f'SMART self-test in progress for: {dev.path}' + LOG.error(msg) + raise SMARTSelfTestInProgressError(msg) + + +def run_self_test(test_obj, log_path) -> None: + """Run disk self-test and check if it passed, returns bool.""" + result = None + + try: + test_obj.passed = run_smart_self_test(test_obj.dev, log_path) + except TimeoutError: + test_obj.failed = True + result = 'TimedOut' + except SMARTNotSupportedError: + # Pass test since it doesn't apply + test_obj.passed = True + result = 'N/A' + + # 
Set status + if result: + test_obj.set_status(result) + else: + if test_obj.failed: + test_obj.set_status('Failed') + elif test_obj.passed: + test_obj.set_status('Passed') + else: + test_obj.set_status('Unknown') + + +def run_smart_self_test(dev, log_path) -> bool: + """Run SMART self-test and check if it passed, returns bool. + + NOTE: An exception will be raised if the disk lacks SMART support. + """ + finished = False + result = None + started = False + status_str = 'Starting self-test...' + test_details = get_smart_self_test_details(dev) + test_minutes = 15 + size_str = bytes_to_string(dev.size, use_binary=False) + header_str = color_string( + ['[', dev.path.name, ' ', size_str, ']'], + [None, 'BLUE', None, 'CYAN', None], + sep='', + ) + + # Check if disk supports self-tests + if not test_details: + raise SMARTNotSupportedError( + f'SMART self-test not supported for {dev.path}') + + # Get real test length + test_minutes = test_details.get('polling_minutes', {}).get('short', 5) + test_minutes = int(test_minutes) + 10 + + # Start test + with open(log_path, 'w', encoding='utf-8') as _f: + _f.write(f'{header_str}\nInitializing...') + cmd = [ + 'sudo', + 'smartctl', + '--tolerance=normal', + '--test=short', + dev.path, + ] + run_program(cmd, check=False) + + # Monitor progress (in five second intervals) + for _i in range(int(test_minutes*60/5)): + sleep(5) + + # Update status + update_smart_details(dev) + test_details = get_smart_self_test_details(dev) + + # Check test progress + if started: + status_str = test_details.get('status', {}).get('string', 'Unknown') + status_str = status_str.capitalize() + + # Update log + with open(log_path, 'w', encoding='utf-8') as _f: + _f.write(f'{header_str}\nSMART self-test status:\n {status_str}') + + # Check if finished + if 'remaining_percent' not in test_details.get('status', {}): + finished = True + break + + elif 'remaining_percent' in test_details.get('status', {}): + started = True + elif _i * 5 >= 
SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS: + # Test didn't start within limit, stop waiting + break + + # Check result + if finished: + result = test_details.get('status', {}).get('passed', False) + elif started: + raise TimeoutError(f'SMART self-test timed out for {dev.path}') + + # Done + return result + + +def update_smart_details(dev) -> None: + """Update SMART details via smartctl.""" + updated_attributes = {} + + # Get SMART data + cmd = [ + 'sudo', + 'smartctl', + f'--device={"sat,auto" if dev.use_sat else "auto"}', + '--tolerance=verypermissive', + '--all', + '--json', + dev.path, + ] + dev.raw_smartctl = get_json_from_command(cmd, check=False) + + # Check for attributes + if KEY_NVME in dev.raw_smartctl: + for name, value in dev.raw_smartctl[KEY_NVME].items(): + try: + updated_attributes[name] = { + 'name': name, + 'raw': int(value), + 'raw_str': str(value), + } + except (TypeError, ValueError): + # Ignoring invalid attribute + LOG.error('Invalid NVMe attribute: %s %s', name, value) + elif KEY_SMART in dev.raw_smartctl: + for attribute in dev.raw_smartctl[KEY_SMART].get('table', {}): + try: + _id = int(attribute['id']) + except (KeyError, ValueError): + # Ignoring invalid attribute + LOG.error('Invalid SMART attribute: %s', attribute) + continue + name = str(attribute.get('name', 'Unknown')).replace('_', ' ').title() + raw = int(attribute.get('raw', {}).get('value', -1)) + raw_str = attribute.get('raw', {}).get('string', 'Unknown') + + # Fix power-on time + match = REGEX_POWER_ON_TIME.match(raw_str) + if _id == 9 and match: + raw = int(match.group(1)) + + # Add to dict + updated_attributes[_id] = { + 'name': name, 'raw': raw, 'raw_str': raw_str} + + # Add note if necessary + if not updated_attributes: + dev.add_note('No NVMe or SMART data available', 'YELLOW') + + # Done + dev.attributes.update(updated_attributes) + + +if __name__ == '__main__': + print("This file is not meant to be called directly.") From f79e578aaf0267acda3193fd9e425d5e38dcf370 Mon Sep 17 
00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Wed, 6 Apr 2022 16:23:44 -0600 Subject: [PATCH 09/18] Remove pylint warning from sensors --- scripts/wk/hw/sensors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/wk/hw/sensors.py b/scripts/wk/hw/sensors.py index f0e7b97c..7785794d 100644 --- a/scripts/wk/hw/sensors.py +++ b/scripts/wk/hw/sensors.py @@ -152,12 +152,11 @@ class Sensors(): sleep(0.5) def save_average_temps(self, temp_label, seconds=10): - # pylint: disable=unused-variable """Save average temps under temp_label over provided seconds..""" self.clear_temps() # Get temps - for i in range(seconds): + for _ in range(seconds): self.update_sensor_data(exit_on_thermal_limit=False) sleep(1) From 3d7881328fb67b3e6ae0f174c288efb6d6ca6f63 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Wed, 6 Apr 2022 19:06:32 -0600 Subject: [PATCH 10/18] Move ddrescue out of wk/hw section --- scripts/ddrescue-tui.py | 4 ++-- scripts/wk/__init__.py | 1 + scripts/wk/clone/__init__.py | 3 +++ scripts/wk/{hw => clone}/ddrescue.py | 0 scripts/wk/hw/__init__.py | 1 - 5 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 scripts/wk/clone/__init__.py rename scripts/wk/{hw => clone}/ddrescue.py (100%) diff --git a/scripts/ddrescue-tui.py b/scripts/ddrescue-tui.py index 2857053c..bd383ac5 100755 --- a/scripts/ddrescue-tui.py +++ b/scripts/ddrescue-tui.py @@ -10,14 +10,14 @@ import wk if __name__ == '__main__': try: - docopt(wk.hw.ddrescue.DOCSTRING) + docopt(wk.clone.ddrescue.DOCSTRING) except SystemExit: print('') wk.std.pause('Press Enter to exit...') raise try: - wk.hw.ddrescue.main() + wk.clone.ddrescue.main() except SystemExit: raise except: #pylint: disable=bare-except diff --git a/scripts/wk/__init__.py b/scripts/wk/__init__.py index 51521ea3..e6877c23 100644 --- a/scripts/wk/__init__.py +++ b/scripts/wk/__init__.py @@ -4,6 +4,7 @@ from sys import version_info as version from . import cfg +from . import clone from . 
import debug from . import exe from . import graph diff --git a/scripts/wk/clone/__init__.py b/scripts/wk/clone/__init__.py new file mode 100644 index 00000000..43ffabbc --- /dev/null +++ b/scripts/wk/clone/__init__.py @@ -0,0 +1,3 @@ +"""WizardKit: ddrescue-tui module init""" + +from . import ddrescue diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/clone/ddrescue.py similarity index 100% rename from scripts/wk/hw/ddrescue.py rename to scripts/wk/clone/ddrescue.py diff --git a/scripts/wk/hw/__init__.py b/scripts/wk/hw/__init__.py index fca26622..fd8daedb 100644 --- a/scripts/wk/hw/__init__.py +++ b/scripts/wk/hw/__init__.py @@ -3,7 +3,6 @@ from . import audio from . import benchmark from . import cpu -from . import ddrescue from . import diags from . import disk from . import keyboard From 20f91f01d1144fab22922a9a59a6fa3d0fe8f294 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Wed, 6 Apr 2022 19:08:23 -0600 Subject: [PATCH 11/18] Add type hints to hardware functions --- scripts/wk/hw/audio.py | 4 ++-- scripts/wk/hw/benchmark.py | 8 +++---- scripts/wk/hw/cpu.py | 19 ++++++++------- scripts/wk/hw/diags.py | 45 +++++++++++++++++------------------ scripts/wk/hw/keyboard.py | 4 ++-- scripts/wk/hw/network.py | 2 +- scripts/wk/hw/screensavers.py | 2 +- scripts/wk/hw/sensors.py | 41 ++++++++++++++++++------------- scripts/wk/hw/smart.py | 2 +- scripts/wk/hw/surface_scan.py | 2 +- scripts/wk/hw/test.py | 2 +- 11 files changed, 70 insertions(+), 61 deletions(-) diff --git a/scripts/wk/hw/audio.py b/scripts/wk/hw/audio.py index f6a984b5..484e2e24 100644 --- a/scripts/wk/hw/audio.py +++ b/scripts/wk/hw/audio.py @@ -12,13 +12,13 @@ LOG = logging.getLogger(__name__) # Functions -def audio_test(): +def audio_test() -> None: """Run an OS-specific audio test.""" if PLATFORM == 'Linux': audio_test_linux() -def audio_test_linux(): +def audio_test_linux() -> None: """Run an audio test using amixer and speaker-test.""" LOG.info('Audio Test') diff --git 
a/scripts/wk/hw/benchmark.py b/scripts/wk/hw/benchmark.py index 78664192..2ee08577 100644 --- a/scripts/wk/hw/benchmark.py +++ b/scripts/wk/hw/benchmark.py @@ -38,7 +38,7 @@ class DeviceTooSmallError(RuntimeError): # Functions -def calc_io_dd_values(dev_size): +def calc_io_dd_values(dev_size) -> dict[str, int]: """Calculate I/O benchmark dd values, returns dict. Calculations: @@ -90,8 +90,8 @@ def calc_io_dd_values(dev_size): } -def check_io_results(test_obj, rate_list, graph_width): - """Generate colored report using rate_list, returns list of str.""" +def check_io_results(test_obj, rate_list, graph_width) -> None: + """Check I/O results and generate report using rate_list.""" avg_read = sum(rate_list) / len(rate_list) min_read = min(rate_list) max_read = max(rate_list) @@ -135,7 +135,7 @@ def check_io_results(test_obj, rate_list, graph_width): test_obj.set_status('Unknown') -def run_io_test(test_obj, log_path): +def run_io_test(test_obj, log_path) -> None: """Run I/O benchmark and handle exceptions.""" dev_path = test_obj.dev.path if PLATFORM == 'Darwin': diff --git a/scripts/wk/hw/cpu.py b/scripts/wk/hw/cpu.py index 44329e7a..6fb1ec8f 100644 --- a/scripts/wk/hw/cpu.py +++ b/scripts/wk/hw/cpu.py @@ -5,6 +5,8 @@ import logging import re import subprocess +from typing import TextIO + from wk import exe from wk.cfg.hw import CPU_FAILURE_TEMP from wk.os.mac import set_fans as macos_set_fans @@ -19,10 +21,11 @@ from wk.tmux import respawn_pane as tmux_respawn_pane # STATIC VARIABLES LOG = logging.getLogger(__name__) +SysbenchType = tuple[subprocess.Popen, TextIO] # Functions -def check_cooling_results(test_obj, sensors, run_sysbench=False): +def check_cooling_results(test_obj, sensors, run_sysbench=False) -> None: """Check cooling results and update test_obj.""" max_temp = sensors.cpu_max_temp() temp_labels = ['Idle', 'Max', 'Cooldown'] @@ -44,12 +47,12 @@ def check_cooling_results(test_obj, sensors, run_sysbench=False): test_obj.report.append(f' {line}') -def
check_mprime_results(test_obj, working_dir): +def check_mprime_results(test_obj, working_dir) -> None: """Check mprime log files and update test_obj.""" passing_lines = {} warning_lines = {} - def _read_file(log_name): + def _read_file(log_name) -> list[str]: """Read file and split into lines, returns list.""" lines = [] try: @@ -99,7 +102,7 @@ def check_mprime_results(test_obj, working_dir): test_obj.report.append(color_string(' Unknown result', 'YELLOW')) -def start_mprime(working_dir, log_path): +def start_mprime(working_dir, log_path) -> subprocess.Popen: """Start mprime and save filtered output to log, returns Popen object.""" set_apple_fan_speed('max') proc_mprime = subprocess.Popen( # pylint: disable=consider-using-with @@ -124,7 +127,7 @@ def start_mprime(working_dir, log_path): return proc_mprime -def start_sysbench(sensors, sensors_out, log_path, pane): +def start_sysbench(sensors, sensors_out, log_path, pane) -> SysbenchType: """Start sysbench, returns tuple with Popen object and file handle.""" set_apple_fan_speed('max') sysbench_cmd = [ @@ -156,7 +159,7 @@ def start_sysbench(sensors, sensors_out, log_path, pane): return (proc_sysbench, filehandle_sysbench) -def set_apple_fan_speed(speed): +def set_apple_fan_speed(speed) -> None: """Set Apple fan speed.""" cmd = None @@ -179,7 +182,7 @@ def set_apple_fan_speed(speed): exe.run_program(cmd, check=False) -def stop_mprime(proc_mprime): +def stop_mprime(proc_mprime) -> None: """Stop mprime gracefully, then forcefully as needed.""" proc_mprime.terminate() try: @@ -189,7 +192,7 @@ def stop_mprime(proc_mprime): set_apple_fan_speed('auto') -def stop_sysbench(proc_sysbench, filehandle_sysbench): +def stop_sysbench(proc_sysbench, filehandle_sysbench) -> None: """Stop sysbench.""" proc_sysbench.terminate() try: diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index 051ab228..b4fccb13 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -100,7 +100,7 @@ class State(): self.init_tmux() 
exe.start_thread(self.fix_tmux_layout_loop) - def abort_testing(self): + def abort_testing(self) -> None: """Set unfinished tests as aborted and cleanup tmux panes.""" for group in self.test_groups: for test in group.test_objects: @@ -119,7 +119,7 @@ class State(): tmux.kill_pane(_id) self.panes.pop(key) - def disk_safety_checks(self, prep=False, wait_for_self_tests=True): + def disk_safety_checks(self, prep=False, wait_for_self_tests=True) -> None: # pylint: disable=too-many-branches,too-many-statements """Run disk safety checks.""" self_tests_in_progress = False @@ -204,8 +204,7 @@ class State(): std.sleep(60) self.disk_safety_checks(wait_for_self_tests=False) - def fix_tmux_layout(self, forced=True): - # pylint: disable=unused-argument + def fix_tmux_layout(self, forced=True) -> None: """Fix tmux layout based on cfg.hw.TMUX_LAYOUT.""" try: tmux.fix_layout(self.panes, self.layout, forced=forced) @@ -213,7 +212,7 @@ class State(): # Assuming self.panes changed while running pass - def fix_tmux_layout_loop(self): + def fix_tmux_layout_loop(self) -> None: """Fix tmux layout on a loop. NOTE: This should be called as a thread. 
@@ -222,7 +221,7 @@ class State(): self.fix_tmux_layout(forced=False) std.sleep(1) - def init_diags(self, menu): + def init_diags(self, menu) -> None: """Initialize diagnostic pass.""" # Reset objects @@ -293,7 +292,7 @@ class State(): # Run safety checks self.disk_safety_checks(prep=True) - def init_tmux(self): + def init_tmux(self) -> None: """Initialize tmux layout.""" tmux.kill_all_panes() @@ -322,7 +321,7 @@ class State(): text=' ', ) - def save_debug_reports(self): + def save_debug_reports(self) -> None: """Save debug reports to disk.""" LOG.info('Saving debug reports') debug_dir = pathlib.Path(f'{self.log_dir}/debug') @@ -368,7 +367,7 @@ class State(): _f.write(f'\n{test.name}:\n') _f.write('\n'.join(debug.generate_object_report(test, indent=1))) - def update_clock(self): + def update_clock(self) -> None: """Update 'Started' pane following clock sync.""" tmux.respawn_pane( pane_id=self.panes['Started'], @@ -379,7 +378,7 @@ class State(): ), ) - def update_progress_pane(self): + def update_progress_pane(self) -> None: """Update progress pane.""" report = [] width = cfg.hw.TMUX_SIDE_WIDTH @@ -401,13 +400,13 @@ class State(): with open(out_path, 'w', encoding='utf-8') as _f: _f.write('\n'.join(report)) - def update_top_pane(self, text): + def update_top_pane(self, text) -> None: """Update top pane with text.""" tmux.respawn_pane(self.panes['Top'], text=f'{self.top_text}\n{text}') # Functions -def build_menu(cli_mode=False, quick_mode=False): +def build_menu(cli_mode=False, quick_mode=False) -> std.Menu: # pylint: disable=too-many-branches """Build main menu, returns wk.std.Menu.""" menu = std.Menu(title=None) @@ -459,7 +458,7 @@ def build_menu(cli_mode=False, quick_mode=False): return menu -def cpu_stress_tests(state, test_objects): +def cpu_stress_tests(state, test_objects) -> None: # pylint: disable=too-many-statements """CPU & cooling check using Prime95 and Sysbench.""" LOG.info('CPU Test (Prime95)') @@ -587,7 +586,7 @@ def cpu_stress_tests(state, 
test_objects): raise std.GenericAbort('Aborted') -def disk_attribute_check(state, test_objects): +def disk_attribute_check(state, test_objects) -> None: """Disk attribute check.""" LOG.info('Disk Attribute Check') for test in test_objects: @@ -607,7 +606,7 @@ def disk_attribute_check(state, test_objects): state.update_progress_pane() -def disk_io_benchmark(state, test_objects, skip_usb=True): +def disk_io_benchmark(state, test_objects, skip_usb=True) -> None: # pylint: disable=too-many-statements """Disk I/O benchmark using dd.""" LOG.info('Disk I/O Benchmark (dd)') @@ -671,7 +670,7 @@ def disk_io_benchmark(state, test_objects, skip_usb=True): raise std.GenericAbort('Aborted') -def disk_self_test(state, test_objects): +def disk_self_test(state, test_objects) -> None: # pylint: disable=too-many-statements """Disk self-test if available.""" LOG.info('Disk Self-Test(s)') @@ -729,8 +728,8 @@ def disk_self_test(state, test_objects): raise std.GenericAbort('Aborted') -def disk_surface_scan(state, test_objects): - # pylint: disable=too-many-branches,too-many-statements +def disk_surface_scan(state, test_objects) -> None: + # pylint: disable=too-many-branches """Read-only disk surface scan using badblocks.""" LOG.info('Disk Surface Scan (badblocks)') aborted = False @@ -810,7 +809,7 @@ def disk_surface_scan(state, test_objects): raise std.GenericAbort('Aborted') -def main(): +def main() -> None: # pylint: disable=too-many-branches """Main function for hardware diagnostics.""" args = docopt(DOCSTRING) @@ -883,7 +882,7 @@ def main(): state.update_top_pane('Main Menu') -def print_countdown(proc, seconds): +def print_countdown(proc, seconds) -> None: """Print countdown to screen while proc is alive.""" for i in range(seconds): sec_left = (seconds - i) % 60 @@ -910,7 +909,7 @@ def print_countdown(proc, seconds): print('') -def run_diags(state, menu, quick_mode=False): +def run_diags(state, menu, quick_mode=False) -> None: """Run selected diagnostics.""" aborted = False 
atexit.register(state.save_debug_reports) @@ -964,7 +963,7 @@ def run_diags(state, menu, quick_mode=False): std.pause('Press Enter to return to main menu...') -def show_results(state): +def show_results(state) -> None: """Show test results by device.""" std.sleep(0.5) std.clear_screen() @@ -993,7 +992,7 @@ def show_results(state): std.print_standard(' ') -def sync_clock(): +def sync_clock() -> None: """Sync clock under macOS using sntp.""" cmd = ['sudo', 'sntp', '-Ss', 'us.pool.ntp.org'] proc = exe.run_program(cmd, check=False) diff --git a/scripts/wk/hw/keyboard.py b/scripts/wk/hw/keyboard.py index 20a3db0a..68e2d0a6 100644 --- a/scripts/wk/hw/keyboard.py +++ b/scripts/wk/hw/keyboard.py @@ -12,7 +12,7 @@ LOG = logging.getLogger(__name__) # Functions -def keyboard_test(): +def keyboard_test() -> None: """Test keyboard using OS specific functions.""" if PLATFORM == 'Linux': run_xev() @@ -20,7 +20,7 @@ def keyboard_test(): print_warning(f'Not supported under this OS: {PLATFORM}') -def run_xev(): +def run_xev() -> None: """Test keyboard using xev.""" LOG.info('Keyboard Test (xev)') cmd = ['xev', '-event', 'keyboard'] diff --git a/scripts/wk/hw/network.py b/scripts/wk/hw/network.py index 68ab30d9..700ebfea 100644 --- a/scripts/wk/hw/network.py +++ b/scripts/wk/hw/network.py @@ -21,7 +21,7 @@ LOG = logging.getLogger(__name__) # Functions -def network_test(): +def network_test() -> None: """Run network tests.""" LOG.info('Network Test') try_and_print = TryAndPrint() diff --git a/scripts/wk/hw/screensavers.py b/scripts/wk/hw/screensavers.py index 9bdfb719..4417777b 100644 --- a/scripts/wk/hw/screensavers.py +++ b/scripts/wk/hw/screensavers.py @@ -14,7 +14,7 @@ LOG = logging.getLogger(__name__) # Functions -def screensaver(name): +def screensaver(name) -> None: """Show screensaver""" LOG.info('Screensaver (%s)', name) if name == 'matrix': diff --git a/scripts/wk/hw/sensors.py b/scripts/wk/hw/sensors.py index 7785794d..6950f191 100644 --- a/scripts/wk/hw/sensors.py +++ 
b/scripts/wk/hw/sensors.py @@ -7,6 +7,7 @@ import pathlib import re from subprocess import CalledProcessError +from typing import Any from wk.cfg.hw import CPU_CRITICAL_TEMP, SMC_IDS, TEMP_COLORS from wk.exe import run_program, start_thread @@ -39,14 +40,14 @@ class Sensors(): self.data = get_sensor_data() self.out_path = None - def clear_temps(self): + def clear_temps(self) -> None: """Clear saved temps but keep structure""" for adapters in self.data.values(): for sources in adapters.values(): for source_data in sources.values(): source_data['Temps'] = [] - def cpu_max_temp(self): + def cpu_max_temp(self) -> float: """Get max temp from any CPU source, returns float. NOTE: If no temps are found this returns zero. @@ -64,7 +65,7 @@ class Sensors(): # Done return max_temp - def cpu_reached_critical_temp(self): + def cpu_reached_critical_temp(self) -> bool: """Check if CPU reached CPU_CRITICAL_TEMP, returns bool.""" for section, adapters in self.data.items(): if not section.startswith('CPU'): @@ -80,7 +81,8 @@ class Sensors(): # Didn't return above so temps are within the threshold return False - def generate_report(self, *temp_labels, colored=True, only_cpu=False): + def generate_report( + self, *temp_labels, colored=True, only_cpu=False) -> list[str]: """Generate report based on given temp_labels, returns list.""" report = [] @@ -117,7 +119,8 @@ class Sensors(): def monitor_to_file( self, out_path, alt_max=None, - exit_on_thermal_limit=True, temp_labels=None, thermal_action=None): + exit_on_thermal_limit=True, temp_labels=None, + thermal_action=None) -> None: # pylint: disable=too-many-arguments """Write report to path every second until stopped. 
@@ -151,7 +154,7 @@ class Sensors(): # Sleep before next loop sleep(0.5) - def save_average_temps(self, temp_label, seconds=10): + def save_average_temps(self, temp_label, seconds=10) -> None: """Save average temps under temp_label over provided seconds..""" self.clear_temps() @@ -177,7 +180,8 @@ class Sensors(): def start_background_monitor( self, out_path, alt_max=None, - exit_on_thermal_limit=True, temp_labels=None, thermal_action=None): + exit_on_thermal_limit=True, temp_labels=None, + thermal_action=None) -> None: # pylint: disable=too-many-arguments """Start background thread to save report to file. @@ -194,7 +198,7 @@ class Sensors(): ), ) - def stop_background_monitor(self): + def stop_background_monitor(self) -> None: """Stop background thread.""" self.out_path.with_suffix('.stop').touch() self.background_thread.join() @@ -203,14 +207,16 @@ class Sensors(): self.background_thread = None self.out_path = None - def update_sensor_data(self, alt_max=None, exit_on_thermal_limit=True): + def update_sensor_data( + self, alt_max=None, exit_on_thermal_limit=True) -> None: """Update sensor data via OS-specific means.""" if PLATFORM == 'Darwin': self.update_sensor_data_macos(alt_max, exit_on_thermal_limit) elif PLATFORM == 'Linux': self.update_sensor_data_linux(alt_max, exit_on_thermal_limit) - def update_sensor_data_linux(self, alt_max, exit_on_thermal_limit=True): + def update_sensor_data_linux( + self, alt_max, exit_on_thermal_limit=True) -> None: """Update sensor data via lm_sensors.""" lm_sensor_data = get_sensor_data_lm() for section, adapters in self.data.items(): @@ -233,7 +239,8 @@ class Sensors(): if source_data['Current'] >= CPU_CRITICAL_TEMP: raise ThermalLimitReachedError('CPU temps reached limit') - def update_sensor_data_macos(self, alt_max, exit_on_thermal_limit=True): + def update_sensor_data_macos( + self, alt_max, exit_on_thermal_limit=True) -> None: """Update sensor data via SMC.""" for section, adapters in self.data.items(): for sources in 
adapters.values(): @@ -261,7 +268,7 @@ class Sensors(): # Functions -def fix_sensor_name(name): +def fix_sensor_name(name) -> str: """Cleanup sensor name, returns str.""" name = re.sub(r'^(\w+)-(\w+)-(\w+)', r'\1 (\2 \3)', name, re.IGNORECASE) name = name.title() @@ -280,7 +287,7 @@ def fix_sensor_name(name): return name -def get_sensor_data(): +def get_sensor_data() -> dict[Any, Any]: """Get sensor data via OS-specific means, returns dict.""" sensor_data = {} if PLATFORM == 'Darwin': @@ -291,7 +298,7 @@ def get_sensor_data(): return sensor_data -def get_sensor_data_linux(): +def get_sensor_data_linux() -> dict[Any, Any]: """Get sensor data via lm_sensors, returns dict.""" raw_lm_sensor_data = get_sensor_data_lm() sensor_data = {'CPUTemps': {}, 'Others': {}} @@ -332,7 +339,7 @@ def get_sensor_data_linux(): return sensor_data -def get_sensor_data_lm(): +def get_sensor_data_lm() -> dict[Any, Any]: """Get raw sensor data via lm_sensors, returns dict.""" raw_lm_sensor_data = {} cmd = ['sensors', '-j'] @@ -363,7 +370,7 @@ def get_sensor_data_lm(): return raw_lm_sensor_data -def get_sensor_data_macos(): +def get_sensor_data_macos() -> dict[Any, Any]: """Get sensor data via SMC, returns dict. NOTE: The data is structured like the lm_sensor data. 
@@ -407,7 +414,7 @@ def get_sensor_data_macos(): return sensor_data -def get_temp_str(temp, colored=True): +def get_temp_str(temp, colored=True) -> str: """Get colored string based on temp, returns str.""" temp_color = None diff --git a/scripts/wk/hw/smart.py b/scripts/wk/hw/smart.py index 6e1a1ab5..827d1c04 100644 --- a/scripts/wk/hw/smart.py +++ b/scripts/wk/hw/smart.py @@ -77,7 +77,7 @@ def check_attributes(dev, only_blocking=False) -> bool: return attributes_ok -def check_self_test_results(test_obj, aborted=False): +def check_self_test_results(test_obj, aborted=False) -> None: """Check SMART self-test results.""" test_obj.report.append(color_string('Self-Test', 'BLUE')) if test_obj.disabled or test_obj.status == 'Denied': diff --git a/scripts/wk/hw/surface_scan.py b/scripts/wk/hw/surface_scan.py index 085bc52c..85d87f96 100644 --- a/scripts/wk/hw/surface_scan.py +++ b/scripts/wk/hw/surface_scan.py @@ -20,7 +20,7 @@ LOG = logging.getLogger(__name__) # Functions -def run_scan(test_obj, log_path): +def run_scan(test_obj, log_path) -> None: """Run surface scan and handle exceptions.""" block_size = '1024' dev = test_obj.dev diff --git a/scripts/wk/hw/test.py b/scripts/wk/hw/test.py index e9653d9a..ebed113a 100644 --- a/scripts/wk/hw/test.py +++ b/scripts/wk/hw/test.py @@ -18,7 +18,7 @@ class Test: report: list[str] = field(init=False, default_factory=list) status: str = field(init=False, default='Pending') - def set_status(self, status): + def set_status(self, status) -> None: """Update status string.""" if self.disabled: # Don't change status if disabled From 312df1ee9dac468d7483855c1931a9edf615fe32 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Fri, 8 Apr 2022 15:38:03 -0600 Subject: [PATCH 12/18] Refactor import logic for wk.hw.diags --- scripts/wk/hw/diags.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index b4fccb13..55d9f53c 100644 --- 
a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -16,21 +16,12 @@ from wk.hw import benchmark as hw_benchmark from wk.hw import cpu as hw_cpu from wk.hw import disk as hw_disk from wk.hw import sensors as hw_sensors +from wk.hw import smart as hw_smart from wk.hw import surface_scan as hw_surface_scan from wk.hw import system as hw_system from wk.hw.audio import audio_test from wk.hw.keyboard import keyboard_test from wk.hw.network import network_test -from wk.hw.smart import ( - CriticalHardwareError, - SMARTSelfTestInProgressError, - abort_self_test, - check_attributes, - check_self_test_results, - generate_attribute_report, - run_self_test, - safety_checks, - ) from wk.hw.screensavers import screensaver from wk.hw.test import Test, TestGroup @@ -131,8 +122,8 @@ class State(): continue try: - safety_checks(disk) - except CriticalHardwareError: + hw_smart.safety_checks(disk) + except hw_smart.CriticalHardwareError: disable_tests = True disk.add_note('Critical hardware error detected.', 'RED') if 'Disk Attributes' in disk.tests: @@ -145,7 +136,7 @@ class State(): 'Critical hardware error detected during diagnostics', 'YELLOW', ) - except SMARTSelfTestInProgressError as err: + except hw_smart.SMARTSelfTestInProgressError as err: if prep: std.print_warning(f'SMART self-test(s) in progress for {disk.path}') if std.ask('Continue with all tests disabled for this device?'): @@ -170,7 +161,7 @@ class State(): if ( 'Disk Attributes' in disk.tests and not disk.tests['Disk Attributes'].failed - and not check_attributes(disk, only_blocking=False) + and not hw_smart.check_attributes(disk, only_blocking=False) ): # No blocking errors encountered, but found minor attribute failures if not prep: @@ -595,7 +586,7 @@ def disk_attribute_check(state, test_objects) -> None: test.set_status('N/A') continue - if check_attributes(test.dev): + if hw_smart.check_attributes(test.dev): test.passed = True test.set_status('Passed') else: @@ -691,7 +682,7 @@ def disk_self_test(state, 
test_objects) -> None: # Start thread test.set_status('Working') test_log = f'{state.log_dir}/{test.dev.path.name}_selftest.log' - threads.append(exe.start_thread(run_self_test, args=(test, test_log))) + threads.append(exe.start_thread(hw_smart.run_self_test, args=(test, test_log))) # Show progress if threads[-1].is_alive(): @@ -710,12 +701,12 @@ def disk_self_test(state, test_objects) -> None: except KeyboardInterrupt: aborted = True for test in test_objects: - abort_self_test(test.dev) + hw_smart.abort_self_test(test.dev) std.sleep(0.5) # Save report(s) for test in test_objects: - check_self_test_results(test, aborted=aborted) + hw_smart.check_self_test_results(test, aborted=aborted) # Cleanup state.update_progress_pane() @@ -745,7 +736,7 @@ def disk_surface_scan(state, test_objects) -> None: ) for disk in state.disks: failed_attributes = [ - line for line in generate_attribute_report(disk) if 'failed' in line + line for line in hw_smart.generate_attribute_report(disk) if 'failed' in line ] if failed_attributes: size_str = std.bytes_to_string(disk.size, use_binary=False) From 41b4ffd9fb5b60be4be2fa3c3757fdfefecf2581 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Fri, 8 Apr 2022 18:38:55 -0600 Subject: [PATCH 13/18] Cleanup pylint sections --- scripts/wk/hw/diags.py | 49 ++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index 55d9f53c..fcf77dd3 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -598,7 +598,6 @@ def disk_attribute_check(state, test_objects) -> None: def disk_io_benchmark(state, test_objects, skip_usb=True) -> None: - # pylint: disable=too-many-statements """Disk I/O benchmark using dd.""" LOG.info('Disk I/O Benchmark (dd)') aborted = False @@ -623,31 +622,31 @@ def disk_io_benchmark(state, test_objects, skip_usb=True) -> None: continue # Start benchmark - if not aborted: - std.clear_screen() - 
std.print_report(test.dev.generate_report()) - test.set_status('Working') - test_log = f'{state.log_dir}/{test.dev.path.name}_benchmark.out' - tmux.respawn_pane( - state.panes['I/O Benchmark'], - watch_cmd='tail', - watch_file=test_log, - ) - state.update_progress_pane() - try: - hw_benchmark.run_io_test(test, test_log) - except KeyboardInterrupt: - aborted = True - except (subprocess.CalledProcessError, TypeError, ValueError) as err: - # Something went wrong - LOG.error('%s', err) - test.set_status('ERROR') - test.report.append(std.color_string(' Unknown Error', 'RED')) + std.clear_screen() + std.print_report(test.dev.generate_report()) + test.set_status('Working') + test_log = f'{state.log_dir}/{test.dev.path.name}_benchmark.out' + tmux.respawn_pane( + state.panes['I/O Benchmark'], + watch_cmd='tail', + watch_file=test_log, + ) + state.update_progress_pane() + try: + hw_benchmark.run_io_test(test, test_log) + except KeyboardInterrupt: + aborted = True + except (subprocess.CalledProcessError, TypeError, ValueError) as err: + # Something went wrong + LOG.error('%s', err) + test.set_status('ERROR') + test.report.append(std.color_string(' Unknown Error', 'RED')) # Mark test(s) aborted if necessary if aborted: test.set_status('Aborted') test.report.append(std.color_string(' Aborted', 'YELLOW')) + break # Update progress after each test state.update_progress_pane() @@ -662,7 +661,6 @@ def disk_io_benchmark(state, test_objects, skip_usb=True) -> None: def disk_self_test(state, test_objects) -> None: - # pylint: disable=too-many-statements """Disk self-test if available.""" LOG.info('Disk Self-Test(s)') aborted = False @@ -720,7 +718,6 @@ def disk_self_test(state, test_objects) -> None: def disk_surface_scan(state, test_objects) -> None: - # pylint: disable=too-many-branches """Read-only disk surface scan using badblocks.""" LOG.info('Disk Surface Scan (badblocks)') aborted = False @@ -745,15 +742,11 @@ def disk_surface_scan(state, test_objects) -> None: [None, 'BLUE', 
None, 'CYAN', None], sep='', ) - #std.print_colored([disk.path.name, disk.description], [None, 'BLUE']) std.print_report(failed_attributes) std.print_standard('') # Run surface scans - for test in reversed(test_objects): - if test.disabled: - # Skip - continue + for test in reversed([test for test in test_objects if not test.disabled]): # Start thread test_log = f'{state.log_dir}/{test.dev.path.name}_badblocks.log' From af8b2b7dd317792e46bb2c3d752297f7c6d57e69 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Fri, 8 Apr 2022 18:47:08 -0600 Subject: [PATCH 14/18] Refactor SMART self-test sections --- scripts/wk/clone/ddrescue.py | 3 - scripts/wk/hw/diags.py | 7 +- scripts/wk/hw/smart.py | 300 ++++++++++++++++++++--------------- 3 files changed, 171 insertions(+), 139 deletions(-) diff --git a/scripts/wk/clone/ddrescue.py b/scripts/wk/clone/ddrescue.py index 4d757c27..be797355 100644 --- a/scripts/wk/clone/ddrescue.py +++ b/scripts/wk/clone/ddrescue.py @@ -30,7 +30,6 @@ from wk.cfg.ddrescue import ( from wk.hw import disk as hw_disk from wk.hw.smart import ( CriticalHardwareError, - SMARTNotSupportedError, SMARTSelfTestInProgressError, safety_checks, update_smart_details, @@ -1503,8 +1502,6 @@ def check_destination_health(destination): result = 'Critical hardware error detected on destination' except SMARTSelfTestInProgressError: result = 'SMART self-test in progress on destination' - except SMARTNotSupportedError: - pass # Done return result diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index fcf77dd3..253b3048 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -700,11 +700,8 @@ def disk_self_test(state, test_objects) -> None: aborted = True for test in test_objects: hw_smart.abort_self_test(test.dev) - std.sleep(0.5) - - # Save report(s) - for test in test_objects: - hw_smart.check_self_test_results(test, aborted=aborted) + std.sleep(0.5) + hw_smart.build_self_test_report(test, aborted=True) # Cleanup 
state.update_progress_pane() diff --git a/scripts/wk/hw/smart.py b/scripts/wk/hw/smart.py index 827d1c04..ac658615 100644 --- a/scripts/wk/hw/smart.py +++ b/scripts/wk/hw/smart.py @@ -28,9 +28,6 @@ LOG = logging.getLogger(__name__) class CriticalHardwareError(RuntimeError): """Exception used for critical hardware failures.""" -class SMARTNotSupportedError(TypeError): - """Exception used for disks lacking SMART support.""" - class SMARTSelfTestInProgressError(RuntimeError): """Exception used when a SMART self-test is in progress.""" @@ -42,6 +39,39 @@ def abort_self_test(dev) -> None: run_program(cmd, check=False) +def build_self_test_report(test_obj, aborted=False) -> None: + """Check self-test results and build report (saved to test_obj). + + NOTE: Not updating SMART data to preserve the result for the report. + + For instance if the test was aborted the report should include the + last known progress instead of just "was aborted by host." + """ + report = [color_string('Self-Test', 'BLUE')] + test_details = get_smart_self_test_details(test_obj.dev) + test_result = test_details.get('status', {}).get('string', 'Unknown') + + # Build report + if test_obj.disabled or test_obj.status == 'Denied': + report.append(color_string(f' {test_obj.status}', 'RED')) + elif test_obj.status == 'N/A' or not test_obj.dev.attributes: + report.append(color_string(f' {test_obj.status}', 'YELLOW')) + elif test_obj.status == 'TestInProgress': + report.append(color_string(' Failed to stop previous test', 'RED')) + test_obj.set_status('Failed') + else: + # Other cases include self-test result string + report.append(f' {test_result.capitalize()}') + if aborted and not (test_obj.passed or test_obj.failed): + report.append(color_string(' Aborted', 'YELLOW')) + test_obj.set_status('Aborted') + elif test_obj.status == 'TimedOut': + report.append(color_string(' TimedOut', 'YELLOW')) + + # Done + test_obj.report.extend(report) + + def check_attributes(dev, only_blocking=False) -> bool: """Check 
if any known attributes are failing, returns bool.""" attributes_ok = True @@ -77,32 +107,6 @@ def check_attributes(dev, only_blocking=False) -> bool: return attributes_ok -def check_self_test_results(test_obj, aborted=False) -> None: - """Check SMART self-test results.""" - test_obj.report.append(color_string('Self-Test', 'BLUE')) - if test_obj.disabled or test_obj.status == 'Denied': - test_obj.report.append(color_string(f' {test_obj.status}', 'RED')) - elif test_obj.status == 'N/A' or not test_obj.dev.attributes: - test_obj.report.append(color_string(f' {test_obj.status}', 'YELLOW')) - else: - # Not updating SMART data here to preserve the test status for the report - # For instance if the test was aborted the report should inlcude the last - # known progress instead of just "was aborted by host" - test_details = get_smart_self_test_details(test_obj.dev) - test_result = test_details.get('status', {}).get('string', 'Unknown') - test_obj.report.append(f' {test_result.capitalize()}') - if aborted and not (test_obj.passed or test_obj.failed): - test_obj.report.append(color_string(' Aborted', 'YELLOW')) - test_obj.set_status('Aborted') - elif test_obj.status == 'TimedOut': - test_obj.report.append(color_string(' TimedOut', 'YELLOW')) - test_obj.set_status('TimedOut') - else: - test_obj.failed = not test_obj.passed - if test_obj.failed: - test_obj.set_status('Failed') - - def enable_smart(dev) -> None: """Try enabling SMART for this disk.""" cmd = [ @@ -202,6 +206,138 @@ def get_smart_self_test_details(dev) -> dict[Any, Any]: return details +def monitor_smart_self_test(test_obj, header_str, log_path) -> bool: + """Monitor SMART self-test status and update test_obj, returns bool.""" + started = False + finished = False + status_str = 'Starting self-test...' 
+ test_details = get_smart_self_test_details(test_obj.dev) + test_minutes = 15 + + # Get real test length + test_minutes = test_details.get('polling_minutes', {}).get('short', 5) + test_minutes = int(test_minutes) + 10 + + # Monitor progress (in five second intervals) + for _i in range(int(test_minutes*60/5)): + sleep(5) + + # Update log + ## NOTE: This is run at least once with the default "Starting..." status + with open(log_path, 'w', encoding='utf-8') as _f: + _f.write(f'{header_str}\nSMART self-test status:\n {status_str}') + + # Update status + update_smart_details(test_obj.dev) + test_details = get_smart_self_test_details(test_obj.dev) + + # Check if test started + started = started or 'remaining_percent' in test_details.get('status', {}) + if not started: + if _i * 5 >= SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS: + # Test didn't start within limit, stop waiting + abort_self_test(test_obj.dev) + test_obj.failed = True + test_obj.set_status('TimedOut') + break + # Still within starting limit, continue to next loop + continue + + # Check test progress + status_str = test_details.get('status', {}).get('string', 'Unknown') + status_str = status_str.capitalize() + + # Check if finished + if 'remaining_percent' not in test_details.get('status', {}): + finished = True + break + + # Done + return finished + + +def run_self_test(test_obj, log_path) -> None: + """Run disk self-test and update test results. + + NOTE: This function is here to reserve a place for future + NVMe self-tests announced in NVMe spec v1.3. + """ + run_smart_self_test(test_obj, log_path) + + +def run_smart_self_test(test_obj, log_path) -> bool: + """Run SMART self-test and check if it passed, returns bool. + + NOTE: An exception will be raised if the disk lacks SMART support. 
+ """ + finished = False + test_details = get_smart_self_test_details(test_obj.dev) + size_str = bytes_to_string(test_obj.dev.size, use_binary=False) + header_str = color_string( + ['[', test_obj.dev.path.name, ' ', size_str, ']'], + [None, 'BLUE', None, 'CYAN', None], + sep='', + ) + + # Check if disk supports self-tests + if not test_details: + # Mark test as passed since it doesn't apply + test_obj.passed = True + test_obj.set_status('N/A') + build_self_test_report(test_obj) + return + + # Update status + with open(log_path, 'w', encoding='utf-8') as _f: + _f.write(f'{header_str}\nInitializing...') + + # Check for, and stop, self-test if currently in-progress + if self_test_in_progress(test_obj.dev): + abort_self_test(test_obj.dev) + for _ in range(6): + # Wait up to a minute for current test to exit + sleep(10) + update_smart_details(test_obj.dev) + if not self_test_in_progress(test_obj.dev): + break + + # Recheck if self-test is in-progress, bail if so + if self_test_in_progress(test_obj.dev): + test_obj.failed = True + test_obj.set_status('TestInProgress') + build_self_test_report(test_obj) + return + + # Start test + cmd = [ + 'sudo', + 'smartctl', + '--tolerance=normal', + '--test=short', + test_obj.dev.path, + ] + run_program(cmd, check=False) + + # Monitor progress + finished = monitor_smart_self_test(test_obj, header_str, log_path) + + # Check result + if finished: + test_obj.passed = test_details.get('status', {}).get('passed', False) + test_obj.failed = test_obj.failed or not test_obj.passed + + # Set status + if test_obj.failed and test_obj.status != 'TimedOut': + test_obj.set_status('Failed') + elif test_obj.passed: + test_obj.set_status('Passed') + else: + test_obj.set_status('Unknown') + + # Build report + build_self_test_report(test_obj) + + def safety_checks(dev) -> None: """Run safety checks and raise an exception if necessary.""" blocking_event_encountered = False @@ -243,114 +379,16 @@ def safety_checks(dev) -> None: raise 
CriticalHardwareError(f'Critical error(s) for: {dev.path}') # SMART self-test status - test_details = get_smart_self_test_details(dev) - if 'remaining_percent' in test_details.get('status', ''): + if self_test_in_progress(dev): msg = f'SMART self-test in progress for: {dev.path}' LOG.error(msg) raise SMARTSelfTestInProgressError(msg) -def run_self_test(test_obj, log_path) -> None: - """Run disk self-test and check if it passed, returns bool.""" - result = None - - try: - test_obj.passed = run_smart_self_test(test_obj.dev, log_path) - except TimeoutError: - test_obj.failed = True - result = 'TimedOut' - except SMARTNotSupportedError: - # Pass test since it doesn't apply - test_obj.passed = True - result = 'N/A' - - # Set status - if result: - test_obj.set_status(result) - else: - if test_obj.failed: - test_obj.set_status('Failed') - elif test_obj.passed: - test_obj.set_status('Passed') - else: - test_obj.set_status('Unknown') - - -def run_smart_self_test(dev, log_path) -> bool: - """Run SMART self-test and check if it passed, returns bool. - - NOTE: An exception will be raised if the disk lacks SMART support. - """ - finished = False - result = None - started = False - status_str = 'Starting self-test...' 
+def self_test_in_progress(dev) -> bool: + """Check if SMART self-test is in progress, returns bool.""" test_details = get_smart_self_test_details(dev) - test_minutes = 15 - size_str = bytes_to_string(dev.size, use_binary=False) - header_str = color_string( - ['[', dev.path.name, ' ', size_str, ']'], - [None, 'BLUE', None, 'CYAN', None], - sep='', - ) - - # Check if disk supports self-tests - if not test_details: - raise SMARTNotSupportedError( - f'SMART self-test not supported for {dev.path}') - - # Get real test length - test_minutes = test_details.get('polling_minutes', {}).get('short', 5) - test_minutes = int(test_minutes) + 10 - - # Start test - with open(log_path, 'w', encoding='utf-8') as _f: - _f.write(f'{header_str}\nInitializing...') - cmd = [ - 'sudo', - 'smartctl', - '--tolerance=normal', - '--test=short', - dev.path, - ] - run_program(cmd, check=False) - - # Monitor progress (in five second intervals) - for _i in range(int(test_minutes*60/5)): - sleep(5) - - # Update status - update_smart_details(dev) - test_details = get_smart_self_test_details(dev) - - # Check test progress - if started: - status_str = test_details.get('status', {}).get('string', 'Unknown') - status_str = status_str.capitalize() - - # Update log - with open(log_path, 'w', encoding='utf-8') as _f: - _f.write(f'{header_str}\nSMART self-test status:\n {status_str}') - - # Check if finished - if 'remaining_percent' not in test_details.get('status', {}): - finished = True - break - - elif 'remaining_percent' in test_details.get('status', {}): - started = True - elif _i * 5 >= SMART_SELF_TEST_START_TIMEOUT_IN_SECONDS: - # Test didn't start within limit, stop waiting - break - - # Check result - if finished: - result = test_details.get('status', {}).get('passed', False) - elif started: - raise TimeoutError(f'SMART self-test timed out for {dev.path}') - - # Done - return result + return 'remaining_percent' in test_details.get('status', '') def update_smart_details(dev) -> None: From 
d667695e9eafa145f14c8bb4b64fbe0e208e95d1 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 11 Apr 2022 14:30:42 -0600 Subject: [PATCH 15/18] Move surface scan check to its own function --- scripts/wk/cfg/hw.py | 1 + scripts/wk/hw/surface_scan.py | 52 ++++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/scripts/wk/cfg/hw.py b/scripts/wk/cfg/hw.py index ae39da69..e0ae4cd8 100644 --- a/scripts/wk/cfg/hw.py +++ b/scripts/wk/cfg/hw.py @@ -20,6 +20,7 @@ BADBLOCKS_REGEX = re.compile( r'^Pass completed, (\d+) bad blocks found. .(\d+)/(\d+)/(\d+) errors', re.IGNORECASE, ) +BADBLOCKS_SKIP_REGEX = re.compile(r'^(Checking|\[)', re.IGNORECASE) CPU_CRITICAL_TEMP = 99 CPU_FAILURE_TEMP = 90 CPU_TEST_MINUTES = 7 diff --git a/scripts/wk/hw/surface_scan.py b/scripts/wk/hw/surface_scan.py index 85d87f96..f3a7212d 100644 --- a/scripts/wk/hw/surface_scan.py +++ b/scripts/wk/hw/surface_scan.py @@ -5,7 +5,11 @@ import logging from subprocess import STDOUT -from wk.cfg.hw import BADBLOCKS_LARGE_DISK, BADBLOCKS_REGEX +from wk.cfg.hw import ( + BADBLOCKS_LARGE_DISK, + BADBLOCKS_REGEX, + BADBLOCKS_SKIP_REGEX, + ) from wk.exe import run_program from wk.std import ( PLATFORM, @@ -20,6 +24,30 @@ LOG = logging.getLogger(__name__) # Functions +def check_surface_scan_results(test_obj, log_path) -> None: + """Check results and set test status.""" + with open(log_path, 'r', encoding='utf-8') as _f: + for line in _f.readlines(): + line = strip_colors(line.strip()) + if not line or BADBLOCKS_SKIP_REGEX.match(line): + # Skip + continue + match = BADBLOCKS_REGEX.search(line) + if match: + if all(s == '0' for s in match.groups()): + test_obj.passed = True + test_obj.report.append(f' {line}') + test_obj.set_status('Passed') + else: + test_obj.failed = True + test_obj.report.append(f' {color_string(line, "YELLOW")}') + test_obj.set_status('Failed') + else: + test_obj.report.append(f' {color_string(line, "YELLOW")}') + if not (test_obj.passed 
or test_obj.failed): + test_obj.set_status('Unknown') + + def run_scan(test_obj, log_path) -> None: """Run surface scan and handle exceptions.""" block_size = '1024' @@ -58,27 +86,7 @@ def run_scan(test_obj, log_path) -> None: ) # Check results - with open(log_path, 'r', encoding='utf-8') as _f: - for line in _f.readlines(): - line = strip_colors(line.strip()) - if not line or line.startswith('Checking') or line.startswith('['): - # Skip - continue - match = BADBLOCKS_REGEX.search(line) - if match: - if all(s == '0' for s in match.groups()): - test_obj.passed = True - test_obj.report.append(f' {line}') - test_obj.set_status('Passed') - else: - test_obj.failed = True - test_obj.report.append(f' {color_string(line, "YELLOW")}') - test_obj.set_status('Failed') - else: - test_obj.report.append(f' {color_string(line, "YELLOW")}') - if not (test_obj.passed or test_obj.failed): - test_obj.set_status('Unknown') - + check_surface_scan_results(test_obj, log_path) if __name__ == '__main__': From 95cd3b645d2527fb1e7252abb9c672bf7a3de9f4 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Mon, 11 Apr 2022 16:50:21 -0600 Subject: [PATCH 16/18] Remove SMARTSelfTestInProgressError sections Going forward this is a non-blocking error. 
--- scripts/wk/clone/ddrescue.py | 3 --- scripts/wk/hw/diags.py | 35 ++--------------------------------- scripts/wk/hw/smart.py | 9 --------- 3 files changed, 2 insertions(+), 45 deletions(-) diff --git a/scripts/wk/clone/ddrescue.py b/scripts/wk/clone/ddrescue.py index be797355..a09b276f 100644 --- a/scripts/wk/clone/ddrescue.py +++ b/scripts/wk/clone/ddrescue.py @@ -30,7 +30,6 @@ from wk.cfg.ddrescue import ( from wk.hw import disk as hw_disk from wk.hw.smart import ( CriticalHardwareError, - SMARTSelfTestInProgressError, safety_checks, update_smart_details, ) @@ -1500,8 +1499,6 @@ def check_destination_health(destination): safety_checks(destination) except CriticalHardwareError: result = 'Critical hardware error detected on destination' - except SMARTSelfTestInProgressError: - result = 'SMART self-test in progress on destination' # Done return result diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index 253b3048..6da145c8 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -110,10 +110,9 @@ class State(): tmux.kill_pane(_id) self.panes.pop(key) - def disk_safety_checks(self, prep=False, wait_for_self_tests=True) -> None: + def disk_safety_checks(self, prep=False) -> None: # pylint: disable=too-many-branches,too-many-statements """Run disk safety checks.""" - self_tests_in_progress = False for disk in self.disks: disable_tests = False @@ -136,27 +135,6 @@ class State(): 'Critical hardware error detected during diagnostics', 'YELLOW', ) - except hw_smart.SMARTSelfTestInProgressError as err: - if prep: - std.print_warning(f'SMART self-test(s) in progress for {disk.path}') - if std.ask('Continue with all tests disabled for this device?'): - disable_tests = True - else: - std.print_standard('Diagnostics aborted.') - std.print_standard(' ') - std.pause('Press Enter to exit...') - raise SystemExit(1) from err - elif wait_for_self_tests: - self_tests_in_progress = True - else: - # Other tests will NOT be disabled - LOG.warning('SMART data 
may not be reliable for: %s', disk.path) - # Add note to report - if 'Disk Self-Test' in disk.tests: - disk.tests['Disk Self-Test'].failed = True - disk.tests['Disk Self-Test'].report.append( - std.color_string('Please manually review SMART data', 'YELLOW'), - ) else: if ( 'Disk Attributes' in disk.tests @@ -188,13 +166,6 @@ class State(): if disable_tests: disk.disable_disk_tests() - # Wait for self-test(s) - if self_tests_in_progress: - std.print_warning('SMART self-test(s) in progress') - std.print_standard('Waiting 60 seconds before continuing...') - std.sleep(60) - self.disk_safety_checks(wait_for_self_tests=False) - def fix_tmux_layout(self, forced=True) -> None: """Fix tmux layout based on cfg.hw.TMUX_LAYOUT.""" try: @@ -921,9 +892,7 @@ def run_diags(state, menu, quick_mode=False) -> None: # Run safety checks if group.name.startswith('Disk'): - state.disk_safety_checks( - wait_for_self_tests=group.name != 'Disk Attributes', - ) + state.disk_safety_checks() # Handle aborts if aborted: diff --git a/scripts/wk/hw/smart.py b/scripts/wk/hw/smart.py index ac658615..0b78af18 100644 --- a/scripts/wk/hw/smart.py +++ b/scripts/wk/hw/smart.py @@ -28,9 +28,6 @@ LOG = logging.getLogger(__name__) class CriticalHardwareError(RuntimeError): """Exception used for critical hardware failures.""" -class SMARTSelfTestInProgressError(RuntimeError): - """Exception used when a SMART self-test is in progress.""" - # Functions def abort_self_test(dev) -> None: @@ -378,12 +375,6 @@ def safety_checks(dev) -> None: if blocking_event_encountered: raise CriticalHardwareError(f'Critical error(s) for: {dev.path}') - # SMART self-test status - if self_test_in_progress(dev): - msg = f'SMART self-test in progress for: {dev.path}' - LOG.error(msg) - raise SMARTSelfTestInProgressError(msg) - def self_test_in_progress(dev) -> bool: """Check if SMART self-test is in progress, returns bool.""" From 2585ed584c4200e4556a285cca2fd3676c2869dc Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> 
Date: Mon, 18 Apr 2022 09:21:29 -0700 Subject: [PATCH 17/18] Refactor disk safety checks --- scripts/wk/clone/ddrescue.py | 34 ++++++---- scripts/wk/hw/diags.py | 117 ++++++++++++++--------------------- scripts/wk/hw/smart.py | 14 ++--- 3 files changed, 75 insertions(+), 90 deletions(-) diff --git a/scripts/wk/clone/ddrescue.py b/scripts/wk/clone/ddrescue.py index a09b276f..f56854cf 100644 --- a/scripts/wk/clone/ddrescue.py +++ b/scripts/wk/clone/ddrescue.py @@ -29,8 +29,8 @@ from wk.cfg.ddrescue import ( ) from wk.hw import disk as hw_disk from wk.hw.smart import ( - CriticalHardwareError, - safety_checks, + check_attributes, + smart_status_ok, update_smart_details, ) @@ -951,13 +951,23 @@ class State(): def safety_check_destination(self): """Run safety checks for destination and abort if necessary.""" - try: - safety_checks(self.destination) - except CriticalHardwareError as err: + errors_detected = False + + # Check for critical errors + if not smart_status_ok(self.destination): std.print_error( f'Critical error(s) detected for: {self.destination.path}', ) - raise std.GenericAbort() from err + + # Check for minor errors + if not check_attributes(self.destination, only_blocking=False): + std.print_warning( + f'Attribute error(s) detected for: {self.destination.path}', + ) + + # Done + if errors_detected: + raise std.GenericAbort() def safety_check_size(self): """Run size safety check and abort if necessary.""" @@ -1494,11 +1504,13 @@ def check_destination_health(destination): # Return empty string return result - # Run safety checks - try: - safety_checks(destination) - except CriticalHardwareError: - result = 'Critical hardware error detected on destination' + # Check for critical errors + if not smart_status_ok(destination): + result = 'Critical error(s) detected for: {destination.path}' + + # Check for minor errors + if not check_attributes(destination, only_blocking=False): + result = f'Attribute error(s) detected for: {destination.path}' # Done return result 
diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index 6da145c8..1307190a 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -110,61 +110,12 @@ class State(): tmux.kill_pane(_id) self.panes.pop(key) - def disk_safety_checks(self, prep=False) -> None: - # pylint: disable=too-many-branches,too-many-statements - """Run disk safety checks.""" - for disk in self.disks: - disable_tests = False - - # Skip already disabled devices - if all(test.disabled for test in disk.tests): - continue - - try: - hw_smart.safety_checks(disk) - except hw_smart.CriticalHardwareError: - disable_tests = True - disk.add_note('Critical hardware error detected.', 'RED') - if 'Disk Attributes' in disk.tests: - disk.tests['Disk Attributes'].failed = True - disk.tests['Disk Attributes'].set_status('Failed') - if not prep: - # Mid-diag failure detected - LOG.warning('Critical hardware error detected during diagnostics') - disk.add_note( - 'Critical hardware error detected during diagnostics', - 'YELLOW', - ) - else: - if ( - 'Disk Attributes' in disk.tests - and not disk.tests['Disk Attributes'].failed - and not hw_smart.check_attributes(disk, only_blocking=False) - ): - # No blocking errors encountered, but found minor attribute failures - if not prep: - # Mid-diag failure detected - LOG.warning('Attribute(s) failure detected during diagnostics') - disk.add_note( - 'Attribute(s) failure detected during diagnostics', - 'YELLOW', - ) - disk.tests['Disk Attributes'].failed = True - disk.tests['Disk Attributes'].set_status('Failed') - - # Check Surface Scan - if ( - 'Disk Surface Scan' in disk.tests - and disk.tests['Disk Surface Scan'].failed - and 'Disk I/O Benchmark' in disk.tests - ): - # Disable I/O Benchmark test - disk.tests['Disk I/O Benchmark'].set_status('Skipped') - disk.tests['Disk I/O Benchmark'].disabled = True - - # Disable tests if necessary - if disable_tests: - disk.disable_disk_tests() + def disk_safety_checks(self) -> None: + """Check for mid-run SMART 
failures and failed test(s).""" + for dev in self.disks: + disk_smart_status_check(dev, mid_run=True) + if any(test.failed for test in dev.tests): + dev.disable_disk_tests() def fix_tmux_layout(self, forced=True) -> None: """Fix tmux layout based on cfg.hw.TMUX_LAYOUT.""" @@ -251,9 +202,6 @@ class State(): test_group.test_objects.append(test_obj) self.test_groups.append(test_group) - # Run safety checks - self.disk_safety_checks(prep=True) - def init_tmux(self) -> None: """Initialize tmux layout.""" tmux.kill_all_panes() @@ -552,18 +500,12 @@ def disk_attribute_check(state, test_objects) -> None: """Disk attribute check.""" LOG.info('Disk Attribute Check') for test in test_objects: + disk_smart_status_check(test.dev, mid_run=False) if not test.dev.attributes: # No NVMe/SMART data test.set_status('N/A') continue - if hw_smart.check_attributes(test.dev): - test.passed = True - test.set_status('Passed') - else: - test.failed = True - test.set_status('Failed') - # Done state.update_progress_pane() @@ -685,6 +627,43 @@ def disk_self_test(state, test_objects) -> None: raise std.GenericAbort('Aborted') +def disk_smart_status_check(dev, mid_run=True) -> None: + """Check SMART status.""" + msg = None + color = None + disable_tests = False + + # Check SMART status and attributes + if not hw_smart.smart_status_ok(dev): + msg = 'Critical SMART error detected' + color = 'RED' + disable_tests = True + elif not hw_smart.check_attributes(dev, only_blocking=False): + # Non-blocking errors + msg = 'SMART attribute failure(s) detected' + color = 'YELLOW' + + # Log errors if detected + if msg: + msg = f'{msg}{" during diagnostics" if mid_run else ""}.' 
+ LOG.warning(msg) + dev.add_note(msg, color) + + # Set Disk Attributes test result + for test in dev.tests: + if test.name == 'Disk Attributes': + test.failed = test.failed or msg + test.passed = not test.failed + if test.failed: + test.set_status('Failed') + elif 'N/A' not in test.status: + test.set_status('Passed') + + # Disable further testing if needed + if disable_tests: + dev.disable_disk_tests() + + def disk_surface_scan(state, test_objects) -> None: """Read-only disk surface scan using badblocks.""" LOG.info('Disk Surface Scan (badblocks)') @@ -889,10 +868,10 @@ def run_diags(state, menu, quick_mode=False) -> None: state.abort_testing() state.update_progress_pane() break - - # Run safety checks - if group.name.startswith('Disk'): - state.disk_safety_checks() + else: + # Run safety checks after disk tests + if group.name.startswith('Disk'): + state.disk_safety_checks() # Handle aborts if aborted: diff --git a/scripts/wk/hw/smart.py b/scripts/wk/hw/smart.py index 0b78af18..f4292b06 100644 --- a/scripts/wk/hw/smart.py +++ b/scripts/wk/hw/smart.py @@ -24,11 +24,6 @@ from wk.std import bytes_to_string, color_string, sleep LOG = logging.getLogger(__name__) -# Exception Classes -class CriticalHardwareError(RuntimeError): - """Exception used for critical hardware failures.""" - - # Functions def abort_self_test(dev) -> None: """Abort currently running non-captive self-test.""" @@ -335,8 +330,8 @@ def run_smart_self_test(test_obj, log_path) -> bool: build_self_test_report(test_obj) -def safety_checks(dev) -> None: - """Run safety checks and raise an exception if necessary.""" +def smart_status_ok(dev) -> bool: + """Check SMART attributes and overall assessment, returns bool.""" blocking_event_encountered = False update_smart_details(dev) @@ -371,9 +366,8 @@ def safety_checks(dev) -> None: dev.add_note(msg, 'RED') LOG.error('%s %s', dev.path, msg) - # Raise blocking exception if necessary - if blocking_event_encountered: - raise CriticalHardwareError(f'Critical 
error(s) for: {dev.path}') + # Done + return not blocking_event_encountered def self_test_in_progress(dev) -> bool: From 6642aad2c83c6cf993fe1dea8c98c5915e7ffc0c Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Sun, 1 May 2022 16:20:39 -0700 Subject: [PATCH 18/18] Update ddrescue-tui to use new Disk object vars --- scripts/wk/clone/ddrescue.py | 80 ++++++++++++++++++------------------ scripts/wk/hw/disk.py | 2 + 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/scripts/wk/clone/ddrescue.py b/scripts/wk/clone/ddrescue.py index f56854cf..bfd3af05 100644 --- a/scripts/wk/clone/ddrescue.py +++ b/scripts/wk/clone/ddrescue.py @@ -133,12 +133,12 @@ class BlockPair(): NOTE: source should be a wk.hw.obj.Disk() object and destination should be a pathlib.Path() object. """ - self.sector_size = source.details.get('phy-sec', 512) + self.sector_size = source.phy_sec self.source = source.path self.destination = destination self.map_data = {} self.map_path = None - self.size = source.details['size'] + self.size = source.size self.status = OrderedDict({ 'read-skip': 'Pending', 'read-full': 'Pending', @@ -150,9 +150,9 @@ class BlockPair(): # Set map path # e.g. 
'(Clone|Image)_Model[_p#]_Size[_Label].map' map_name = model if model else 'None' - if source.details['bus'] == 'Image': + if source.bus == 'Image': map_name = 'Image' - if source.details['parent']: + if source.parent: part_num = re.sub(r"^.*?(\d+)$", r"\1", source.path.name) map_name += f'_p{part_num}' size_str = std.bytes_to_string( @@ -160,8 +160,8 @@ class BlockPair(): use_binary=False, ) map_name += f'_{size_str.replace(" ", "")}' - if source.details.get('label', ''): - map_name += f'_{source.details["label"]}' + if source.raw_details.get('label', ''): + map_name += f'_{source.raw_details["label"]}' map_name = map_name.replace(' ', '_') map_name = map_name.replace('/', '_') if destination.is_dir(): @@ -316,8 +316,8 @@ class BlockPair(): # Mark future passes as skipped if applicable if percent == 100: status_keys = list(self.status.keys()) - for i in status_keys[status_keys.index(pass_name)+1:]: - self.status[status_keys[i]] = 'Skipped' + for pass_n in status_keys[status_keys.index(pass_name)+1:]: + self.status[pass_n] = 'Skipped' class State(): @@ -342,13 +342,13 @@ class State(): BlockPair( source=source, destination=destination, - model=self.source.details['model'], + model=self.source.model, working_dir=self.working_dir, )) def _get_clone_settings_path(self): """get Clone settings file path, returns pathlib.Path obj.""" - description = self.source.details['model'] + description = self.source.model if not description: description = self.source.path.name return pathlib.Path(f'{self.working_dir}/Clone_{description}.json') @@ -441,10 +441,10 @@ class State(): else: bail = False for key in ('model', 'serial'): - if settings['Source'][key] != self.source.details[key]: + if settings['Source'][key] != getattr(self.source, key): std.print_error(f"Clone settings don't match source {key}") bail = True - if settings['Destination'][key] != self.destination.details[key]: + if settings['Destination'][key] != getattr(self.destination, key): std.print_error(f"Clone settings 
don't match destination {key}") bail = True if bail: @@ -455,13 +455,13 @@ class State(): settings = CLONE_SETTINGS.copy() if not settings['Source']: settings['Source'] = { - 'model': self.source.details['model'], - 'serial': self.source.details['serial'], + 'model': self.source.model, + 'serial': self.source.serial, } if not settings['Destination']: settings['Destination'] = { - 'model': self.destination.details['model'], - 'serial': self.destination.details['serial'], + 'model': self.destination.model, + 'serial': self.destination.serial, } # Done @@ -621,7 +621,7 @@ class State(): for part in source_parts: report.append( f'{part.path.name:<9} ' - f'{std.bytes_to_string(part.details["size"], use_binary=False)}' + f'{std.bytes_to_string(part.size, use_binary=False)}' ) report.append(' ') @@ -880,7 +880,7 @@ class State(): # Add selected partition(s) for part in source_parts: - num_sectors = part.details['size'] / self.destination.details['log-sec'] + num_sectors = part.size / self.destination.log_sec num_sectors = math.ceil(num_sectors) part_num += 1 sfdisk_script.append( @@ -888,7 +888,7 @@ class State(): table_type=settings['Table Type'], dev_path=f'{dest_prefix}{part_num}', size=num_sectors, - details=part.details, + details=part.raw_details, ), ) @@ -981,7 +981,7 @@ class State(): # 1 LBA for the protective MBR # 33 LBAs each for the primary and backup GPT tables # Source: https://en.wikipedia.org/wiki/GUID_Partition_Table - required_size += (1 + 33 + 33) * self.destination.details['phy-sec'] + required_size += (1 + 33 + 33) * self.destination.phy_sec if settings['Create Boot Partition']: # 384MiB EFI System Partition and a 16MiB MS Reserved partition required_size += (384 + 16) * 1024**2 @@ -1004,7 +1004,7 @@ class State(): # Check destination size if self.mode == 'Clone': - destination_size = self.destination.details['size'] + destination_size = self.destination.size error_msg = 'A larger destination disk is required' else: # NOTE: Adding an extra 5% here to 
better ensure it will fit @@ -1311,13 +1311,13 @@ def build_directory_report(path): def build_disk_report(dev): """Build device report, returns list.""" - children = dev.details.get('children', []) + children = dev.raw_details.get('children', []) report = [] # Get widths widths = { - 'fstype': max(6, len(str(dev.details.get('fstype', '')))), - 'label': max(5, len(str(dev.details.get('label', '')))), + 'fstype': max(6, len(str(dev.filesystem))), + 'label': max(5, len(str(dev.raw_details.get('label', '')))), 'name': max(4, len(dev.path.name)), } for child in children: @@ -1332,10 +1332,10 @@ def build_disk_report(dev): # Disk details report.append(f'{dev.path.name} {dev.description}') report.append(' ') - dev_fstype = dev.details.get('fstype', '') - dev_label = dev.details.get('label', '') + dev_fstype = dev.filesystem + dev_label = dev.raw_details.get('label', '') dev_name = dev.path.name - dev_size = std.bytes_to_string(dev.details["size"], use_binary=False) + dev_size = std.bytes_to_string(dev.size, use_binary=False) # Partition details report.append( @@ -1684,11 +1684,10 @@ def get_object(path): obj = hw_disk.Disk(path) # Child/Parent check - parent = obj.raw_details['parent'] - if parent: + if obj.parent: std.print_warning(f'"{obj.path}" is a child device') - if std.ask(f'Use parent device "{parent}" instead?'): - obj = hw_disk.Disk(parent) + if std.ask(f'Use parent device "{obj.parent}" instead?'): + obj = hw_disk.Disk(obj.parent) elif path.is_dir(): obj = path elif path.is_file(): @@ -1736,7 +1735,7 @@ def get_table_type(disk): NOTE: If resulting table type is not GPT or MBR then an exception is raised. 
""" - table_type = str(disk.details.get('pttype', '')).upper() + table_type = str(disk.raw_details.get('pttype', '')).upper() table_type = table_type.replace('DOS', 'MBR') # Check type @@ -2239,23 +2238,22 @@ def select_disk(prompt, skip_disk=None): menu.add_action('Quit') for disk in disks: disable_option = False - size = disk.details["size"] + size = disk.size # Check if option should be disabled if skip_disk: - parent = skip_disk.details.get('parent', None) if (disk.path.samefile(skip_disk.path) - or (parent and disk.path.samefile(parent))): + or (skip_disk.parent and disk.path.samefile(skip_disk.parent))): disable_option = True # Add to menu menu.add_option( name=( f'{str(disk.path):<12} ' - f'{disk.details["bus"]:<5} ' + f'{disk.bus:<5} ' f'{std.bytes_to_string(size, decimals=1, use_binary=False):<8} ' - f'{disk.details["model"]} ' - f'{disk.details["serial"]}' + f'{disk.model} ' + f'{disk.serial}' ), details={'Disabled': disable_option, 'Object': disk}, ) @@ -2305,12 +2303,12 @@ def select_disk_parts(prompt, disk): return [disk] # Bail early if child device selected - if disk.details.get('parent', False): + if disk.parent: return [disk] # Add parts whole_disk_str = f'{str(disk.path):<14} (Whole device)' - for part in disk.details.get('children', []): + for part in disk.raw_details.get('children', []): size = part["size"] name = ( f'{str(part["path"]):<14} ' @@ -2333,7 +2331,7 @@ def select_disk_parts(prompt, disk): object_list.append(option['Path']) # Check if whole disk selected - if len(object_list) == len(disk.details.get('children', [])): + if len(object_list) == len(disk.raw_details.get('children', [])): # NOTE: This is not true if the disk has no partitions msg = f'Preserve partition table and unused space in {prompt.lower()}?' 
if std.ask(msg): diff --git a/scripts/wk/hw/disk.py b/scripts/wk/hw/disk.py index 471edc31..9325dbb3 100644 --- a/scripts/wk/hw/disk.py +++ b/scripts/wk/hw/disk.py @@ -42,6 +42,7 @@ class Disk: name: str = field(init=False) notes: list[str] = field(init=False, default_factory=list) path: Union[pathlib.Path, str] + parent: str = field(init=False) phy_sec: int = field(init=False) raw_details: dict[str, Any] = field(init=False) raw_smartctl: dict[str, Any] = field(init=False) @@ -127,6 +128,7 @@ class Disk: self.log_sec = self.raw_details.get('log-sec', 512) self.model = self.raw_details.get('model', 'Unknown Model') self.name = self.raw_details.get('name', self.path) + self.parent = self.raw_details.get('parent', None) self.phy_sec = self.raw_details.get('phy-sec', 512) self.serial = self.raw_details.get('serial', 'Unknown Serial') self.size = self.raw_details.get('size', -1)