diff --git a/scripts/wk/cfg/hw.py b/scripts/wk/cfg/hw.py index 64d1f333..0a84e1e0 100644 --- a/scripts/wk/cfg/hw.py +++ b/scripts/wk/cfg/hw.py @@ -14,6 +14,7 @@ ATTRIBUTE_COLORS = ( # NOTE: Force 4K read block size for disks >= 3TB BADBLOCKS_EXTRA_LARGE_DISK = 15 * 1024**4 BADBLOCKS_LARGE_DISK = 3 * 1024**4 +BADBLOCKS_MAX_ERRORS = 1 BADBLOCKS_REGEX = re.compile( r'^Pass completed, (\d+) bad blocks found. .(\d+)/(\d+)/(\d+) errors', re.IGNORECASE, diff --git a/scripts/wk/hw/diags.py b/scripts/wk/hw/diags.py index ae9739bc..75226472 100644 --- a/scripts/wk/hw/diags.py +++ b/scripts/wk/hw/diags.py @@ -11,7 +11,7 @@ import subprocess from docopt import docopt from wk import cfg, debug, exe, log, osticket, std -from wk.cfg.hw import STATUS_COLORS +from wk.cfg.hw import CPU_TEST_MINUTES, STATUS_COLORS from wk.hw import benchmark as hw_benchmark from wk.hw import cpu as hw_cpu from wk.hw import disk as hw_disk @@ -42,12 +42,6 @@ Options: -h --help Show this page -q --quick Skip menu and perform a quick check -t --test-mode Run diags in test mode - - --ignore-smart-errors NOT RECOMMENDED! - Only use if you have RTFM, - know what you're doing, - understand the risks, - and accept responsibililty. ''' LOG = logging.getLogger(__name__) IO_SIZE_SKIP_NAME = ( @@ -77,7 +71,13 @@ MENU_ACTIONS = ( MENU_ACTIONS_SECRET = ( 'Matrix', 'Tubes', + '?Secrets', ) +MENU_OPTIONS_SECRET = { + 'Ignore SMART errors': False, + 'Override CPU Testing Time (minutes)': CPU_TEST_MINUTES, + 'Override Surface Scan Error Limit': 1, +} MENU_OPTIONS_QUICK = ('Disk Attributes',) MENU_SETS = { 'Full Diagnostic': (*TEST_GROUPS,), @@ -107,6 +107,7 @@ class State(): self.log_dir: pathlib.Path | None = None self.ost = osticket.osTicket() self.progress_file: pathlib.Path | None = None + self.secret_menu = build_secret_menu() self.sensors: hw_sensors.Sensors = hw_sensors.Sensors() self.system: hw_system.System | None = None self.test_groups: list[TestGroup] = [] @@ -370,6 +371,24 @@ def build_menu(cli_mode=False, quick_mode=False) -> cli.Menu: return menu +def build_secret_menu() -> cli.Menu: + title_text = [ + ansi.color_string(('Expert Settings', "(It's a secret to everyone!)"), ('ORANGE', None)), + ' ', + ansi.color_string( + [' !!', 'These settings can cause', 'MAJOR DAMAGE', 'to systems'], + [None, 'YELLOW', 'RED', 'YELLOW'], + ), + ' !! Please read the manual before making changes', + ' ', + ] + menu = cli.Menu(title='\n'.join(title_text)) + for option, value in MENU_OPTIONS_SECRET.items(): + menu.add_option(option, {'Selected': False, 'Value': value}) + menu.add_action('Main Menu') + return menu + + def cpu_tests_init(state: State) -> None: """Initialize CPU tests.""" sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out') @@ -426,6 +445,11 @@ def cpu_test_mprime(state: State, test_object, test_mode=False) -> None: test_minutes = cfg.hw.CPU_TEST_MINUTES if test_mode: test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT + if state.secret_menu.options.get( + 'Override CPU Testing Time (minutes)')['Selected']: + test_minutes = int( + state.secret_menu.options.get('Override CPU Testing Time (minutes)')['Value'], + ) # Bail early if test_object.disabled: @@ -502,6 +526,11 @@ def cpu_test_sysbench(state: State, test_object, test_mode=False) -> None: test_minutes = cfg.hw.CPU_TEST_MINUTES if test_mode: test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT + if state.secret_menu.options.get( + 'Override CPU Testing Time (minutes)')['Selected']: + test_minutes = int( + state.secret_menu.options.get('Override CPU Testing Time (minutes)')['Value'], + ) # Bail early if test_object.disabled: @@ -758,7 +787,12 @@ def disk_surface_scan(state: State, test_objects, test_mode=False) -> None: """Read-only disk surface scan using badblocks.""" LOG.info('Disk Surface Scan (badblocks)') aborted = False + max_errors = 1 threads = [] + if state.secret_menu.options.get( + 'Override Surface Scan Error Limit')['Selected']: + max_errors = state.secret_menu.options.get( + 'Override Surface Scan Error Limit')['Value'] # Update panes state.update_title_text( @@ -775,7 +809,7 @@ def disk_surface_scan(state: State, test_objects, test_mode=False) -> None: # Start thread test_log = f'{state.log_dir}/{test.dev.path.name}_badblocks.log' threads.append(exe.start_thread( - hw_surface_scan.run_scan, args=(test, test_log, test_mode), + hw_surface_scan.run_scan, args=(test, test_log, test_mode, max_errors), )) # Show progress @@ -832,7 +866,6 @@ def main() -> None: # Init menu = build_menu(cli_mode=args['--cli'], quick_mode=args['--quick']) state = State(test_mode=args['--test-mode']) - state.override_all_smart_errors = args['--ignore-smart-errors'] # Quick Mode if args['--quick']: @@ -868,9 +901,11 @@ def main() -> None: state.ui.update_clock() # Secrets + if '?Secrets' in selection: + state.secret_menu.settings_select() if 'Matrix' in selection: screensaver('matrix') - elif 'Tubes' in selection: + if 'Tubes' in selection: # Tubes ≈≈ Pipes? screensaver('pipes') diff --git a/scripts/wk/hw/surface_scan.py b/scripts/wk/hw/surface_scan.py index ef99cc20..9440c5a1 100644 --- a/scripts/wk/hw/surface_scan.py +++ b/scripts/wk/hw/surface_scan.py @@ -70,7 +70,7 @@ def check_surface_scan_results(test_obj, log_path) -> None: test_obj.set_status('Unknown') -def run_scan(test_obj, log_path, test_mode=False) -> None: +def run_scan(test_obj, log_path, test_mode=False, max_errors=1) -> None: """Run surface scan and handle exceptions.""" block_size = '1024' dev = test_obj.dev @@ -89,8 +89,17 @@ def run_scan(test_obj, log_path, test_mode=False) -> None: or dev.size >= BADBLOCKS_LARGE_DISK): block_size = '4096' + # Max errors + if int(max_errors) <= 0: + max_errors = '' + else: + max_errors = f'-e{max_errors}' + # Start scan - cmd = ['sudo', 'badblocks', '-sv', '-b', block_size, '-e', '1', dev_path] + cmd = ['sudo', 'badblocks', '-sv', '-b', block_size] + if max_errors: + cmd.append(max_errors) + cmd.append(dev_path) if test_mode: # Only test a limited scope instead of the whole device cmd.append(TEST_MODE_BADBLOCKS_LIMIT)