From 1a3b57da663b97ae1a5dd9f61789b49894fa50a1 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Thu, 8 Apr 2021 19:41:05 -0600 Subject: [PATCH 1/6] Poweroff source drives after ddrescue errors Addresses issue #165 --- scripts/wk/hw/ddrescue.py | 41 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index 496ac01f..6a78128b 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -1847,10 +1847,43 @@ def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True): # pylint: disable=too-many-statements """Run ddrescue using passed settings.""" cmd = build_ddrescue_cmd(block_pair, pass_name, settings) + poweroff_source_after_idle = True state.update_progress_pane('Active') std.clear_screen() warning_message = '' + def _poweroff_source_drive(idle_minutes=120): + """Power off source drive after a while.""" + source_dev = state.source.path.name + + # Sleep + i = 0 + while i < idle_minutes*60: + if not poweroff_source_after_idle: + # Countdown canceled, exit without powering-down drives + return + if i % 600 == 0 and i > 0: + if i == 600: + std.print_standard(' ', flush=True) + std.print_warning( + f'Powering off source in {int((idle_minutes*60-i)/60)} minutes...', + ) + std.sleep(5) + i += 5 + + # Power off drive + cmd = f'echo 1 | sudo tee /sys/block/{source_dev}/device/delete' + proc = exe.run_program(cmd, check=False, shell=True) + if proc.returncode: + LOG.error('Failed to poweroff source %s', state.source.path) + std.print_error(f'Failed to poweroff source {state.source.path}') + else: + LOG.info('Powered off source %s', state.source.path) + std.print_error(f'Powered off source {state.source.path}') + std.print_standard( + 'Press Enter to return to main menu...', end='', flush=True, + ) + def _update_smart_pane(): """Update SMART pane every 30 seconds.""" state.source.update_smart_details() @@ -1922,6 +1955,7 @@ def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True): # Check result if proc.poll(): # True if return code is non-zero (poll() returns None if still running) + poweroff_thread = exe.start_thread(_poweroff_source_drive) warning_message = 'Error(s) encountered, see message above' if warning_message: print(' ') @@ -1934,6 +1968,13 @@ def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True): if str(proc.poll()) != '0': state.update_progress_pane('NEEDS ATTENTION') std.pause('Press Enter to return to main menu...') + + # Stop source poweroff countdown + std.print_standard('Stopping device poweroff countdown...', flush=True) + poweroff_source_after_idle = False + poweroff_thread.join() + + # Done raise std.GenericAbort() From c452256fe7e9c066bf75cea46465b6e51868557b Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Thu, 8 Apr 2021 20:49:21 -0600 Subject: [PATCH 2/6] Fix pylint issues in wk/hw/ddrescue.py --- scripts/wk/hw/ddrescue.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index 6a78128b..a0e7a92b 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -389,10 +389,10 @@ class State(): with open(settings_file, 'r') as _f: try: settings = json.loads(_f.read()) - except (OSError, json.JSONDecodeError): + except (OSError, json.JSONDecodeError) as err: LOG.error('Failed to load clone settings') std.print_error('Invalid clone settings detected.') - raise std.GenericAbort() + raise std.GenericAbort() from err # Check settings if settings: @@ -436,9 +436,9 @@ class State(): try: with open(settings_file, 'w') as _f: json.dump(settings, _f) - except OSError: + except OSError as err: std.print_error('Failed to save clone settings') - raise std.GenericAbort() + raise std.GenericAbort() from err def add_clone_block_pairs(self): """Add device to device block pairs and set settings if necessary.""" @@ -645,11 +645,11 @@ class State(): def get_rescued_size(self): """Get total rescued size from all block pairs, returns int.""" - return sum([pair.get_rescued_size() for pair in self.block_pairs]) + return sum(pair.get_rescued_size() for pair in self.block_pairs) def get_total_size(self): """Get total size of all block_pairs in bytes, returns int.""" - return sum([pair.size for pair in self.block_pairs]) + return sum(pair.size for pair in self.block_pairs) def init_recovery(self, docopt_args): """Select source/dest and set env.""" @@ -759,12 +759,12 @@ class State(): """Check if all block_pairs meet the pass threshold, returns bool.""" threshold = cfg.ddrescue.AUTO_PASS_THRESHOLDS[pass_name] return all( - [p.get_percent_recovered() >= threshold for p in self.block_pairs], + p.get_percent_recovered() >= threshold for p in self.block_pairs ) def pass_complete(self, pass_name): """Check if all block_pairs completed pass_name, returns bool.""" - return all([p.pass_complete(pass_name) for p in self.block_pairs]) + return all(p.pass_complete(pass_name) for p in self.block_pairs) def prep_destination(self, source_parts, dry_run=True): """Prep destination as necessary.""" @@ -898,15 +898,15 @@ class State(): """Run safety checks for destination and abort if necessary.""" try: self.destination.safety_checks() - except hw_obj.CriticalHardwareError: + except hw_obj.CriticalHardwareError as err: std.print_error( f'Critical error(s) detected for: {self.destination.path}', ) - raise std.GenericAbort() + raise std.GenericAbort() from err def safety_check_size(self): """Run size safety check and abort if necessary.""" - required_size = sum([pair.size for pair in self.block_pairs]) + required_size = sum(pair.size for pair in self.block_pairs) settings = self._load_settings() if self.mode == 'Clone' else {} # Increase required_size if necessary @@ -1136,7 +1136,7 @@ def build_block_pair_report(block_pairs, settings): ['BLUE', None], ), ) - if any([pair.get_rescued_size() > 0 for pair in block_pairs]): + if any(pair.get_rescued_size() > 0 for pair in block_pairs): notes.append( std.color_string( ['NOTE:', 'Resume data loaded from map file(s).'], @@ -1544,7 +1544,7 @@ def get_etoc(): output = tmux.capture_pane() # Search for EToC delta - matches = re.findall(f'remaining time:.*$', output, re.MULTILINE) + matches = re.findall(r'remaining time:.*$', output, re.MULTILINE) if matches: match = REGEX_REMAINING_TIME.search(matches[-1]) if match.group('na'): @@ -1679,9 +1679,9 @@ def get_working_dir(mode, destination, force_local=False): if mode == 'Image': try: path = pathlib.Path(destination).resolve() - except TypeError: + except TypeError as err: std.print_error(f'Invalid destination: {destination}') - raise std.GenericAbort() + raise std.GenericAbort() from err if path.exists() and fstype_is_ok(path, map_dir=False): working_dir = path elif mode == 'Clone' and not force_local: @@ -1713,6 +1713,7 @@ def get_working_dir(mode, destination, force_local=False): def main(): + # pylint: disable=too-many-branches """Main function for ddrescue TUI.""" args = docopt(DOCSTRING) log.update_log_path(dest_name='ddrescue-TUI', timestamp=True) @@ -1735,7 +1736,6 @@ def main(): # Show menu while True: - action = None selection = main_menu.advanced_select() # Change settings @@ -2092,7 +2092,7 @@ def select_disk(prompt, skip_disk=None): def select_disk_parts(prompt, disk): """Select disk parts from list, returns list of Disk().""" - title = std.color_string(f'ddrescue TUI: Partition Selection', 'GREEN') + title = std.color_string('ddrescue TUI: Partition Selection', 'GREEN') title += f'\n\nDisk: {disk.path} {disk.description}' menu = std.Menu(title) menu.separator = ' ' @@ -2115,7 +2115,7 @@ def select_disk_parts(prompt, disk): for option in menu.options.values(): option['Selected'] = False elif 'Proceed' in selection: - if any([option['Selected'] for option in menu.options.values()]): + if any(option['Selected'] for option in menu.options.values()): # At least one partition/device selected/device selected break elif 'Quit' in selection: @@ -2174,7 +2174,7 @@ def select_path(prompt): ) menu.separator = ' ' menu.add_action('Quit') - menu.add_option(f'Current directory') + menu.add_option('Current directory') menu.add_option('Enter manually') path = None From 18bc139d255fd9a7240771c99c79912833b5bc01 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Thu, 8 Apr 2021 20:55:28 -0600 Subject: [PATCH 3/6] Add 'Detect drives' option to ddrescue-tui --- scripts/wk/hw/ddrescue.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index a0e7a92b..943edb67 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -39,6 +39,12 @@ Options: --force-local-map Skip mounting shares and save map to local drive --start-fresh Ignore previous runs and start new recovery ''' +DETECT_DRIVES_NOTICE = ''' +This option will force the drive controllers to rescan for devices. +The method used is not 100% reliable and may cause issues. If you see +any script errors or crashes after running this option then please +restart the computer and try again. +''' CLONE_SETTINGS = { 'Source': None, 'Destination': None, @@ -70,6 +76,7 @@ LOG = logging.getLogger(__name__) MENU_ACTIONS = ( 'Start', f'Change settings {std.color_string("(experts only)", "YELLOW")}', + f'Detect drives {std.color_string("(experts only)", "YELLOW")}', 'Quit') MENU_TOGGLES = { 'Auto continue (if recovery % over threshold)': True, @@ -1748,6 +1755,15 @@ def main(): else: break + # Detect drives + if 'Detect drives' in selection[0]: + std.clear_screen() + std.print_warning(DETECT_DRIVES_NOTICE) + if std.ask('Are you sure you proceed?'): + std.print_standard('Forcing controllers to rescan for devices...') + cmd = 'echo "- - -" | sudo tee /sys/class/scsi_host/host*/scan' + exe.run_program(cmd, check=False, shell=True) + # Start recovery if 'Start' in selection: std.clear_screen() From 43fd30322e08bcc72b9aaae842f5e049d4e0e748 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Thu, 8 Apr 2021 22:43:13 -0600 Subject: [PATCH 4/6] Expand checks for missing source or destination Addresses issue #155 --- scripts/wk/hw/ddrescue.py | 81 ++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index 943edb67..966d65a4 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -290,6 +290,7 @@ class BlockPair(): class State(): + # pylint: disable=too-many-public-methods """Object for tracking hardware diagnostic data.""" def __init__(self): self.block_pairs = [] @@ -1043,6 +1044,8 @@ class State(): def update_top_panes(self): """(Re)create top source/destination panes.""" + source_exists = True + dest_exists = True width = tmux.get_pane_size()[0] width = int(width / 2) - 1 @@ -1072,6 +1075,15 @@ class State(): # Done return string + # Check source/dest existance + if self.source: + source_exists = self.source.path.exists() + if self.destination: + if isinstance(self.destination, hw_obj.Disk): + dest_exists = self.destination.path.exists() + else: + dest_exists = self.destination.exists() + # Kill destination pane if 'Destination' in self.panes: tmux.kill_pane(self.panes.pop('Destination')) @@ -1083,9 +1095,9 @@ class State(): tmux.respawn_pane( self.panes['Source'], text=std.color_string( - ['Source', source_str], - ['BLUE', None], - sep='\n', + ['Source', '' if source_exists else ' (Missing)', '\n', source_str], + ['BLUE', 'RED', None, None], + sep='', ), ) @@ -1098,9 +1110,9 @@ class State(): vertical=False, target_id=self.panes['Source'], text=std.color_string( - ['Destination', dest_str], - ['BLUE', None], - sep='\n', + ['Destination', '' if dest_exists else ' (Missing)', '\n', dest_str], + ['BLUE', 'RED', None, None], + sep='', ), ) @@ -1421,25 +1433,6 @@ def check_destination_health(destination): return result -def check_for_missing_items(state): - """Check if source or destination dissapeared.""" - items = { - 'Source': state.source, - 'Destination': state.destination, - } - for name, item in items.items(): - if not item: - continue - if hasattr(item, 'path'): - if not item.path.exists(): - std.print_error(f'{name} disappeared') - elif hasattr(item, 'exists'): - if not item.exists(): - std.print_error(f'{name} disappeared') - else: - LOG.error('Unknown %s type: %s', name, item) - - def clean_working_dir(working_dir): """Clean working directory to ensure a fresh recovery session. @@ -1719,6 +1712,30 @@ def get_working_dir(mode, destination, force_local=False): return working_dir +def is_missing_source_or_destination(state): + """Check if source or destination dissapeared, returns bool.""" + missing = False + items = { + 'Source': state.source, + 'Destination': state.destination, + } + for name, item in items.items(): + if not item: + continue + if hasattr(item, 'path'): + if not item.path.exists(): + missing = True + std.print_error(f'{name} disappeared') + elif hasattr(item, 'exists'): + if not item.exists(): + missing = True + std.print_error(f'{name} disappeared') + else: + LOG.error('Unknown %s type: %s', name, item) + + return missing + + def main(): # pylint: disable=too-many-branches """Main function for ddrescue TUI.""" @@ -1738,11 +1755,12 @@ def main(): try: state.init_recovery(args) except (FileNotFoundError, std.GenericAbort): - check_for_missing_items(state) + is_missing_source_or_destination(state) std.abort() # Show menu while True: + state.update_top_panes() selection = main_menu.advanced_select() # Change settings @@ -1973,6 +1991,7 @@ def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True): # True if return code is non-zero (poll() returns None if still running) poweroff_thread = exe.start_thread(_poweroff_source_drive) warning_message = 'Error(s) encountered, see message above' + state.update_top_panes() if warning_message: print(' ') print(' ') @@ -1995,11 +2014,19 @@ def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True): def run_recovery(state, main_menu, settings_menu, dry_run=True): + # pylint: disable=too-many-branches """Run recovery passes.""" atexit.register(state.save_debug_reports) attempted_recovery = False auto_continue = False + # Bail early + if is_missing_source_or_destination(state): + state.update_top_panes() + std.print_standard('') + std.pause('Press Enter to return to main menu...') + return + # Get settings for name, details in main_menu.toggles.items(): if 'Auto continue' in name and details['Selected']: @@ -2036,7 +2063,7 @@ def run_recovery(state, main_menu, settings_menu, dry_run=True): try: run_ddrescue(state, pair, pass_name, settings, dry_run=dry_run) except (FileNotFoundError, KeyboardInterrupt, std.GenericAbort): - check_for_missing_items(state) + is_missing_source_or_destination(state) abort = True break From 5a2d35d3ccb2b818968f069dde63432da330856d Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Thu, 8 Apr 2021 23:09:00 -0600 Subject: [PATCH 5/6] Prevent recovering to wrong devices or paths Before starting a recovery run verify the source and destination have not changed. This will prevent issues on some extreme edge cases but the main goal is for disappearing source drives with heavy damage. e.g. A very damaged source drive disappears mid-recovery, drops off and before would need a restart, or unplug/replug, to continue. Now we can attempt to re-detect the drive and resume recovery without leaving the script. If for some reason the drive order were to change then we'll avoid using the wrong source or destination device. --- scripts/wk/hw/ddrescue.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index 966d65a4..0a063a98 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -1719,6 +1719,8 @@ def is_missing_source_or_destination(state): 'Source': state.source, 'Destination': state.destination, } + + # Check items for name, item in items.items(): if not item: continue @@ -1733,9 +1735,38 @@ def is_missing_source_or_destination(state): else: LOG.error('Unknown %s type: %s', name, item) + # Update top panes + state.update_top_panes() + + # Done return missing +def source_or_destination_changed(state): + """Verify the source and destination objects are still valid.""" + changed = False + + # Compare objects + for obj in (state.source, state.destination): + if not obj: + changed = True + elif hasattr(obj, 'exists'): + # Assuming dest path + changed = changed or not obj.exists() + elif isinstance(obj, hw_obj.Disk): + compare_dev = hw_obj.Disk(obj.path) + for key in ('model', 'serial'): + changed = changed or obj.details[key] != compare_dev.details[key] + + # Update top panes + state.update_top_panes() + + # Done + if changed: + std.print_error('Source and/or Destination changed') + return changed + + def main(): # pylint: disable=too-many-branches """Main function for ddrescue TUI.""" @@ -1760,7 +1791,6 @@ def main(): # Show menu while True: - state.update_top_panes() selection = main_menu.advanced_select() # Change settings @@ -1781,6 +1811,8 @@ def main(): std.print_standard('Forcing controllers to rescan for devices...') cmd = 'echo "- - -" | sudo tee /sys/class/scsi_host/host*/scan' exe.run_program(cmd, check=False, shell=True) + if source_or_destination_changed(state): + std.abort() # Start recovery if 'Start' in selection: @@ -2022,10 +2054,12 @@ def run_recovery(state, main_menu, settings_menu, dry_run=True): # Bail early if is_missing_source_or_destination(state): - state.update_top_panes() std.print_standard('') std.pause('Press Enter to return to main menu...') return + if source_or_destination_changed(state): + std.print_standard('') + std.abort() # Get settings for name, details in main_menu.toggles.items(): From 830e088ccffc200deafadb47450a31ae3ab7d230 Mon Sep 17 00:00:00 2001 From: 2Shirt <2xShirt@gmail.com> Date: Thu, 8 Apr 2021 23:17:00 -0600 Subject: [PATCH 6/6] Report if the dest starts failing during recovery --- scripts/wk/hw/ddrescue.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/wk/hw/ddrescue.py b/scripts/wk/hw/ddrescue.py index 0a063a98..91f01f2c 100644 --- a/scripts/wk/hw/ddrescue.py +++ b/scripts/wk/hw/ddrescue.py @@ -1984,6 +1984,7 @@ def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True): if warning_message: # Error detected on destination, stop recovery exe.stop_process(proc) + std.print_error(warning_message) break if _i % 60 == 0: # Clear ddrescue pane