WizardKit/scripts/wk/clone/ddrescue.py

659 lines
19 KiB
Python

"""WizardKit: ddrescue TUI"""
# vim: sts=2 sw=2 ts=2
import atexit
import datetime
import logging
import os
import pathlib
import subprocess
import time
from random import randint
import pytz
from docopt import docopt
from wk import cfg, exe, io, log, std
from wk.cfg.ddrescue import DDRESCUE_SPECIFIC_PASS_SETTINGS
from wk.clone import menus
from wk.clone.state import State
from wk.hw import disk as hw_disk
from wk.hw.smart import (
check_attributes,
smart_status_ok,
update_smart_details,
)
from wk.ui import ansi, cli
# STATIC VARIABLES
# Module-level logger named after this module
LOG = logging.getLogger(__name__)
# Usage text handed to docopt() in main() to parse the command line
DOCSTRING = f'''{cfg.main.KIT_NAME_FULL}: ddrescue TUI
Usage:
ddrescue-tui
ddrescue-tui [options] (clone|image) [<source> [<destination>]]
ddrescue-tui (-h | --help)
Options:
-h --help Show this page
-s --dry-run Print commands to be used instead of running them
--force-local-map Skip mounting shares and save map to local drive
--start-fresh Ignore previous runs and start new recovery
'''
# Warning printed by main() before forcing the controllers to rescan
DETECT_DRIVES_NOTICE = '''
This option will force the drive controllers to rescan for devices.
The method used is not 100% reliable and may cause issues. If you see
any script errors or crashes after running this option then please
restart the computer and try again.
'''
# Height passed to state.ui.set_current_pane_height() by run_recovery()
DDRESCUE_OUTPUT_HEIGHT = 14
INITIAL_SKIP_MIN = 64 * 1024 # This is ddrescue's minimum accepted value
# Platform name from wk.std; compared against 'Darwin' and 'Linux' below
PLATFORM = std.PLATFORM
# Timezone used for the "Updated:" timestamp in the SMART report files
TIMEZONE = pytz.timezone(cfg.main.LINUX_TIME_ZONE)
# Functions
def build_ddrescue_cmd(block_pair, pass_name, settings_menu) -> list[str]:
  """Assemble the ddrescue command for one block pair and pass, returns list.

  The command is built in this order: sudo/ddrescue, --force for device
  destinations, the per-pass settings, an adjusted --size when an input
  position is selected, --skip-size, the remaining menu settings, the
  sector size, an optional generated test map and finally the source,
  destination and map paths.

  NOTE: Selecting --input-position deselects --reverse in settings_menu.
  NOTE: The path entries are appended as-is, they are not cast to str.
  """
  options = settings_menu.options
  dest = block_pair.destination
  cmd = ['sudo', 'ddrescue']
  if dest.is_block_device() or dest.is_char_device():
    cmd.append('--force')
  cmd.extend(DDRESCUE_SPECIFIC_PASS_SETTINGS.get(pass_name, []))

  # Shrink the domain when recovery starts from a custom position
  domain_size = block_pair.size
  input_opt = options['--input-position']
  if input_opt['Selected']:
    options['--reverse']['Selected'] = False
    domain_size -= std.string_to_bytes(input_opt['Value'])
    cmd.append(f'--size={domain_size}')

  # Skip sizes are stored as fractions of the block pair size
  skip_opt = options['--skip-size']
  if skip_opt['Selected']:
    fractions = [float(part) for part in skip_opt['Value'].split(',')]
    initial_skip = max(INITIAL_SKIP_MIN, int(block_pair.size * fractions[0]))
    max_skip = max(
      INITIAL_SKIP_MIN,
      min(int(block_pair.size * fractions[1]), domain_size),
    )
    cmd.append(f'--skip-size={initial_skip},{max_skip}')
  cmd.extend(get_ddrescue_settings(settings_menu))

  # Source sector size
  cmd.append(f'--sector-size={block_pair.sector_size}')

  # Swap the bare --test-mode flag for one pointing at a generated map
  if '--test-mode' in cmd:
    cmd.remove('--test-mode')
    if not block_pair.test_map:
      real_map = block_pair.map_path
      block_pair.test_map = real_map.with_stem(f'{real_map.stem}-testing')
      generate_test_map(map_path=block_pair.test_map, size=domain_size)
    cmd.append(f'--test-mode={block_pair.test_map}')

  # Source, destination and map file (raw r<name> nodes on Darwin if present)
  devices = [block_pair.source, block_pair.destination]
  if PLATFORM == 'Darwin':
    for index, dev in enumerate(devices):
      raw_dev = pathlib.Path(dev.with_name(f'r{dev.name}'))
      if raw_dev.exists():
        devices[index] = raw_dev
  cmd.extend(devices)
  cmd.append(block_pair.map_path)

  # Done
  LOG.debug('ddrescue cmd: %s', cmd)
  return cmd
def check_destination_health(destination) -> str:
  """Check destination health, returns str.

  Returns an empty string when destination is not a hw_disk.Disk or when
  neither check fails, otherwise a message naming the destination path.

  NOTE: Both checks always run. If both fail the attribute message
        replaces the critical one since it is assigned last.
  """
  result = ''

  # Bail early (only Disk objects are checked)
  if not isinstance(destination, hw_disk.Disk):
    return result

  # Check for critical errors
  if not smart_status_ok(destination):
    # Needs the f-prefix, otherwise the placeholder is printed literally
    result = f'Critical error(s) detected for: {destination.path}'

  # Check for minor errors
  if not check_attributes(destination, only_blocking=False):
    result = f'Attribute error(s) detected for: {destination.path}'

  # Done
  return result
def generate_test_map(map_path: pathlib.Path, size: int) -> None:
  """Write a fake mapfile with roughly one in five chunks marked bad.

  The map covers size bytes in 2 MiB chunks. Each full chunk is randomly
  marked '-' (bad) or '+' (good) and any trailing partial chunk is always
  marked '+'. The result is written to map_path as UTF-8 without a
  trailing newline.
  """
  chunk = 2 * 1024**2
  full_chunks, remainder = divmod(size, chunk)
  lines = ['# Mapfile. Created by WizardKit', '0x0 ? 1']

  # Generate "holes"
  offset = 0
  for _ in range(full_chunks):
    status = '-' if randint(1, 5) == 5 else '+'
    lines.append(f'{hex(offset)} {hex(chunk)} {status}')
    offset += chunk
  if remainder:
    lines.append(f'{hex(offset)} {hex(remainder)} +')

  # Save map
  map_path.write_text('\n'.join(lines), encoding='utf-8')
def get_ddrescue_settings(settings_menu) -> list:
  """Convert the selected menu options into ddrescue arguments, returns list.

  Options carrying a 'Value' become 'name=value', the rest are added as
  bare flags. '--skip-size' is always left out here because
  build_ddrescue_cmd() adds it separately.
  """
  return [
    f'{name}={details["Value"]}' if 'Value' in details else name
    for name, details in settings_menu.options.items()
    if name != '--skip-size' and details['Selected']
  ]
def finalize_recovery(state: State, dry_run: bool = True) -> None:
  """Offer the post-recovery steps: zero-fill, then GPT relocation.

  The zero-fill step is always offered. The backup GPT relocation is only
  attempted when state.mode is 'Clone'.
  """
  zero_fill_destination(state, dry_run=dry_run)
  if state.mode != 'Clone':
    return
  relocate_backup_gpt(state, dry_run=dry_run)
def is_missing_source_or_destination(state) -> bool:
  """Check if source or destination disappeared, returns bool.

  Unset items are skipped. Items exposing .path are checked via
  path.exists(), other items via their own exists(). Anything else is
  logged as an unknown type. The top panes are refreshed before returning.
  """
  missing = False
  candidates = (
    ('Source', state.source),
    ('Destination', state.destination),
  )

  # Check items
  for name, item in candidates:
    if not item:
      continue
    if hasattr(item, 'path'):
      present = item.path.exists()
    elif hasattr(item, 'exists'):
      present = item.exists()
    else:
      LOG.error('Unknown %s type: %s', name, item)
      continue
    if not present:
      missing = True
      cli.print_error(f'{name} disappeared')

  # Update top panes
  state.update_top_panes()

  # Done
  return missing
def main() -> None:
  """Main function for ddrescue TUI.

  Parses the command line with docopt, moves logging into a timestamped
  ddrescue-TUI directory, initializes the recovery State and then loops on
  the main menu until the user quits. On quit the finalization steps are
  offered and the generated report is written to the log.

  Raises:
    RuntimeError: If the TMUX environment variable is not set.
  """
  args = docopt(DOCSTRING)

  # Log setup
  log_dir = log.format_log_path()
  log_dir = pathlib.Path(
    f'{log_dir.parent}/'
    f'ddrescue-TUI_{time.strftime("%Y-%m-%d_%H%M%S%z")}/'
  )
  log.update_log_path(
    dest_dir=log_dir,
    dest_name='main',
    keep_history=False,
    timestamp=False,
  )
  LOG.info('ddrescue-tui Start')

  # Check if running inside tmux
  if 'TMUX' not in os.environ:
    LOG.error('tmux session not found')
    raise RuntimeError('tmux session not found')

  # Init
  state = State()
  try:
    state.init_recovery(args)
  except (FileNotFoundError, std.GenericAbort):
    is_missing_source_or_destination(state)
    cli.abort()

  # Show menu
  main_menu = menus.main()
  settings_menu = menus.settings(state.mode)
  while True:
    selection = main_menu.advanced_select()

    # Change settings
    if 'Change settings' in selection[0]:
      while True:
        # Use a separate name so the main menu selection is not clobbered;
        # the checks below must only react to the main menu choice
        settings_selection = settings_menu.settings_select()
        if 'Load Preset' not in settings_selection:
          break
        # Rebuild settings menu using preset
        settings_menu = menus.settings(state.mode, silent=False)

    # Detect drives
    if 'Detect drives' in selection[0]:
      cli.clear_screen()
      cli.print_warning(DETECT_DRIVES_NOTICE)
      if cli.ask('Are you sure you want to proceed?'):
        cli.print_standard('Forcing controllers to rescan for devices...')
        # The pipe and glob need a shell, hence the single command string
        cmd = 'echo "- - -" | sudo tee /sys/class/scsi_host/host*/scan'
        exe.run_program([cmd], check=False, shell=True)
        if source_or_destination_changed(state):
          cli.abort()

    # Start recovery
    if 'Start' in selection:
      cli.clear_screen()
      run_recovery(state, main_menu, settings_menu, dry_run=args['--dry-run'])

    # Quit
    if 'Quit' in selection:
      total_percent = state.get_percent_recovered()

      # Confirm exit if recovery is less than 100%
      if total_percent < 100:
        cli.print_warning('Recovery is less than 100%')
        if not cli.ask('Are you sure you want to quit?'):
          continue
      finalize_recovery(state, dry_run=args['--dry-run'])
      break

  # Save results to log
  LOG.info('')
  for line in state.generate_report():
    LOG.info(' %s', ansi.strip_colors(line))
def relocate_backup_gpt(state: State, dry_run: bool = True) -> None:
  """Move the destination's backup GPT to the end of the device if approved.

  The sfdisk command is only run when every safety check passes and the
  user confirms. With dry_run set the command is printed instead.
  """
  def _is_gpt(dev) -> bool:
    """Check if the device details report a GPT partition table."""
    return str(dev.raw_details.get('pttype', 'Unknown')).lower() == 'gpt'

  cmd = ['sudo', 'sfdisk', '--relocate', 'gpt-bak-std', state.destination.path]
  state.destination.update_details(skip_children=False)

  # Safety checks
  ## Only offer this option when cloning a whole, non-child device
  ## where the source is smaller than the destination
  ## and both the source and destination are using a GUID_Partition_Table
  ## NOTE: The question is asked last so it only appears if applicable
  approved = (
    len(state.block_pairs) == 1
    and _is_gpt(state.destination)
    and state.source.size < state.destination.size
    and not state.source.parent
    and _is_gpt(state.source)
    and cli.ask('Relocate backup GPT to the end of the device?')
  )
  if not approved:
    LOG.warning('Refusing to attempt a backup GPT relocation.')
    return

  # Dry run
  if dry_run:
    cli.print_standard(f'Dry-run: Relocate GPT with command: {cmd}')
    return

  # Relocate GPT data
  proc = exe.run_program(cmd, check=False)
  if not proc.returncode:
    return
  cli.print_error('ERROR: Failed to relocate backup GPT.')
  LOG.error('sfdisk result: %s, %s', proc.stdout, proc.stderr)
def run_ddrescue(state, block_pair, pass_name, settings, dry_run=True) -> None:
  """Run ddrescue using passed settings.

  Starts ddrescue (and ddrescueview if enabled for the block pair), then
  polls once per second to refresh the progress panes. Every 30th
  iteration the SMART report files are rewritten and the destination
  health is checked; a failed check terminates ddrescue.

  With dry_run set the command is only logged.

  Raises:
    std.GenericAbort: If ddrescue was interrupted by the user, stopped due
      to a destination warning, or exited with a non-zero return code.
  """
  cmd = build_ddrescue_cmd(block_pair, pass_name, settings)
  poweroff_source_after_idle = True
  # Only set if ddrescue fails; stays None when no countdown was started
  poweroff_thread = None
  state.update_progress_pane('Active')
  state.ui.clear_current_pane()
  state.ui.clear_on_resize = True
  warning_message = ''

  def _poweroff_source_drive(idle_minutes) -> None:
    """Power off source drive after a while."""
    source_dev = state.source.path

    # Bail early
    if PLATFORM == 'Darwin':
      return

    # Sleep
    for i in range(1, idle_minutes*60, 1):
      if not poweroff_source_after_idle:
        # Countdown canceled, exit without powering-down drives
        return
      if i % 60 == 0:
        cli.print_warning(
          f'Powering off source in {int((idle_minutes*60-i)/60)} minutes...',
        )
      std.sleep(1)

    # Power off drive
    cmd = ['sudo', 'hdparm', '-Y', source_dev]
    proc = exe.run_program(cmd, check=False)
    if proc.returncode:
      cli.print_error(f'Failed to poweroff source {source_dev}')
    else:
      cli.print_warning(f'Powered off source {source_dev}')
    cli.print_standard(
      'Press Enter to return to main menu...', end='', flush=True,
    )

  def _update_smart_panes() -> None:
    """Rewrite the SMART report files watched by the info panes."""
    now = datetime.datetime.now(tz=TIMEZONE).strftime('%Y-%m-%d %H:%M %Z')
    for dev_str in ('source', 'destination'):
      dev = getattr(state, dev_str)

      # Safety check
      if not hasattr(dev, 'attributes'):
        continue

      # Update SMART data
      out_path = f'{state.log_dir}/smart_{dev_str}.out'
      update_smart_details(dev)
      with open(out_path, 'w', encoding='utf-8') as _f:
        _f.write(
          ansi.color_string(
            ['SMART Attributes', f'Updated: {now}\n'],
            ['BLUE', 'YELLOW'],
            sep='\t\t',
          ),
        )
        _f.write('\n'.join(dev.generate_report(header=False)))

  # Dry run
  if dry_run:
    LOG.info('ddrescue cmd: %s', cmd)
    return

  # Start ddrescue and ddrescueview (if enabled)
  proc = exe.popen_program(cmd)
  if (
      block_pair.view_map
      and (not block_pair.view_proc or block_pair.view_proc.poll() is not None)
  ):
    block_pair.view_proc = exe.popen_program(
      ['ddrescueview', '-r', '5s', block_pair.map_path],
      pipe=True,
    )

  # ddrescue loop
  _i = 0
  while True:
    if _i % 30 == 0:
      # Update SMART pane
      _update_smart_panes()

      # Check destination
      warning_message = check_destination_health(state.destination)
      if warning_message:
        # Error detected on destination, stop recovery
        proc.terminate()
        cli.print_error(warning_message)
        break
    _i += 1

    # Update progress
    block_pair.update_progress(pass_name)
    state.update_progress_pane('Active')

    # Check if complete
    try:
      proc.wait(timeout=1)
    except KeyboardInterrupt:
      # Wait a bit to let ddrescue exit safely
      LOG.warning('ddrescue stopped by user')
      warning_message = 'Aborted'
      std.sleep(2)
      proc.terminate()
      break
    except subprocess.TimeoutExpired:
      # Continue to next loop to update panes
      continue
    else:
      # ddrescue exited
      # NOTE: The break used to sit inside the try body which made the
      #       else branch unreachable; the effective behavior is kept.
      break

  # Update progress
  # NOTE: Using 'Active' here to avoid flickering between block pairs
  block_pair.update_progress(pass_name)
  state.update_progress_pane('Active')
  state.ui.clear_on_resize = False

  # Check result
  if proc.poll():
    # True if return code is non-zero (poll() returns None if still running)
    poweroff_thread = exe.start_thread(
      _poweroff_source_drive,
      [cfg.ddrescue.DRIVE_POWEROFF_TIMEOUT],
    )
    warning_message = 'Error(s) encountered, see message above'
  state.update_top_panes()
  if warning_message:
    cli.print_standard(' ')
    cli.print_standard(' ')
    cli.print_error('DDRESCUE PROCESS HALTED')
    cli.print_standard(' ')
    cli.print_warning(warning_message)

    # Needs attention?
    if str(proc.poll()) != '0':
      state.update_progress_pane('NEEDS ATTENTION')
      cli.pause('Press Enter to return to main menu...')

    # Stop source poweroff countdown
    # NOTE: A warning can be set without the countdown having been started
    #       (e.g. poll() is still None right after terminate()), so only
    #       join the thread if it exists.
    poweroff_source_after_idle = False
    if poweroff_thread is not None:
      cli.print_standard('Stopping device poweroff countdown...', flush=True)
      poweroff_thread.join()

    # Done
    raise std.GenericAbort()
def run_recovery(state: State, main_menu, settings_menu, dry_run=True) -> None:
  """Run the ddrescue passes in order until done, halted or aborted.

  state.save_debug_reports is registered with atexit for the duration of
  the run. The SMART and journal panes are added before the first pass
  and removed afterwards. A later pass is only started automatically when
  the current one is complete, above threshold and 'Auto continue' is
  selected.
  """
  atexit.register(state.save_debug_reports)
  attempted_recovery = False
  auto_continue = False

  # Bail early
  if is_missing_source_or_destination(state):
    cli.print_standard('')
    cli.pause('Press Enter to return to main menu...')
    return
  if source_or_destination_changed(state):
    cli.print_standard('')
    cli.abort()

  # Get settings
  for toggle_name, toggle in main_menu.toggles.items():
    if 'Auto continue' in toggle_name and toggle['Selected']:
      auto_continue = True
    if 'Retry' in toggle_name and toggle['Selected']:
      # Retry is a one-shot toggle, clear it before resetting the passes
      toggle['Selected'] = False
      state.retry_all_passes()

  # Start SMART/Journal
  ui = state.ui
  ui.add_info_pane(
    percent=50,
    update_layout=False,
    watch_file=f'{state.log_dir}/smart_source.out',
  )
  if hasattr(state.destination, 'attributes'):
    ui.add_info_pane(
      percent=50,
      update_layout=False,
      watch_file=f'{state.log_dir}/smart_destination.out',
    )
  if PLATFORM == 'Linux':
    ui.add_worker_pane(lines=4, cmd='journal-datarec-monitor')
  ui.set_current_pane_height(DDRESCUE_OUTPUT_HEIGHT)

  # Run pass(es)
  for pass_name in ('read-skip', 'read-full', 'trim', 'scrape'):
    abort = False

    # Skip to next pass
    if state.pass_complete(pass_name):
      # NOTE: This bypasses auto_continue
      state.skip_pass(pass_name)
      continue

    # Run ddrescue
    for pair in state.block_pairs:
      if pair.pass_complete(pass_name):
        continue
      attempted_recovery = True
      state.mark_started()
      try:
        run_ddrescue(state, pair, pass_name, settings_menu, dry_run=dry_run)
      except (FileNotFoundError, KeyboardInterrupt, std.GenericAbort):
        is_missing_source_or_destination(state)
        abort = True
        break

    # Continue or return to menu
    all_complete = state.pass_complete(pass_name)
    all_above_threshold = state.pass_above_threshold(pass_name)
    halted = (
      abort
      or not all_complete
      or not all_above_threshold
      or not auto_continue
    )
    if halted:
      LOG.warning('Recovery halted')
      break

  # Stop SMART/Journal
  state.ui.remove_all_info_panes()
  state.ui.remove_all_worker_panes()
  state.ui.clear_current_pane_height()

  # Show warning if nothing was done
  if not attempted_recovery:
    cli.print_warning('No actions performed')
    cli.print_standard(' ')
    cli.pause('Press Enter to return to main menu...')

  # Done
  state.save_debug_reports()
  atexit.unregister(state.save_debug_reports)
  state.update_progress_pane('Idle')
def source_or_destination_changed(state) -> bool:
  """Check if the source or destination no longer match, returns bool.

  An unset object counts as changed. Objects with exists() are checked
  for existence. Disk objects are compared by model and serial against a
  new Disk built from the same path. The top panes are refreshed and an
  error is printed if anything changed.
  """
  changed = False

  # Compare objects
  for obj in (state.source, state.destination):
    if not obj:
      changed = True
      continue
    if hasattr(obj, 'exists'):
      # Assuming dest path
      if not changed:
        changed = not obj.exists()
      continue
    if not isinstance(obj, hw_disk.Disk):
      continue
    fresh_dev = hw_disk.Disk(obj.path)
    changed = changed or any(
      getattr(obj, key) != getattr(fresh_dev, key)
      for key in ('model', 'serial')
    )

  # Update top panes
  state.update_top_panes()

  # Done
  if changed:
    cli.print_error('Source and/or Destination changed')
  return changed
def zero_fill_destination(state: State, dry_run: bool = True) -> None:
  """Zero-fill any gaps and space on destination beyond the source size.

  The step is only offered when recovery is below 100% or when a whole
  disk was cloned to a larger destination, and only runs if the user
  confirms. For each block pair the map is copied to a '_zero-fill' map
  and ddrescue is re-run with /dev/zero as the source. With dry_run set
  the command for every block pair is printed instead of being run.
  """
  larger_destination = False
  percent_recovered = state.get_percent_recovered()
  full_disk_clone = state.mode == 'Clone' and len(state.block_pairs) == 1

  # Bail early
  if not (
      (percent_recovered < 100
       or (full_disk_clone and state.source.size < state.destination.size))
      and cli.ask('Fill gaps with zeros?')):
    return

  for block_pair in state.block_pairs:
    destination_size = block_pair.size
    if full_disk_clone and state.source.size < state.destination.size:
      destination_size = state.destination.size
      larger_destination = True

    # Prep zero-fill map file
    zero_map_path = block_pair.map_path.with_stem(
      f'{block_pair.map_path.stem}_zero-fill',
    )
    io.copy_file(block_pair.map_path, zero_map_path, overwrite=True)
    if larger_destination:
      # Add the space beyond the source as a non-tried area to the map
      with open(zero_map_path, 'a', encoding='utf-8') as f:
        f.write(
          f'{hex(block_pair.size)} '
          f'{hex(destination_size - block_pair.size)} ?'
        )

    # Build cmd
    cmd = [
      'sudo',
      'ddrescue',
      '--force',
      f'--size={destination_size}',
      '--binary-prefixes',
      '--complete-only',
      '--data-preview=5',
      '--odirect',
      '--retry-passes=0',
      f'--sector-size={block_pair.sector_size}',
      '-vvvv',
      '/dev/zero',
      block_pair.destination,
      zero_map_path,
    ]

    # Dry run
    if dry_run:
      # Keep going so the command for every block pair is shown
      cli.print_standard(f'Zero-fill with command: {cmd}')
      continue

    # Re-run ddrescue to zero-fill gaps
    proc = exe.run_program(cmd, check=False, pipe=False)
    if proc.returncode:
      # Needs the f-prefix, otherwise the placeholder is printed literally
      cli.print_error(f'ERROR: Failed to zero-fill: {block_pair.destination}')
      LOG.error('zero-fill error: %s, %s', proc.stdout, proc.stderr)
if __name__ == '__main__':
print("This file is not meant to be called directly.")