parent
172f00e4e9
commit
f9a6850c1a
5 changed files with 318 additions and 198 deletions
|
|
@ -20,8 +20,10 @@ BADBLOCKS_REGEX = re.compile(
|
|||
)
|
||||
BADBLOCKS_RESULTS_REGEX = re.compile(r'^(.*?)\x08.*\x08(.*)')
|
||||
BADBLOCKS_SKIP_REGEX = re.compile(r'^(Checking|\[)', re.IGNORECASE)
|
||||
CPU_CRITICAL_TEMP = 100
|
||||
CPU_FAILURE_TEMP = 90
|
||||
CPU_TEMP_COOLING_DELTA = 20
|
||||
CPU_TEMP_CRITICAL = 100
|
||||
CPU_TEMP_IDLE_HIGH = 85
|
||||
CPU_TEMP_LOW_THRESHOLD = 50
|
||||
CPU_TEST_MINUTES = 7
|
||||
IO_GRAPH_WIDTH = 40
|
||||
IO_ALT_TEST_SIZE_FACTOR = 0.01
|
||||
|
|
|
|||
|
|
@ -8,7 +8,12 @@ import subprocess
|
|||
from typing import TextIO
|
||||
|
||||
from wk import exe
|
||||
from wk.cfg.hw import CPU_FAILURE_TEMP
|
||||
from wk.cfg.hw import (
|
||||
CPU_TEMP_COOLING_DELTA,
|
||||
CPU_TEMP_CRITICAL,
|
||||
CPU_TEMP_IDLE_HIGH,
|
||||
CPU_TEMP_LOW_THRESHOLD,
|
||||
)
|
||||
from wk.os.mac import set_fans as macos_set_fans
|
||||
from wk.std import PLATFORM
|
||||
from wk.ui import ansi
|
||||
|
|
@ -20,26 +25,57 @@ SysbenchType = tuple[subprocess.Popen, TextIO]
|
|||
|
||||
|
||||
# Functions
|
||||
def check_cooling_results(sensors, test_object) -> None:
    """Check cooling result via sensor data and update test_object.

    Reads the highest CPU 'Idle', 'Cooldown' and 'Max' temps from sensors,
    sets the status (and passed/failed flags) on test_object, then appends
    a CPU-only temperature report to test_object.report.

    Status rules, checked in order:
      Unknown - no max temp was recorded (get_cpu_temp() returned zero).
      Failed  - max temp is above CPU_TEMP_CRITICAL.
      Failed  - idle temp is at or above CPU_TEMP_IDLE_HIGH.
      Passed  - cooldown temp is at or below CPU_TEMP_LOW_THRESHOLD, or the
                max/cooldown difference is at least CPU_TEMP_COOLING_DELTA.
      Unknown - anything else.
    """
    idle_temp = sensors.get_cpu_temp('Idle')
    cooldown_temp = sensors.get_cpu_temp('Cooldown')
    max_temp = sensors.get_cpu_temp('Max')
    test_object.report.append(ansi.color_string('Temps', 'BLUE'))

    # Check temps
    if not max_temp:
        # get_cpu_temp() returns zero when nothing was recorded. Without this
        # guard a missing cooldown temp (0) would be <= the low threshold and
        # the test would be reported as passed with no data at all.
        test_object.set_status('Unknown')
    elif max_temp > CPU_TEMP_CRITICAL:
        test_object.failed = True
        test_object.set_status('Failed')
        test_object.report.extend([
            ansi.color_string(
                f' WARNING: Critical CPU temp of {CPU_TEMP_CRITICAL} exceeded.',
                'RED',
            ),
            '',
        ])
    elif idle_temp >= CPU_TEMP_IDLE_HIGH:
        test_object.failed = True
        test_object.set_status('Failed')
        test_object.report.extend([
            ansi.color_string(
                f' WARNING: Max idle temp of {CPU_TEMP_IDLE_HIGH} exceeded.',
                'YELLOW',
            ),
            '',
        ])
    elif (
        cooldown_temp <= CPU_TEMP_LOW_THRESHOLD
        or abs(max_temp - cooldown_temp) >= CPU_TEMP_COOLING_DELTA
    ):
        test_object.passed = True
        test_object.set_status('Passed')
    else:
        test_object.passed = False
        test_object.set_status('Unknown')

    # Build report
    report_labels = ['Idle']
    if 'Sysbench' in sensors.temp_labels:
        report_labels.extend(['Sysbench', 'Cooldown'])
    if 'Prime95' in sensors.temp_labels:
        report_labels.append('Prime95')
    if 'Cooldown' not in report_labels:
        report_labels.append('Cooldown')
    if not sensors.temp_labels.intersection(['Prime95', 'Sysbench']):
        # Include overall max temp if no stress test recorded its own max
        report_labels.append('Max')
    for line in sensors.generate_report(*report_labels, only_cpu=True):
        test_object.report.append(f' {line}')
|
||||
|
||||
|
||||
def check_mprime_results(test_obj, working_dir) -> None:
|
||||
|
|
@ -112,7 +148,9 @@ def start_mprime(working_dir, log_path) -> subprocess.Popen:
|
|||
stdout=subprocess.PIPE,
|
||||
)
|
||||
proc_mprime.stdout.close() # type: ignore[reportOptionalMemberAccess]
|
||||
save_nbsr = exe.NonBlockingStreamReader(proc_grep.stdout)
|
||||
save_nbsr = exe.NonBlockingStreamReader(
|
||||
proc_grep.stdout, # type: ignore[reportGeneralTypeIssues]
|
||||
)
|
||||
exe.start_thread(
|
||||
save_nbsr.save_to_file,
|
||||
args=(proc_grep, log_path),
|
||||
|
|
@ -122,35 +160,6 @@ def start_mprime(working_dir, log_path) -> subprocess.Popen:
|
|||
return proc_mprime
|
||||
|
||||
|
||||
def start_sysbench(sensors, sensors_out, log_path) -> SysbenchType:
    """Start sysbench, returns tuple with Popen object and file handle.

    The background sensor monitor is restarted first so it records under
    the 'Sysbench' label and interrupts sysbench as its thermal action.
    The returned file handle is opened in append mode on log_path and
    receives sysbench's stdout; the caller is responsible for closing it.
    """
    set_apple_fan_speed('max')
    sysbench_cmd = [
        'sysbench',
        f'--threads={exe.psutil.cpu_count()}',
        '--cpu-max-prime=1000000000',
        'cpu',
        'run',
    ]

    # Restart background monitor for Sysbench
    sensors.stop_background_monitor()
    sensors.start_background_monitor(
        sensors_out,
        alt_max='Sysbench',
        thermal_action=('killall', 'sysbench', '-INT'),
    )

    # Start sysbench
    filehandle_sysbench = open(  # pylint: disable=consider-using-with
        log_path, 'a', encoding='utf-8',
    )
    try:
        proc_sysbench = exe.popen_program(
            sysbench_cmd, stdout=filehandle_sysbench,
        )
    except BaseException:
        # Nothing is returned on failure, so the caller can't close the
        # handle; close it here instead of leaking it, then re-raise.
        filehandle_sysbench.close()
        raise

    # Done
    return (proc_sysbench, filehandle_sysbench)
|
||||
|
||||
|
||||
def set_apple_fan_speed(speed) -> None:
|
||||
"""Set Apple fan speed."""
|
||||
cmd = None
|
||||
|
|
@ -174,6 +183,27 @@ def set_apple_fan_speed(speed) -> None:
|
|||
exe.run_program(cmd, check=False)
|
||||
|
||||
|
||||
def start_sysbench(log_path) -> SysbenchType:
    """Start sysbench, returns tuple with Popen object and file handle.

    The returned file handle is opened in append mode on log_path and
    receives sysbench's stdout; the caller is responsible for closing it.
    """
    set_apple_fan_speed('max')
    cmd = [
        'sysbench',
        f'--threads={exe.psutil.cpu_count()}',
        '--cpu-max-prime=1000000000',
        'cpu',
        'run',
    ]

    # Start sysbench
    filehandle = open(  # pylint: disable=consider-using-with
        log_path, 'a', encoding='utf-8',
    )
    try:
        proc = exe.popen_program(cmd, stdout=filehandle)
    except BaseException:
        # Nothing is returned on failure, so the caller can't close the
        # handle; close it here instead of leaking it, then re-raise.
        filehandle.close()
        raise

    # Done
    return (proc, filehandle)
|
||||
|
||||
|
||||
def stop_mprime(proc_mprime) -> None:
|
||||
"""Stop mprime gracefully, then forcefully as needed."""
|
||||
proc_mprime.terminate()
|
||||
|
|
|
|||
|
|
@ -45,7 +45,9 @@ LOG = logging.getLogger(__name__)
|
|||
TEST_GROUPS = {
|
||||
# Also used to build the menu options
|
||||
## NOTE: This needs to be above MENU_SETS
|
||||
'CPU & Cooling': 'cpu_stress_tests',
|
||||
'CPU (Sysbench)': 'cpu_test_sysbench',
|
||||
'CPU (Prime95)': 'cpu_test_mprime',
|
||||
'CPU (Cooling)': 'cpu_test_cooling',
|
||||
'Disk Attributes': 'disk_attribute_check',
|
||||
'Disk Self-Test': 'disk_self_test',
|
||||
'Disk Surface Scan': 'disk_surface_scan',
|
||||
|
|
@ -65,6 +67,7 @@ MENU_ACTIONS_SECRET = (
|
|||
MENU_OPTIONS_QUICK = ('Disk Attributes',)
|
||||
MENU_SETS = {
|
||||
'Full Diagnostic': (*TEST_GROUPS,),
|
||||
'CPU Diagnostic': (*[group for group in TEST_GROUPS if group.startswith('CPU')],),
|
||||
'Disk Diagnostic': (
|
||||
'Disk Attributes',
|
||||
'Disk Self-Test',
|
||||
|
|
@ -85,6 +88,7 @@ class State():
|
|||
self.disks: list[hw_disk.Disk] = []
|
||||
self.log_dir: pathlib.Path | None = None
|
||||
self.progress_file: pathlib.Path | None = None
|
||||
self.sensors: hw_sensors.Sensors = hw_sensors.Sensors()
|
||||
self.system: hw_system.System | None = None
|
||||
self.test_groups: list[TestGroup] = []
|
||||
self.title_text: str = ansi.color_string('Hardware Diagnostics', 'GREEN')
|
||||
|
|
@ -123,6 +127,7 @@ class State():
|
|||
# Reset objects
|
||||
self.disks.clear()
|
||||
self.test_groups.clear()
|
||||
self.sensors = hw_sensors.Sensors()
|
||||
|
||||
# Set log
|
||||
self.log_dir = log.format_log_path()
|
||||
|
|
@ -158,21 +163,9 @@ class State():
|
|||
continue
|
||||
|
||||
if 'CPU' in name:
|
||||
# Create two Test objects which will both be used by cpu_stress_tests
|
||||
# NOTE: Prime95 should be added first
|
||||
self.system.tests.append(
|
||||
Test(dev=self.system, label='Prime95', name=name),
|
||||
)
|
||||
self.system.tests.append(
|
||||
Test(dev=self.system, label='Cooling', name=name),
|
||||
)
|
||||
self.test_groups.append(
|
||||
TestGroup(
|
||||
name=name,
|
||||
function=globals()[TEST_GROUPS[name]],
|
||||
test_objects=self.system.tests,
|
||||
),
|
||||
)
|
||||
Test(dev=self.system, label=name[5:-1], name=name),
|
||||
)
|
||||
|
||||
if 'Disk' in name:
|
||||
test_group = TestGroup(
|
||||
|
|
@ -184,6 +177,17 @@ class State():
|
|||
test_group.test_objects.append(test_obj)
|
||||
self.test_groups.append(test_group)
|
||||
|
||||
# Group CPU tests
|
||||
if self.system.tests:
|
||||
self.test_groups.insert(
|
||||
0,
|
||||
TestGroup(
|
||||
name='CPU & Cooling',
|
||||
function=run_cpu_tests,
|
||||
test_objects=self.system.tests,
|
||||
),
|
||||
)
|
||||
|
||||
def reset_layout(self) -> None:
|
||||
"""Reset layout to avoid flickering."""
|
||||
self.ui.clear_current_pane_height()
|
||||
|
|
@ -312,135 +316,200 @@ def build_menu(cli_mode=False, quick_mode=False) -> cli.Menu:
|
|||
return menu
|
||||
|
||||
|
||||
def cpu_tests_init(state) -> None:
    """Initialize CPU tests.

    Sets the title to the CPU description, adds the sensor info pane,
    starts the shared background sensor monitor (state.sensors) writing to
    sensors.out in the log dir, and saves the 'Idle' temps.
    """
    sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
    state.update_title_text(state.system.cpu_description)

    # Start monitor
    if PLATFORM == 'Darwin':
        state.ui.add_info_pane(
            percent=80, cmd='./hw-sensors', update_layout=False,
        )
    elif PLATFORM == 'Linux':
        state.ui.add_info_pane(
            percent=80,
            watch_file=sensors_out,
            update_layout=False,
        )
    state.sensors.start_background_monitor(sensors_out)
    state.ui.set_current_pane_height(3)

    # Save idle temps
    cli.print_standard('Saving idle temps...')
    state.sensors.save_average_temps(temp_label='Idle', seconds=5)
|
||||
|
||||
def cpu_tests_end(state) -> None:
    """End CPU tests.

    Refreshes the progress file, stops the shared background sensor
    monitor and removes the panes used by the CPU tests.
    """
    # Cleanup
    state.update_progress_file()
    # Only state.sensors exists in this scope; there is no local monitor.
    state.sensors.stop_background_monitor()
    state.ui.clear_current_pane_height()
    state.ui.remove_all_info_panes()
    state.ui.remove_all_worker_panes()
|
||||
|
||||
|
||||
def cpu_test_cooling(state, test_object, test_mode=False) -> None:
    """Assess CPU cooling from the sensor data collected so far.

    Does nothing (beyond logging) when test_object is disabled. test_mode
    is accepted for signature parity with the other CPU tests and is not
    used here.
    """
    LOG.info('CPU Test (Cooling)')

    if not test_object.disabled:
        # No stress is applied; this only evaluates the recorded temps
        hw_cpu.check_cooling_results(state.sensors, test_object)
        state.update_progress_file()
|
||||
|
||||
|
||||
def cpu_test_mprime(state, test_object, test_mode=False) -> None:
    """CPU stress test using mprime.

    Skipped when test_object is disabled. Denied (and disabled) when the
    CPU already reached the critical temp earlier in the session. When
    test_mode is set the run time is cfg.hw.TEST_MODE_CPU_LIMIT minutes
    instead of cfg.hw.CPU_TEST_MINUTES.

    Raises std.GenericAbort after cleanup if interrupted from the keyboard.
    """
    LOG.info('CPU Test (Prime95)')
    aborted = False
    log_path = pathlib.Path(f'{state.log_dir}/prime.log')
    sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
    test_minutes = cfg.hw.CPU_TEST_MINUTES
    if test_mode:
        test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT

    # Bail early
    if test_object.disabled:
        return
    if state.sensors.cpu_reached_critical_temp():
        test_object.set_status('Denied')
        test_object.disabled = True
        # Refresh so the new status is shown, as after every other change
        state.update_progress_file()
        return

    # Prep
    test_object.set_status('Working')
    state.update_progress_file()
    state.ui.clear_current_pane()
    cli.print_info('Running stress test')
    print('')

    # Restart sensors monitor so temps are recorded under 'Prime95' and
    # mprime is interrupted as the thermal action
    state.sensors.stop_background_monitor()
    state.sensors.start_background_monitor(
        sensors_out,
        alt_max='Prime95',
        thermal_action=('killall', '-INT', 'mprime'),
    )

    # Run Prime95
    hw_cpu.set_apple_fan_speed('max')
    proc = hw_cpu.start_mprime(state.log_dir, log_path)
    state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=log_path)
    try:
        print_countdown(proc=proc, seconds=test_minutes*60)
    except KeyboardInterrupt:
        aborted = True

    # Stop Prime95
    hw_cpu.stop_mprime(proc)

    # Get cooldown temp
    if 'Cooldown' in state.sensors.temp_labels:
        # Give Prime95 time to save the results
        std.sleep(1)
    else:
        # Save cooldown temp
        state.ui.clear_current_pane()
        cli.print_standard('Letting CPU cooldown...')
        std.sleep(5)
        cli.print_standard('Saving cooldown temps...')
        state.sensors.save_average_temps(temp_label='Cooldown', seconds=5)

    # Check Prime95 results
    test_object.report.append(ansi.color_string('Prime95', 'BLUE'))
    hw_cpu.check_mprime_results(test_obj=test_object, working_dir=state.log_dir)

    # Update progress (always, so the final status is shown either way)
    if state.sensors.cpu_reached_critical_temp() or aborted:
        test_object.set_status('Aborted')
    state.update_progress_file()

    # Done
    state.ui.remove_all_worker_panes()
    if aborted:
        # Clean up here because the abort skips the caller's cleanup
        cpu_tests_end(state)
        raise std.GenericAbort('Aborted')
|
||||
|
||||
|
||||
def cpu_test_sysbench(state, test_object, test_mode=False) -> None:
    """CPU stress test using Sysbench.

    Skipped when test_object is disabled. When test_mode is set the run
    time is cfg.hw.TEST_MODE_CPU_LIMIT minutes instead of
    cfg.hw.CPU_TEST_MINUTES.

    Raises std.GenericAbort after cleanup if interrupted from the keyboard.
    """
    LOG.info('CPU Test (Sysbench)')
    aborted = False
    log_path = pathlib.Path(f'{state.log_dir}/sysbench.log')
    sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
    test_minutes = cfg.hw.CPU_TEST_MINUTES
    if test_mode:
        test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT

    # Bail early
    if test_object.disabled:
        return

    # Prep
    test_object.set_status('Working')
    state.update_progress_file()
    state.ui.clear_current_pane()
    cli.print_info('Running stress test')
    print('')

    # Restart sensors monitor so temps are recorded under 'Sysbench' and
    # sysbench is interrupted as the thermal action
    state.sensors.stop_background_monitor()
    state.sensors.start_background_monitor(
        sensors_out,
        alt_max='Sysbench',
        thermal_action=('killall', '-INT', 'sysbench'),
    )

    # Run sysbench
    # The log is only created once sysbench starts, so create it before
    # the worker pane begins tailing it.
    log_path.touch()
    state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=log_path)
    proc, filehandle = hw_cpu.start_sysbench(log_path=log_path)
    try:
        print_countdown(proc=proc, seconds=test_minutes*60)
    except AttributeError:
        # Assuming the sysbench process wasn't found and proc was set to None
        LOG.error('Failed to find sysbench process', exc_info=True)
    except KeyboardInterrupt:
        aborted = True
    hw_cpu.stop_sysbench(proc, filehandle)

    # Get cooldown temp
    if 'Cooldown' not in state.sensors.temp_labels:
        state.ui.clear_current_pane()
        cli.print_standard('Letting CPU cooldown...')
        std.sleep(5)
        cli.print_standard('Saving cooldown temps...')
        state.sensors.save_average_temps(temp_label='Cooldown', seconds=5)

    # Update progress
    test_object.report.append(ansi.color_string('Sysbench', 'BLUE'))
    if aborted:
        test_object.set_status('Aborted')
        test_object.report.append(ansi.color_string(' Aborted.', 'YELLOW'))
    elif state.sensors.cpu_reached_critical_temp():
        test_object.set_status('Aborted')
        test_object.report.append(
            ansi.color_string(' Aborted due to temps.', 'YELLOW'),
        )
    elif proc is None:
        # No process to inspect (see the AttributeError handler above);
        # reading proc.returncode here would raise a second AttributeError.
        test_object.set_status('Unknown')
        test_object.report.append(
            ansi.color_string(' Failed to start sysbench.', 'YELLOW'),
        )
    elif proc.returncode not in (-15, -2, 0):
        # NOTE: Return codes:
        #   0 == Completed w/out issue
        #  -2 == Stopped with INT signal
        # -15 == Stopped with TERM signal
        test_object.set_status('Failed')
        test_object.report.append(f' Failed with return code: {proc.returncode}')
    else:
        test_object.set_status('Passed')
        test_object.report.append(' Completed without issue.')
    state.update_progress_file()

    # Done
    state.ui.remove_all_worker_panes()
    if aborted:
        # Clean up here because the abort skips the caller's cleanup
        cpu_tests_end(state)
        raise std.GenericAbort('Aborted')
|
||||
|
||||
|
||||
|
|
@ -765,6 +834,16 @@ def print_countdown(proc, seconds) -> None:
|
|||
# Done
|
||||
print('')
|
||||
|
||||
def run_cpu_tests(state, test_objects, test_mode=False) -> None:
    """Run selected CPU test(s).

    Brackets the individual tests with the shared init/cleanup steps.
    Each test function is looked up by the test object's name via
    TEST_GROUPS and called in the order the objects are given.
    """
    state.update_progress_file()
    cpu_tests_init(state)

    for test_object in test_objects:
        function_name = TEST_GROUPS[test_object.name]
        test_function = globals()[function_name]
        test_function(state, test_object, test_mode=test_mode)

    cpu_tests_end(state)
    state.update_progress_file()
|
||||
|
||||
|
||||
def run_diags(state, menu, quick_mode=False, test_mode=False) -> None:
|
||||
"""Run selected diagnostics."""
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from subprocess import CalledProcessError
|
|||
from threading import Thread
|
||||
from typing import Any
|
||||
|
||||
from wk.cfg.hw import CPU_CRITICAL_TEMP, SMC_IDS, TEMP_COLORS
|
||||
from wk.cfg.hw import CPU_TEMP_CRITICAL, SMC_IDS, TEMP_COLORS
|
||||
from wk.exe import run_program, start_thread
|
||||
from wk.io import non_clobber_path
|
||||
from wk.std import PLATFORM, sleep
|
||||
|
|
@ -41,6 +41,7 @@ class Sensors():
|
|||
self.background_thread: Thread | None = None
|
||||
self.data: dict[Any, Any] = get_sensor_data()
|
||||
self.out_path: pathlib.Path | str | None = None
|
||||
self.temp_labels: set = set(['Current', 'Max'])
|
||||
|
||||
def clear_temps(self) -> None:
|
||||
"""Clear saved temps but keep structure"""
|
||||
|
|
@ -49,26 +50,8 @@ class Sensors():
|
|||
for source_data in sources.values():
|
||||
source_data['Temps'] = []
|
||||
|
||||
def cpu_max_temp(self) -> float:
    """Get the highest 'Max' temp across all CPU sources, returns float.

    Only sections of self.data whose name starts with 'CPU' are checked.

    NOTE: If no temps are found this returns zero.
    """
    # Seed with zero so an empty result (or all-negative data) yields 0.0
    candidates = [0.0]

    # Collect the 'Max' reading from every CPU source
    for section_name in self.data:
        if section_name.startswith('CPU'):
            for adapter_sources in self.data[section_name].values():
                for reading in adapter_sources.values():
                    candidates.append(reading.get('Max', 0))

    # Done
    return max(candidates)
|
||||
|
||||
def cpu_reached_critical_temp(self) -> bool:
|
||||
"""Check if CPU reached CPU_CRITICAL_TEMP, returns bool."""
|
||||
"""Check if CPU reached CPU_TEMP_CRITICAL, returns bool."""
|
||||
for section, adapters in self.data.items():
|
||||
if not section.startswith('CPU'):
|
||||
# Limit to CPU temps
|
||||
|
|
@ -77,7 +60,7 @@ class Sensors():
|
|||
# Ugly section
|
||||
for sources in adapters.values():
|
||||
for source_data in sources.values():
|
||||
if source_data.get('Max', -1) >= CPU_CRITICAL_TEMP:
|
||||
if source_data.get('Max', -1) > CPU_TEMP_CRITICAL:
|
||||
return True
|
||||
|
||||
# Didn't return above so temps are within the threshold
|
||||
|
|
@ -119,6 +102,25 @@ class Sensors():
|
|||
# Done
|
||||
return report
|
||||
|
||||
def get_cpu_temp(self, label) -> float:
    """Get the highest temp saved under label for any CPU source.

    Only sections of self.data whose name starts with 'CPU' are checked;
    sources without a value for label count as zero.

    NOTE: If no temps are found this returns zero.
    """
    # Seed with zero so an empty result (or all-negative data) yields 0.0
    candidates = [0.0]

    # Collect the reading for label from every CPU source
    for section_name in self.data:
        if section_name.startswith('CPU'):
            for adapter_sources in self.data[section_name].values():
                for reading in adapter_sources.values():
                    candidates.append(reading.get(label, 0))

    # Done
    return float(max(candidates))
|
||||
|
||||
def monitor_to_file(
|
||||
self, out_path, alt_max=None,
|
||||
exit_on_thermal_limit=True, temp_labels=None,
|
||||
|
|
@ -136,6 +138,7 @@ class Sensors():
|
|||
temp_labels = ['Current', 'Max']
|
||||
if alt_max:
|
||||
temp_labels.append(alt_max)
|
||||
self.temp_labels.add(alt_max)
|
||||
|
||||
# Start loop
|
||||
while True:
|
||||
|
|
@ -158,6 +161,7 @@ class Sensors():
|
|||
def save_average_temps(self, temp_label, seconds=10) -> None:
|
||||
"""Save average temps under temp_label over provided seconds.."""
|
||||
self.clear_temps()
|
||||
self.temp_labels.add(temp_label)
|
||||
|
||||
# Get temps
|
||||
for _ in range(seconds):
|
||||
|
|
@ -200,6 +204,10 @@ class Sensors():
|
|||
|
||||
def stop_background_monitor(self) -> None:
|
||||
"""Stop background thread."""
|
||||
# Bail early
|
||||
if self.background_thread is None:
|
||||
return
|
||||
|
||||
self.out_path.with_suffix('.stop').touch()
|
||||
self.background_thread.join()
|
||||
|
||||
|
|
@ -210,6 +218,8 @@ class Sensors():
|
|||
def update_sensor_data(
|
||||
self, alt_max=None, exit_on_thermal_limit=True) -> None:
|
||||
"""Update sensor data via OS-specific means."""
|
||||
if alt_max:
|
||||
self.temp_labels.add(alt_max)
|
||||
if PLATFORM == 'Darwin':
|
||||
self.update_sensor_data_macos(alt_max, exit_on_thermal_limit)
|
||||
elif PLATFORM == 'Linux':
|
||||
|
|
@ -236,7 +246,7 @@ class Sensors():
|
|||
|
||||
# Raise exception if thermal limit reached
|
||||
if exit_on_thermal_limit and section == 'CPUTemps':
|
||||
if source_data['Current'] >= CPU_CRITICAL_TEMP:
|
||||
if source_data['Current'] > CPU_TEMP_CRITICAL:
|
||||
raise ThermalLimitReachedError('CPU temps reached limit')
|
||||
|
||||
def update_sensor_data_macos(
|
||||
|
|
@ -263,7 +273,7 @@ class Sensors():
|
|||
|
||||
# Raise exception if thermal limit reached
|
||||
if exit_on_thermal_limit and section == 'CPUTemps':
|
||||
if source_data['Current'] >= CPU_CRITICAL_TEMP:
|
||||
if source_data['Current'] > CPU_TEMP_CRITICAL:
|
||||
raise ThermalLimitReachedError('CPU temps reached limit')
|
||||
|
||||
|
||||
|
|
@ -420,7 +430,7 @@ def get_sensor_data_macos() -> dict[Any, Any]:
|
|||
|
||||
def get_temp_str(temp, colored=True) -> str:
|
||||
"""Get colored string based on temp, returns str."""
|
||||
temp_color = None
|
||||
temp_color = ''
|
||||
|
||||
# Safety check
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ class Test:
|
|||
name: str
|
||||
disabled: bool = field(init=False, default=False)
|
||||
failed: bool = field(init=False, default=False)
|
||||
hidden: bool = False
|
||||
passed: bool = field(init=False, default=False)
|
||||
report: list[str] = field(init=False, default_factory=list)
|
||||
status: str = field(init=False, default='Pending')
|
||||
|
|
|
|||
Loading…
Reference in a new issue