parent
172f00e4e9
commit
f9a6850c1a
5 changed files with 318 additions and 198 deletions
|
|
@ -20,8 +20,10 @@ BADBLOCKS_REGEX = re.compile(
|
||||||
)
|
)
|
||||||
BADBLOCKS_RESULTS_REGEX = re.compile(r'^(.*?)\x08.*\x08(.*)')
|
BADBLOCKS_RESULTS_REGEX = re.compile(r'^(.*?)\x08.*\x08(.*)')
|
||||||
BADBLOCKS_SKIP_REGEX = re.compile(r'^(Checking|\[)', re.IGNORECASE)
|
BADBLOCKS_SKIP_REGEX = re.compile(r'^(Checking|\[)', re.IGNORECASE)
|
||||||
CPU_CRITICAL_TEMP = 100
|
CPU_TEMP_COOLING_DELTA = 20
|
||||||
CPU_FAILURE_TEMP = 90
|
CPU_TEMP_CRITICAL = 100
|
||||||
|
CPU_TEMP_IDLE_HIGH = 85
|
||||||
|
CPU_TEMP_LOW_THRESHOLD = 50
|
||||||
CPU_TEST_MINUTES = 7
|
CPU_TEST_MINUTES = 7
|
||||||
IO_GRAPH_WIDTH = 40
|
IO_GRAPH_WIDTH = 40
|
||||||
IO_ALT_TEST_SIZE_FACTOR = 0.01
|
IO_ALT_TEST_SIZE_FACTOR = 0.01
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,12 @@ import subprocess
|
||||||
from typing import TextIO
|
from typing import TextIO
|
||||||
|
|
||||||
from wk import exe
|
from wk import exe
|
||||||
from wk.cfg.hw import CPU_FAILURE_TEMP
|
from wk.cfg.hw import (
|
||||||
|
CPU_TEMP_COOLING_DELTA,
|
||||||
|
CPU_TEMP_CRITICAL,
|
||||||
|
CPU_TEMP_IDLE_HIGH,
|
||||||
|
CPU_TEMP_LOW_THRESHOLD,
|
||||||
|
)
|
||||||
from wk.os.mac import set_fans as macos_set_fans
|
from wk.os.mac import set_fans as macos_set_fans
|
||||||
from wk.std import PLATFORM
|
from wk.std import PLATFORM
|
||||||
from wk.ui import ansi
|
from wk.ui import ansi
|
||||||
|
|
@ -20,26 +25,57 @@ SysbenchType = tuple[subprocess.Popen, TextIO]
|
||||||
|
|
||||||
|
|
||||||
# Functions
|
# Functions
|
||||||
def check_cooling_results(test_obj, sensors, run_sysbench=False) -> None:
|
def check_cooling_results(sensors, test_object) -> None:
|
||||||
"""Check cooling results and update test_obj."""
|
"""Check cooling result via sensor data."""
|
||||||
max_temp = sensors.cpu_max_temp()
|
idle_temp = sensors.get_cpu_temp('Idle')
|
||||||
temp_labels = ['Idle', 'Max', 'Cooldown']
|
cooldown_temp = sensors.get_cpu_temp('Cooldown')
|
||||||
if run_sysbench:
|
max_temp = sensors.get_cpu_temp('Max')
|
||||||
temp_labels.append('Sysbench')
|
test_object.report.append(ansi.color_string('Temps', 'BLUE'))
|
||||||
|
|
||||||
# Check temps
|
# Check temps
|
||||||
if not max_temp:
|
if max_temp > CPU_TEMP_CRITICAL:
|
||||||
test_obj.set_status('Unknown')
|
test_object.failed = True
|
||||||
elif max_temp >= CPU_FAILURE_TEMP:
|
test_object.set_status('Failed')
|
||||||
test_obj.failed = True
|
test_object.report.extend([
|
||||||
test_obj.set_status('Failed')
|
ansi.color_string(
|
||||||
elif 'Aborted' not in test_obj.status:
|
f' WARNING: Critical CPU temp of {CPU_TEMP_CRITICAL} exceeded.',
|
||||||
test_obj.passed = True
|
'RED',
|
||||||
test_obj.set_status('Passed')
|
),
|
||||||
|
'',
|
||||||
|
])
|
||||||
|
elif idle_temp >= CPU_TEMP_IDLE_HIGH:
|
||||||
|
test_object.failed = True
|
||||||
|
test_object.set_status('Failed')
|
||||||
|
test_object.report.extend([
|
||||||
|
ansi.color_string(
|
||||||
|
f' WARNING: Max idle temp of {CPU_TEMP_IDLE_HIGH} exceeded.',
|
||||||
|
'YELLOW',
|
||||||
|
),
|
||||||
|
'',
|
||||||
|
])
|
||||||
|
elif (
|
||||||
|
cooldown_temp <= CPU_TEMP_LOW_THRESHOLD
|
||||||
|
or abs(max_temp - cooldown_temp) >= CPU_TEMP_COOLING_DELTA
|
||||||
|
):
|
||||||
|
test_object.passed = True
|
||||||
|
test_object.set_status('Passed')
|
||||||
|
else:
|
||||||
|
test_object.passed = False
|
||||||
|
test_object.set_status('Unknown')
|
||||||
|
|
||||||
# Add temps to report
|
# Build report
|
||||||
for line in sensors.generate_report(*temp_labels, only_cpu=True):
|
report_labels = ['Idle']
|
||||||
test_obj.report.append(f' {line}')
|
if 'Sysbench' in sensors.temp_labels:
|
||||||
|
report_labels.extend(['Sysbench', 'Cooldown'])
|
||||||
|
if 'Prime95' in sensors.temp_labels:
|
||||||
|
report_labels.append('Prime95')
|
||||||
|
if 'Cooldown' not in report_labels:
|
||||||
|
report_labels.append('Cooldown')
|
||||||
|
if len(sensors.temp_labels.intersection(['Prime95', 'Sysbench'])) < 1:
|
||||||
|
# Include overall max temp if needed
|
||||||
|
report_labels.append('Max')
|
||||||
|
for line in sensors.generate_report(*report_labels, only_cpu=True):
|
||||||
|
test_object.report.append(f' {line}')
|
||||||
|
|
||||||
|
|
||||||
def check_mprime_results(test_obj, working_dir) -> None:
|
def check_mprime_results(test_obj, working_dir) -> None:
|
||||||
|
|
@ -112,7 +148,9 @@ def start_mprime(working_dir, log_path) -> subprocess.Popen:
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
)
|
)
|
||||||
proc_mprime.stdout.close() # type: ignore[reportOptionalMemberAccess]
|
proc_mprime.stdout.close() # type: ignore[reportOptionalMemberAccess]
|
||||||
save_nbsr = exe.NonBlockingStreamReader(proc_grep.stdout)
|
save_nbsr = exe.NonBlockingStreamReader(
|
||||||
|
proc_grep.stdout, # type: ignore[reportGeneralTypeIssues]
|
||||||
|
)
|
||||||
exe.start_thread(
|
exe.start_thread(
|
||||||
save_nbsr.save_to_file,
|
save_nbsr.save_to_file,
|
||||||
args=(proc_grep, log_path),
|
args=(proc_grep, log_path),
|
||||||
|
|
@ -122,35 +160,6 @@ def start_mprime(working_dir, log_path) -> subprocess.Popen:
|
||||||
return proc_mprime
|
return proc_mprime
|
||||||
|
|
||||||
|
|
||||||
def start_sysbench(sensors, sensors_out, log_path) -> SysbenchType:
|
|
||||||
"""Start sysbench, returns tuple with Popen object and file handle."""
|
|
||||||
set_apple_fan_speed('max')
|
|
||||||
sysbench_cmd = [
|
|
||||||
'sysbench',
|
|
||||||
f'--threads={exe.psutil.cpu_count()}',
|
|
||||||
'--cpu-max-prime=1000000000',
|
|
||||||
'cpu',
|
|
||||||
'run',
|
|
||||||
]
|
|
||||||
|
|
||||||
# Restart background monitor for Sysbench
|
|
||||||
sensors.stop_background_monitor()
|
|
||||||
sensors.start_background_monitor(
|
|
||||||
sensors_out,
|
|
||||||
alt_max='Sysbench',
|
|
||||||
thermal_action=('killall', 'sysbench', '-INT'),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Start sysbench
|
|
||||||
filehandle_sysbench = open(
|
|
||||||
log_path, 'a', encoding='utf-8',
|
|
||||||
)
|
|
||||||
proc_sysbench = exe.popen_program(sysbench_cmd, stdout=filehandle_sysbench)
|
|
||||||
|
|
||||||
# Done
|
|
||||||
return (proc_sysbench, filehandle_sysbench)
|
|
||||||
|
|
||||||
|
|
||||||
def set_apple_fan_speed(speed) -> None:
|
def set_apple_fan_speed(speed) -> None:
|
||||||
"""Set Apple fan speed."""
|
"""Set Apple fan speed."""
|
||||||
cmd = None
|
cmd = None
|
||||||
|
|
@ -174,6 +183,27 @@ def set_apple_fan_speed(speed) -> None:
|
||||||
exe.run_program(cmd, check=False)
|
exe.run_program(cmd, check=False)
|
||||||
|
|
||||||
|
|
||||||
|
def start_sysbench(log_path) -> SysbenchType:
|
||||||
|
"""Start sysbench, returns tuple with Popen object and file handle."""
|
||||||
|
set_apple_fan_speed('max')
|
||||||
|
cmd = [
|
||||||
|
'sysbench',
|
||||||
|
f'--threads={exe.psutil.cpu_count()}',
|
||||||
|
'--cpu-max-prime=1000000000',
|
||||||
|
'cpu',
|
||||||
|
'run',
|
||||||
|
]
|
||||||
|
|
||||||
|
# Start sysbench
|
||||||
|
filehandle = open(
|
||||||
|
log_path, 'a', encoding='utf-8',
|
||||||
|
)
|
||||||
|
proc = exe.popen_program(cmd, stdout=filehandle)
|
||||||
|
|
||||||
|
# Done
|
||||||
|
return (proc, filehandle)
|
||||||
|
|
||||||
|
|
||||||
def stop_mprime(proc_mprime) -> None:
|
def stop_mprime(proc_mprime) -> None:
|
||||||
"""Stop mprime gracefully, then forcefully as needed."""
|
"""Stop mprime gracefully, then forcefully as needed."""
|
||||||
proc_mprime.terminate()
|
proc_mprime.terminate()
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,9 @@ LOG = logging.getLogger(__name__)
|
||||||
TEST_GROUPS = {
|
TEST_GROUPS = {
|
||||||
# Also used to build the menu options
|
# Also used to build the menu options
|
||||||
## NOTE: This needs to be above MENU_SETS
|
## NOTE: This needs to be above MENU_SETS
|
||||||
'CPU & Cooling': 'cpu_stress_tests',
|
'CPU (Sysbench)': 'cpu_test_sysbench',
|
||||||
|
'CPU (Prime95)': 'cpu_test_mprime',
|
||||||
|
'CPU (Cooling)': 'cpu_test_cooling',
|
||||||
'Disk Attributes': 'disk_attribute_check',
|
'Disk Attributes': 'disk_attribute_check',
|
||||||
'Disk Self-Test': 'disk_self_test',
|
'Disk Self-Test': 'disk_self_test',
|
||||||
'Disk Surface Scan': 'disk_surface_scan',
|
'Disk Surface Scan': 'disk_surface_scan',
|
||||||
|
|
@ -65,6 +67,7 @@ MENU_ACTIONS_SECRET = (
|
||||||
MENU_OPTIONS_QUICK = ('Disk Attributes',)
|
MENU_OPTIONS_QUICK = ('Disk Attributes',)
|
||||||
MENU_SETS = {
|
MENU_SETS = {
|
||||||
'Full Diagnostic': (*TEST_GROUPS,),
|
'Full Diagnostic': (*TEST_GROUPS,),
|
||||||
|
'CPU Diagnostic': (*[group for group in TEST_GROUPS if group.startswith('CPU')],),
|
||||||
'Disk Diagnostic': (
|
'Disk Diagnostic': (
|
||||||
'Disk Attributes',
|
'Disk Attributes',
|
||||||
'Disk Self-Test',
|
'Disk Self-Test',
|
||||||
|
|
@ -85,6 +88,7 @@ class State():
|
||||||
self.disks: list[hw_disk.Disk] = []
|
self.disks: list[hw_disk.Disk] = []
|
||||||
self.log_dir: pathlib.Path | None = None
|
self.log_dir: pathlib.Path | None = None
|
||||||
self.progress_file: pathlib.Path | None = None
|
self.progress_file: pathlib.Path | None = None
|
||||||
|
self.sensors: hw_sensors.Sensors = hw_sensors.Sensors()
|
||||||
self.system: hw_system.System | None = None
|
self.system: hw_system.System | None = None
|
||||||
self.test_groups: list[TestGroup] = []
|
self.test_groups: list[TestGroup] = []
|
||||||
self.title_text: str = ansi.color_string('Hardware Diagnostics', 'GREEN')
|
self.title_text: str = ansi.color_string('Hardware Diagnostics', 'GREEN')
|
||||||
|
|
@ -123,6 +127,7 @@ class State():
|
||||||
# Reset objects
|
# Reset objects
|
||||||
self.disks.clear()
|
self.disks.clear()
|
||||||
self.test_groups.clear()
|
self.test_groups.clear()
|
||||||
|
self.sensors = hw_sensors.Sensors()
|
||||||
|
|
||||||
# Set log
|
# Set log
|
||||||
self.log_dir = log.format_log_path()
|
self.log_dir = log.format_log_path()
|
||||||
|
|
@ -158,21 +163,9 @@ class State():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if 'CPU' in name:
|
if 'CPU' in name:
|
||||||
# Create two Test objects which will both be used by cpu_stress_tests
|
|
||||||
# NOTE: Prime95 should be added first
|
|
||||||
self.system.tests.append(
|
self.system.tests.append(
|
||||||
Test(dev=self.system, label='Prime95', name=name),
|
Test(dev=self.system, label=name[5:-1], name=name),
|
||||||
)
|
)
|
||||||
self.system.tests.append(
|
|
||||||
Test(dev=self.system, label='Cooling', name=name),
|
|
||||||
)
|
|
||||||
self.test_groups.append(
|
|
||||||
TestGroup(
|
|
||||||
name=name,
|
|
||||||
function=globals()[TEST_GROUPS[name]],
|
|
||||||
test_objects=self.system.tests,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
if 'Disk' in name:
|
if 'Disk' in name:
|
||||||
test_group = TestGroup(
|
test_group = TestGroup(
|
||||||
|
|
@ -184,6 +177,17 @@ class State():
|
||||||
test_group.test_objects.append(test_obj)
|
test_group.test_objects.append(test_obj)
|
||||||
self.test_groups.append(test_group)
|
self.test_groups.append(test_group)
|
||||||
|
|
||||||
|
# Group CPU tests
|
||||||
|
if self.system.tests:
|
||||||
|
self.test_groups.insert(
|
||||||
|
0,
|
||||||
|
TestGroup(
|
||||||
|
name='CPU & Cooling',
|
||||||
|
function=run_cpu_tests,
|
||||||
|
test_objects=self.system.tests,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
def reset_layout(self) -> None:
|
def reset_layout(self) -> None:
|
||||||
"""Reset layout to avoid flickering."""
|
"""Reset layout to avoid flickering."""
|
||||||
self.ui.clear_current_pane_height()
|
self.ui.clear_current_pane_height()
|
||||||
|
|
@ -312,135 +316,200 @@ def build_menu(cli_mode=False, quick_mode=False) -> cli.Menu:
|
||||||
return menu
|
return menu
|
||||||
|
|
||||||
|
|
||||||
def cpu_stress_tests(state, test_objects, test_mode=False) -> None:
|
def cpu_tests_init(state) -> None:
|
||||||
"""CPU & cooling check using Prime95 and Sysbench."""
|
"""Initialize CPU tests."""
|
||||||
LOG.info('CPU Test (Prime95)')
|
|
||||||
aborted = False
|
|
||||||
prime_log = pathlib.Path(f'{state.log_dir}/prime.log')
|
|
||||||
run_sysbench = False
|
|
||||||
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
||||||
test_minutes = cfg.hw.CPU_TEST_MINUTES
|
state.update_title_text(state.system.cpu_description)
|
||||||
if test_mode:
|
|
||||||
test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT
|
|
||||||
test_mprime_obj, test_cooling_obj = test_objects
|
|
||||||
|
|
||||||
# Bail early
|
# Start monitor
|
||||||
if test_cooling_obj.disabled or test_mprime_obj.disabled:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Prep
|
|
||||||
state.update_title_text(test_mprime_obj.dev.cpu_description)
|
|
||||||
test_cooling_obj.set_status('Working')
|
|
||||||
test_mprime_obj.set_status('Working')
|
|
||||||
|
|
||||||
# Start sensors monitor
|
|
||||||
sensors = hw_sensors.Sensors()
|
|
||||||
sensors.start_background_monitor(
|
|
||||||
sensors_out,
|
|
||||||
thermal_action=('killall', 'mprime', '-INT'),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create monitor and worker panes
|
|
||||||
state.update_progress_file()
|
|
||||||
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=prime_log)
|
|
||||||
if PLATFORM == 'Darwin':
|
if PLATFORM == 'Darwin':
|
||||||
state.ui.add_info_pane(
|
state.ui.add_info_pane(
|
||||||
percent=80, cmd='./hw-sensors', update_layout=False,
|
percent=80, cmd='./hw-sensors', update_layout=False,
|
||||||
)
|
)
|
||||||
elif PLATFORM == 'Linux':
|
elif PLATFORM == 'Linux':
|
||||||
state.ui.add_info_pane(
|
state.ui.add_info_pane(
|
||||||
percent=80, watch_file=sensors_out, update_layout=False,
|
percent=80,
|
||||||
|
watch_file=pathlib.Path(f'{state.log_dir}/sensors.out'),
|
||||||
|
update_layout=False,
|
||||||
)
|
)
|
||||||
|
state.sensors.start_background_monitor(sensors_out)
|
||||||
state.ui.set_current_pane_height(3)
|
state.ui.set_current_pane_height(3)
|
||||||
|
|
||||||
# Get idle temps
|
# Save idle temps
|
||||||
cli.print_standard('Saving idle temps...')
|
cli.print_standard('Saving idle temps...')
|
||||||
sensors.save_average_temps(temp_label='Idle', seconds=5)
|
state.sensors.save_average_temps(temp_label='Idle', seconds=5)
|
||||||
|
|
||||||
# Stress CPU
|
|
||||||
cli.print_info('Running stress test')
|
|
||||||
hw_cpu.set_apple_fan_speed('max')
|
|
||||||
proc_mprime = hw_cpu.start_mprime(state.log_dir, prime_log)
|
|
||||||
|
|
||||||
# Show countdown
|
|
||||||
print('')
|
|
||||||
try:
|
|
||||||
print_countdown(proc=proc_mprime, seconds=test_minutes*60)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
aborted = True
|
|
||||||
|
|
||||||
# Stop Prime95
|
|
||||||
hw_cpu.stop_mprime(proc_mprime)
|
|
||||||
|
|
||||||
# Update progress if necessary
|
|
||||||
if sensors.cpu_reached_critical_temp() or aborted:
|
|
||||||
test_cooling_obj.set_status('Aborted')
|
|
||||||
test_mprime_obj.set_status('Aborted')
|
|
||||||
state.update_progress_file()
|
|
||||||
|
|
||||||
# Get cooldown temp
|
|
||||||
state.ui.clear_current_pane()
|
|
||||||
cli.print_standard('Letting CPU cooldown...')
|
|
||||||
std.sleep(5)
|
|
||||||
cli.print_standard('Saving cooldown temps...')
|
|
||||||
sensors.save_average_temps(temp_label='Cooldown', seconds=5)
|
|
||||||
|
|
||||||
# Check Prime95 results
|
|
||||||
test_mprime_obj.report.append(ansi.color_string('Prime95', 'BLUE'))
|
|
||||||
hw_cpu.check_mprime_results(
|
|
||||||
test_obj=test_mprime_obj, working_dir=state.log_dir,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Run Sysbench test if necessary
|
|
||||||
run_sysbench = (
|
|
||||||
not aborted and sensors.cpu_max_temp() >= cfg.hw.CPU_FAILURE_TEMP
|
|
||||||
)
|
|
||||||
if run_sysbench:
|
|
||||||
LOG.info('CPU Test (Sysbench)')
|
|
||||||
cli.print_standard('Letting CPU cooldown more...')
|
|
||||||
std.sleep(10)
|
|
||||||
state.ui.clear_current_pane()
|
|
||||||
cli.print_info('Running alternate stress test')
|
|
||||||
print('')
|
|
||||||
sysbench_log = prime_log.with_name('sysbench.log')
|
|
||||||
sysbench_log.touch()
|
|
||||||
state.ui.remove_all_worker_panes()
|
|
||||||
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=sysbench_log)
|
|
||||||
proc_sysbench, filehandle_sysbench = hw_cpu.start_sysbench(
|
|
||||||
sensors,
|
|
||||||
sensors_out,
|
|
||||||
log_path=sysbench_log,
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
print_countdown(proc=proc_sysbench, seconds=test_minutes*60)
|
|
||||||
except AttributeError:
|
|
||||||
# Assuming the sysbench process wasn't found and proc was set to None
|
|
||||||
LOG.error('Failed to find sysbench process', exc_info=True)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
aborted = True
|
|
||||||
hw_cpu.stop_sysbench(proc_sysbench, filehandle_sysbench)
|
|
||||||
|
|
||||||
# Update progress
|
|
||||||
# NOTE: CPU critical temp check isn't really necessary
|
|
||||||
# Hard to imagine it wasn't hit during Prime95 but was in sysbench
|
|
||||||
if sensors.cpu_reached_critical_temp() or aborted:
|
|
||||||
test_cooling_obj.set_status('Aborted')
|
|
||||||
test_mprime_obj.set_status('Aborted')
|
|
||||||
state.update_progress_file()
|
|
||||||
|
|
||||||
# Check Cooling results
|
|
||||||
test_cooling_obj.report.append(ansi.color_string('Temps', 'BLUE'))
|
|
||||||
hw_cpu.check_cooling_results(test_cooling_obj, sensors, run_sysbench)
|
|
||||||
|
|
||||||
|
def cpu_tests_end(state) -> None:
|
||||||
|
"""End CPU tests."""
|
||||||
# Cleanup
|
# Cleanup
|
||||||
state.update_progress_file()
|
state.sensors.stop_background_monitor()
|
||||||
sensors.stop_background_monitor()
|
|
||||||
state.ui.clear_current_pane_height()
|
state.ui.clear_current_pane_height()
|
||||||
state.ui.remove_all_info_panes()
|
state.ui.remove_all_info_panes()
|
||||||
state.ui.remove_all_worker_panes()
|
state.ui.remove_all_worker_panes()
|
||||||
|
|
||||||
|
|
||||||
|
def cpu_test_cooling(state, test_object, test_mode=False) -> None:
|
||||||
|
"""CPU cooling test via sensor data assessment."""
|
||||||
|
LOG.info('CPU Test (Cooling)')
|
||||||
|
|
||||||
|
# Bail early
|
||||||
|
if test_object.disabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
hw_cpu.check_cooling_results(state.sensors, test_object)
|
||||||
|
state.update_progress_file()
|
||||||
|
|
||||||
|
|
||||||
|
def cpu_test_mprime(state, test_object, test_mode=False) -> None:
|
||||||
|
"""CPU stress test using mprime."""
|
||||||
|
LOG.info('CPU Test (Prime95)')
|
||||||
|
aborted = False
|
||||||
|
log_path = pathlib.Path(f'{state.log_dir}/prime.log')
|
||||||
|
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
||||||
|
test_minutes = cfg.hw.CPU_TEST_MINUTES
|
||||||
|
if test_mode:
|
||||||
|
test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT
|
||||||
|
|
||||||
|
# Bail early
|
||||||
|
if test_object.disabled:
|
||||||
|
return
|
||||||
|
if state.sensors.cpu_reached_critical_temp():
|
||||||
|
test_object.set_status('Denied')
|
||||||
|
test_object.disabled = True
|
||||||
|
return
|
||||||
|
|
||||||
|
# Prep
|
||||||
|
test_object.set_status('Working')
|
||||||
|
state.update_progress_file()
|
||||||
|
state.ui.clear_current_pane()
|
||||||
|
cli.print_info('Running stress test')
|
||||||
|
print('')
|
||||||
|
|
||||||
|
# Start sensors monitor
|
||||||
|
state.sensors.stop_background_monitor()
|
||||||
|
state.sensors.start_background_monitor(
|
||||||
|
sensors_out,
|
||||||
|
alt_max='Prime95',
|
||||||
|
thermal_action=('killall', '-INT', 'mprime'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run Prime95
|
||||||
|
hw_cpu.set_apple_fan_speed('max')
|
||||||
|
proc = hw_cpu.start_mprime(state.log_dir, log_path)
|
||||||
|
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=log_path)
|
||||||
|
try:
|
||||||
|
print_countdown(proc=proc, seconds=test_minutes*60)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
aborted = True
|
||||||
|
|
||||||
|
# Stop Prime95
|
||||||
|
hw_cpu.stop_mprime(proc)
|
||||||
|
|
||||||
|
# Get cooldown temp
|
||||||
|
if 'Cooldown' in state.sensors.temp_labels:
|
||||||
|
# Give Prime95 time to save the results
|
||||||
|
std.sleep(1)
|
||||||
|
else:
|
||||||
|
# Save cooldown temp
|
||||||
|
state.ui.clear_current_pane()
|
||||||
|
cli.print_standard('Letting CPU cooldown...')
|
||||||
|
std.sleep(5)
|
||||||
|
cli.print_standard('Saving cooldown temps...')
|
||||||
|
state.sensors.save_average_temps(temp_label='Cooldown', seconds=5)
|
||||||
|
|
||||||
|
# Check Prime95 results
|
||||||
|
test_object.report.append(ansi.color_string('Prime95', 'BLUE'))
|
||||||
|
hw_cpu.check_mprime_results(test_obj=test_object, working_dir=state.log_dir)
|
||||||
|
|
||||||
|
# Update progress
|
||||||
|
if state.sensors.cpu_reached_critical_temp() or aborted:
|
||||||
|
test_object.set_status('Aborted')
|
||||||
|
state.update_progress_file()
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
|
state.ui.remove_all_worker_panes()
|
||||||
if aborted:
|
if aborted:
|
||||||
|
cpu_tests_end(state)
|
||||||
|
raise std.GenericAbort('Aborted')
|
||||||
|
|
||||||
|
|
||||||
|
def cpu_test_sysbench(state, test_object, test_mode=False) -> None:
|
||||||
|
"""CPU stress test using Sysbench."""
|
||||||
|
LOG.info('CPU Test (Sysbench)')
|
||||||
|
aborted = False
|
||||||
|
log_path = pathlib.Path(f'{state.log_dir}/sysbench.log')
|
||||||
|
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
||||||
|
test_minutes = cfg.hw.CPU_TEST_MINUTES
|
||||||
|
if test_mode:
|
||||||
|
test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT
|
||||||
|
|
||||||
|
# Bail early
|
||||||
|
if test_object.disabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Prep
|
||||||
|
test_object.set_status('Working')
|
||||||
|
state.update_progress_file()
|
||||||
|
state.ui.clear_current_pane()
|
||||||
|
cli.print_info('Running stress test')
|
||||||
|
print('')
|
||||||
|
|
||||||
|
# Start sensors monitor
|
||||||
|
state.sensors.stop_background_monitor()
|
||||||
|
state.sensors.start_background_monitor(
|
||||||
|
sensors_out,
|
||||||
|
alt_max='Sysbench',
|
||||||
|
thermal_action=('killall', '-INT', 'sysbench'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run sysbench
|
||||||
|
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=log_path)
|
||||||
|
proc, filehandle = hw_cpu.start_sysbench(log_path=log_path)
|
||||||
|
try:
|
||||||
|
print_countdown(proc=proc, seconds=test_minutes*60)
|
||||||
|
except AttributeError:
|
||||||
|
# Assuming the sysbench process wasn't found and proc was set to None
|
||||||
|
LOG.error('Failed to find sysbench process', exc_info=True)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
aborted = True
|
||||||
|
hw_cpu.stop_sysbench(proc, filehandle)
|
||||||
|
|
||||||
|
# Get cooldown temp
|
||||||
|
if 'Cooldown' not in state.sensors.temp_labels:
|
||||||
|
state.ui.clear_current_pane()
|
||||||
|
cli.print_standard('Letting CPU cooldown...')
|
||||||
|
std.sleep(5)
|
||||||
|
cli.print_standard('Saving cooldown temps...')
|
||||||
|
state.sensors.save_average_temps(temp_label='Cooldown', seconds=5)
|
||||||
|
|
||||||
|
# Update progress
|
||||||
|
test_object.report.append(ansi.color_string('Sysbench', 'BLUE'))
|
||||||
|
if aborted:
|
||||||
|
test_object.set_status('Aborted')
|
||||||
|
test_object.report.append(ansi.color_string(' Aborted.', 'YELLOW'))
|
||||||
|
state.update_progress_file()
|
||||||
|
elif state.sensors.cpu_reached_critical_temp():
|
||||||
|
test_object.set_status('Aborted')
|
||||||
|
test_object.report.append(
|
||||||
|
ansi.color_string(' Aborted due to temps.', 'YELLOW'),
|
||||||
|
)
|
||||||
|
elif proc.returncode not in (-15, -2, 0):
|
||||||
|
# NOTE: Return codes:
|
||||||
|
# 0 == Completed w/out issue
|
||||||
|
# -2 == Stopped with INT signal
|
||||||
|
# -15 == Stopped with TERM signal
|
||||||
|
test_object.set_status('Failed')
|
||||||
|
test_object.report.append(f' Failed with return code: {proc.returncode}')
|
||||||
|
else:
|
||||||
|
test_object.set_status('Passed')
|
||||||
|
test_object.report.append(' Completed without issue.')
|
||||||
|
state.update_progress_file()
|
||||||
|
|
||||||
|
# Done
|
||||||
|
state.ui.remove_all_worker_panes()
|
||||||
|
if aborted:
|
||||||
|
cpu_tests_end(state)
|
||||||
raise std.GenericAbort('Aborted')
|
raise std.GenericAbort('Aborted')
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -765,6 +834,16 @@ def print_countdown(proc, seconds) -> None:
|
||||||
# Done
|
# Done
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
|
def run_cpu_tests(state, test_objects, test_mode=False) -> None:
|
||||||
|
"""Run selected CPU test(s)."""
|
||||||
|
state.update_progress_file()
|
||||||
|
cpu_tests_init(state)
|
||||||
|
for obj in test_objects:
|
||||||
|
func = globals()[TEST_GROUPS[obj.name]]
|
||||||
|
func(state, obj, test_mode=test_mode)
|
||||||
|
cpu_tests_end(state)
|
||||||
|
state.update_progress_file()
|
||||||
|
|
||||||
|
|
||||||
def run_diags(state, menu, quick_mode=False, test_mode=False) -> None:
|
def run_diags(state, menu, quick_mode=False, test_mode=False) -> None:
|
||||||
"""Run selected diagnostics."""
|
"""Run selected diagnostics."""
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from subprocess import CalledProcessError
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from wk.cfg.hw import CPU_CRITICAL_TEMP, SMC_IDS, TEMP_COLORS
|
from wk.cfg.hw import CPU_TEMP_CRITICAL, SMC_IDS, TEMP_COLORS
|
||||||
from wk.exe import run_program, start_thread
|
from wk.exe import run_program, start_thread
|
||||||
from wk.io import non_clobber_path
|
from wk.io import non_clobber_path
|
||||||
from wk.std import PLATFORM, sleep
|
from wk.std import PLATFORM, sleep
|
||||||
|
|
@ -41,6 +41,7 @@ class Sensors():
|
||||||
self.background_thread: Thread | None = None
|
self.background_thread: Thread | None = None
|
||||||
self.data: dict[Any, Any] = get_sensor_data()
|
self.data: dict[Any, Any] = get_sensor_data()
|
||||||
self.out_path: pathlib.Path | str | None = None
|
self.out_path: pathlib.Path | str | None = None
|
||||||
|
self.temp_labels: set = set(['Current', 'Max'])
|
||||||
|
|
||||||
def clear_temps(self) -> None:
|
def clear_temps(self) -> None:
|
||||||
"""Clear saved temps but keep structure"""
|
"""Clear saved temps but keep structure"""
|
||||||
|
|
@ -49,26 +50,8 @@ class Sensors():
|
||||||
for source_data in sources.values():
|
for source_data in sources.values():
|
||||||
source_data['Temps'] = []
|
source_data['Temps'] = []
|
||||||
|
|
||||||
def cpu_max_temp(self) -> float:
|
|
||||||
"""Get max temp from any CPU source, returns float.
|
|
||||||
|
|
||||||
NOTE: If no temps are found this returns zero.
|
|
||||||
"""
|
|
||||||
max_temp = 0.0
|
|
||||||
|
|
||||||
# Check all CPU Temps
|
|
||||||
for section, adapters in self.data.items():
|
|
||||||
if not section.startswith('CPU'):
|
|
||||||
continue
|
|
||||||
for sources in adapters.values():
|
|
||||||
for source_data in sources.values():
|
|
||||||
max_temp = max(max_temp, source_data.get('Max', 0))
|
|
||||||
|
|
||||||
# Done
|
|
||||||
return max_temp
|
|
||||||
|
|
||||||
def cpu_reached_critical_temp(self) -> bool:
|
def cpu_reached_critical_temp(self) -> bool:
|
||||||
"""Check if CPU reached CPU_CRITICAL_TEMP, returns bool."""
|
"""Check if CPU reached CPU_TEMP_CRITICAL, returns bool."""
|
||||||
for section, adapters in self.data.items():
|
for section, adapters in self.data.items():
|
||||||
if not section.startswith('CPU'):
|
if not section.startswith('CPU'):
|
||||||
# Limit to CPU temps
|
# Limit to CPU temps
|
||||||
|
|
@ -77,7 +60,7 @@ class Sensors():
|
||||||
# Ugly section
|
# Ugly section
|
||||||
for sources in adapters.values():
|
for sources in adapters.values():
|
||||||
for source_data in sources.values():
|
for source_data in sources.values():
|
||||||
if source_data.get('Max', -1) >= CPU_CRITICAL_TEMP:
|
if source_data.get('Max', -1) > CPU_TEMP_CRITICAL:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Didn't return above so temps are within the threshold
|
# Didn't return above so temps are within the threshold
|
||||||
|
|
@ -119,6 +102,25 @@ class Sensors():
|
||||||
# Done
|
# Done
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
def get_cpu_temp(self, label) -> float:
|
||||||
|
"""Get temp for label from any CPU source, returns float.
|
||||||
|
|
||||||
|
NOTE: This returns the highest value for the label.
|
||||||
|
NOTE 2: If no temps are found this returns zero.
|
||||||
|
"""
|
||||||
|
max_temp = 0.0
|
||||||
|
|
||||||
|
# Check all CPU Temps
|
||||||
|
for section, adapters in self.data.items():
|
||||||
|
if not section.startswith('CPU'):
|
||||||
|
continue
|
||||||
|
for sources in adapters.values():
|
||||||
|
for source_data in sources.values():
|
||||||
|
max_temp = max(max_temp, source_data.get(label, 0))
|
||||||
|
|
||||||
|
# Done
|
||||||
|
return float(max_temp)
|
||||||
|
|
||||||
def monitor_to_file(
|
def monitor_to_file(
|
||||||
self, out_path, alt_max=None,
|
self, out_path, alt_max=None,
|
||||||
exit_on_thermal_limit=True, temp_labels=None,
|
exit_on_thermal_limit=True, temp_labels=None,
|
||||||
|
|
@ -136,6 +138,7 @@ class Sensors():
|
||||||
temp_labels = ['Current', 'Max']
|
temp_labels = ['Current', 'Max']
|
||||||
if alt_max:
|
if alt_max:
|
||||||
temp_labels.append(alt_max)
|
temp_labels.append(alt_max)
|
||||||
|
self.temp_labels.add(alt_max)
|
||||||
|
|
||||||
# Start loop
|
# Start loop
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -158,6 +161,7 @@ class Sensors():
|
||||||
def save_average_temps(self, temp_label, seconds=10) -> None:
|
def save_average_temps(self, temp_label, seconds=10) -> None:
|
||||||
"""Save average temps under temp_label over provided seconds.."""
|
"""Save average temps under temp_label over provided seconds.."""
|
||||||
self.clear_temps()
|
self.clear_temps()
|
||||||
|
self.temp_labels.add(temp_label)
|
||||||
|
|
||||||
# Get temps
|
# Get temps
|
||||||
for _ in range(seconds):
|
for _ in range(seconds):
|
||||||
|
|
@ -200,6 +204,10 @@ class Sensors():
|
||||||
|
|
||||||
def stop_background_monitor(self) -> None:
|
def stop_background_monitor(self) -> None:
|
||||||
"""Stop background thread."""
|
"""Stop background thread."""
|
||||||
|
# Bail early
|
||||||
|
if self.background_thread is None:
|
||||||
|
return
|
||||||
|
|
||||||
self.out_path.with_suffix('.stop').touch()
|
self.out_path.with_suffix('.stop').touch()
|
||||||
self.background_thread.join()
|
self.background_thread.join()
|
||||||
|
|
||||||
|
|
@ -210,6 +218,8 @@ class Sensors():
|
||||||
def update_sensor_data(
|
def update_sensor_data(
|
||||||
self, alt_max=None, exit_on_thermal_limit=True) -> None:
|
self, alt_max=None, exit_on_thermal_limit=True) -> None:
|
||||||
"""Update sensor data via OS-specific means."""
|
"""Update sensor data via OS-specific means."""
|
||||||
|
if alt_max:
|
||||||
|
self.temp_labels.add(alt_max)
|
||||||
if PLATFORM == 'Darwin':
|
if PLATFORM == 'Darwin':
|
||||||
self.update_sensor_data_macos(alt_max, exit_on_thermal_limit)
|
self.update_sensor_data_macos(alt_max, exit_on_thermal_limit)
|
||||||
elif PLATFORM == 'Linux':
|
elif PLATFORM == 'Linux':
|
||||||
|
|
@ -236,7 +246,7 @@ class Sensors():
|
||||||
|
|
||||||
# Raise exception if thermal limit reached
|
# Raise exception if thermal limit reached
|
||||||
if exit_on_thermal_limit and section == 'CPUTemps':
|
if exit_on_thermal_limit and section == 'CPUTemps':
|
||||||
if source_data['Current'] >= CPU_CRITICAL_TEMP:
|
if source_data['Current'] > CPU_TEMP_CRITICAL:
|
||||||
raise ThermalLimitReachedError('CPU temps reached limit')
|
raise ThermalLimitReachedError('CPU temps reached limit')
|
||||||
|
|
||||||
def update_sensor_data_macos(
|
def update_sensor_data_macos(
|
||||||
|
|
@ -263,7 +273,7 @@ class Sensors():
|
||||||
|
|
||||||
# Raise exception if thermal limit reached
|
# Raise exception if thermal limit reached
|
||||||
if exit_on_thermal_limit and section == 'CPUTemps':
|
if exit_on_thermal_limit and section == 'CPUTemps':
|
||||||
if source_data['Current'] >= CPU_CRITICAL_TEMP:
|
if source_data['Current'] > CPU_TEMP_CRITICAL:
|
||||||
raise ThermalLimitReachedError('CPU temps reached limit')
|
raise ThermalLimitReachedError('CPU temps reached limit')
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -420,7 +430,7 @@ def get_sensor_data_macos() -> dict[Any, Any]:
|
||||||
|
|
||||||
def get_temp_str(temp, colored=True) -> str:
|
def get_temp_str(temp, colored=True) -> str:
|
||||||
"""Get colored string based on temp, returns str."""
|
"""Get colored string based on temp, returns str."""
|
||||||
temp_color = None
|
temp_color = ''
|
||||||
|
|
||||||
# Safety check
|
# Safety check
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ class Test:
|
||||||
name: str
|
name: str
|
||||||
disabled: bool = field(init=False, default=False)
|
disabled: bool = field(init=False, default=False)
|
||||||
failed: bool = field(init=False, default=False)
|
failed: bool = field(init=False, default=False)
|
||||||
hidden: bool = False
|
|
||||||
passed: bool = field(init=False, default=False)
|
passed: bool = field(init=False, default=False)
|
||||||
report: list[str] = field(init=False, default_factory=list)
|
report: list[str] = field(init=False, default_factory=list)
|
||||||
status: str = field(init=False, default='Pending')
|
status: str = field(init=False, default='Pending')
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue