Split CPU & Cooling tests into separate functions

Addresses issue #204
This commit is contained in:
2Shirt 2023-07-02 15:08:44 -07:00
parent 172f00e4e9
commit f9a6850c1a
Signed by: 2Shirt
GPG key ID: 152FAC923B0E132C
5 changed files with 318 additions and 198 deletions

View file

@ -20,8 +20,10 @@ BADBLOCKS_REGEX = re.compile(
) )
BADBLOCKS_RESULTS_REGEX = re.compile(r'^(.*?)\x08.*\x08(.*)') BADBLOCKS_RESULTS_REGEX = re.compile(r'^(.*?)\x08.*\x08(.*)')
BADBLOCKS_SKIP_REGEX = re.compile(r'^(Checking|\[)', re.IGNORECASE) BADBLOCKS_SKIP_REGEX = re.compile(r'^(Checking|\[)', re.IGNORECASE)
CPU_CRITICAL_TEMP = 100 CPU_TEMP_COOLING_DELTA = 20
CPU_FAILURE_TEMP = 90 CPU_TEMP_CRITICAL = 100
CPU_TEMP_IDLE_HIGH = 85
CPU_TEMP_LOW_THRESHOLD = 50
CPU_TEST_MINUTES = 7 CPU_TEST_MINUTES = 7
IO_GRAPH_WIDTH = 40 IO_GRAPH_WIDTH = 40
IO_ALT_TEST_SIZE_FACTOR = 0.01 IO_ALT_TEST_SIZE_FACTOR = 0.01

View file

@ -8,7 +8,12 @@ import subprocess
from typing import TextIO from typing import TextIO
from wk import exe from wk import exe
from wk.cfg.hw import CPU_FAILURE_TEMP from wk.cfg.hw import (
CPU_TEMP_COOLING_DELTA,
CPU_TEMP_CRITICAL,
CPU_TEMP_IDLE_HIGH,
CPU_TEMP_LOW_THRESHOLD,
)
from wk.os.mac import set_fans as macos_set_fans from wk.os.mac import set_fans as macos_set_fans
from wk.std import PLATFORM from wk.std import PLATFORM
from wk.ui import ansi from wk.ui import ansi
@ -20,26 +25,57 @@ SysbenchType = tuple[subprocess.Popen, TextIO]
# Functions # Functions
def check_cooling_results(test_obj, sensors, run_sysbench=False) -> None: def check_cooling_results(sensors, test_object) -> None:
"""Check cooling results and update test_obj.""" """Check cooling result via sensor data."""
max_temp = sensors.cpu_max_temp() idle_temp = sensors.get_cpu_temp('Idle')
temp_labels = ['Idle', 'Max', 'Cooldown'] cooldown_temp = sensors.get_cpu_temp('Cooldown')
if run_sysbench: max_temp = sensors.get_cpu_temp('Max')
temp_labels.append('Sysbench') test_object.report.append(ansi.color_string('Temps', 'BLUE'))
# Check temps # Check temps
if not max_temp: if max_temp > CPU_TEMP_CRITICAL:
test_obj.set_status('Unknown') test_object.failed = True
elif max_temp >= CPU_FAILURE_TEMP: test_object.set_status('Failed')
test_obj.failed = True test_object.report.extend([
test_obj.set_status('Failed') ansi.color_string(
elif 'Aborted' not in test_obj.status: f' WARNING: Critical CPU temp of {CPU_TEMP_CRITICAL} exceeded.',
test_obj.passed = True 'RED',
test_obj.set_status('Passed') ),
'',
])
elif idle_temp >= CPU_TEMP_IDLE_HIGH:
test_object.failed = True
test_object.set_status('Failed')
test_object.report.extend([
ansi.color_string(
f' WARNING: Max idle temp of {CPU_TEMP_IDLE_HIGH} exceeded.',
'YELLOW',
),
'',
])
elif (
cooldown_temp <= CPU_TEMP_LOW_THRESHOLD
or abs(max_temp - cooldown_temp) >= CPU_TEMP_COOLING_DELTA
):
test_object.passed = True
test_object.set_status('Passed')
else:
test_object.passed = False
test_object.set_status('Unknown')
# Add temps to report # Build report
for line in sensors.generate_report(*temp_labels, only_cpu=True): report_labels = ['Idle']
test_obj.report.append(f' {line}') if 'Sysbench' in sensors.temp_labels:
report_labels.extend(['Sysbench', 'Cooldown'])
if 'Prime95' in sensors.temp_labels:
report_labels.append('Prime95')
if 'Cooldown' not in report_labels:
report_labels.append('Cooldown')
if len(sensors.temp_labels.intersection(['Prime95', 'Sysbench'])) < 1:
# Include overall max temp if needed
report_labels.append('Max')
for line in sensors.generate_report(*report_labels, only_cpu=True):
test_object.report.append(f' {line}')
def check_mprime_results(test_obj, working_dir) -> None: def check_mprime_results(test_obj, working_dir) -> None:
@ -112,7 +148,9 @@ def start_mprime(working_dir, log_path) -> subprocess.Popen:
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
) )
proc_mprime.stdout.close() # type: ignore[reportOptionalMemberAccess] proc_mprime.stdout.close() # type: ignore[reportOptionalMemberAccess]
save_nbsr = exe.NonBlockingStreamReader(proc_grep.stdout) save_nbsr = exe.NonBlockingStreamReader(
proc_grep.stdout, # type: ignore[reportGeneralTypeIssues]
)
exe.start_thread( exe.start_thread(
save_nbsr.save_to_file, save_nbsr.save_to_file,
args=(proc_grep, log_path), args=(proc_grep, log_path),
@ -122,35 +160,6 @@ def start_mprime(working_dir, log_path) -> subprocess.Popen:
return proc_mprime return proc_mprime
def start_sysbench(sensors, sensors_out, log_path) -> SysbenchType:
"""Start sysbench, returns tuple with Popen object and file handle."""
set_apple_fan_speed('max')
sysbench_cmd = [
'sysbench',
f'--threads={exe.psutil.cpu_count()}',
'--cpu-max-prime=1000000000',
'cpu',
'run',
]
# Restart background monitor for Sysbench
sensors.stop_background_monitor()
sensors.start_background_monitor(
sensors_out,
alt_max='Sysbench',
thermal_action=('killall', 'sysbench', '-INT'),
)
# Start sysbench
filehandle_sysbench = open(
log_path, 'a', encoding='utf-8',
)
proc_sysbench = exe.popen_program(sysbench_cmd, stdout=filehandle_sysbench)
# Done
return (proc_sysbench, filehandle_sysbench)
def set_apple_fan_speed(speed) -> None: def set_apple_fan_speed(speed) -> None:
"""Set Apple fan speed.""" """Set Apple fan speed."""
cmd = None cmd = None
@ -174,6 +183,27 @@ def set_apple_fan_speed(speed) -> None:
exe.run_program(cmd, check=False) exe.run_program(cmd, check=False)
def start_sysbench(log_path) -> SysbenchType:
"""Start sysbench, returns tuple with Popen object and file handle."""
set_apple_fan_speed('max')
cmd = [
'sysbench',
f'--threads={exe.psutil.cpu_count()}',
'--cpu-max-prime=1000000000',
'cpu',
'run',
]
# Start sysbench
filehandle = open(
log_path, 'a', encoding='utf-8',
)
proc = exe.popen_program(cmd, stdout=filehandle)
# Done
return (proc, filehandle)
def stop_mprime(proc_mprime) -> None: def stop_mprime(proc_mprime) -> None:
"""Stop mprime gracefully, then forcefully as needed.""" """Stop mprime gracefully, then forcefully as needed."""
proc_mprime.terminate() proc_mprime.terminate()

View file

@ -45,7 +45,9 @@ LOG = logging.getLogger(__name__)
TEST_GROUPS = { TEST_GROUPS = {
# Also used to build the menu options # Also used to build the menu options
## NOTE: This needs to be above MENU_SETS ## NOTE: This needs to be above MENU_SETS
'CPU & Cooling': 'cpu_stress_tests', 'CPU (Sysbench)': 'cpu_test_sysbench',
'CPU (Prime95)': 'cpu_test_mprime',
'CPU (Cooling)': 'cpu_test_cooling',
'Disk Attributes': 'disk_attribute_check', 'Disk Attributes': 'disk_attribute_check',
'Disk Self-Test': 'disk_self_test', 'Disk Self-Test': 'disk_self_test',
'Disk Surface Scan': 'disk_surface_scan', 'Disk Surface Scan': 'disk_surface_scan',
@ -65,6 +67,7 @@ MENU_ACTIONS_SECRET = (
MENU_OPTIONS_QUICK = ('Disk Attributes',) MENU_OPTIONS_QUICK = ('Disk Attributes',)
MENU_SETS = { MENU_SETS = {
'Full Diagnostic': (*TEST_GROUPS,), 'Full Diagnostic': (*TEST_GROUPS,),
'CPU Diagnostic': (*[group for group in TEST_GROUPS if group.startswith('CPU')],),
'Disk Diagnostic': ( 'Disk Diagnostic': (
'Disk Attributes', 'Disk Attributes',
'Disk Self-Test', 'Disk Self-Test',
@ -85,6 +88,7 @@ class State():
self.disks: list[hw_disk.Disk] = [] self.disks: list[hw_disk.Disk] = []
self.log_dir: pathlib.Path | None = None self.log_dir: pathlib.Path | None = None
self.progress_file: pathlib.Path | None = None self.progress_file: pathlib.Path | None = None
self.sensors: hw_sensors.Sensors = hw_sensors.Sensors()
self.system: hw_system.System | None = None self.system: hw_system.System | None = None
self.test_groups: list[TestGroup] = [] self.test_groups: list[TestGroup] = []
self.title_text: str = ansi.color_string('Hardware Diagnostics', 'GREEN') self.title_text: str = ansi.color_string('Hardware Diagnostics', 'GREEN')
@ -123,6 +127,7 @@ class State():
# Reset objects # Reset objects
self.disks.clear() self.disks.clear()
self.test_groups.clear() self.test_groups.clear()
self.sensors = hw_sensors.Sensors()
# Set log # Set log
self.log_dir = log.format_log_path() self.log_dir = log.format_log_path()
@ -158,21 +163,9 @@ class State():
continue continue
if 'CPU' in name: if 'CPU' in name:
# Create two Test objects which will both be used by cpu_stress_tests
# NOTE: Prime95 should be added first
self.system.tests.append( self.system.tests.append(
Test(dev=self.system, label='Prime95', name=name), Test(dev=self.system, label=name[5:-1], name=name),
) )
self.system.tests.append(
Test(dev=self.system, label='Cooling', name=name),
)
self.test_groups.append(
TestGroup(
name=name,
function=globals()[TEST_GROUPS[name]],
test_objects=self.system.tests,
),
)
if 'Disk' in name: if 'Disk' in name:
test_group = TestGroup( test_group = TestGroup(
@ -184,6 +177,17 @@ class State():
test_group.test_objects.append(test_obj) test_group.test_objects.append(test_obj)
self.test_groups.append(test_group) self.test_groups.append(test_group)
# Group CPU tests
if self.system.tests:
self.test_groups.insert(
0,
TestGroup(
name='CPU & Cooling',
function=run_cpu_tests,
test_objects=self.system.tests,
),
)
def reset_layout(self) -> None: def reset_layout(self) -> None:
"""Reset layout to avoid flickering.""" """Reset layout to avoid flickering."""
self.ui.clear_current_pane_height() self.ui.clear_current_pane_height()
@ -312,135 +316,200 @@ def build_menu(cli_mode=False, quick_mode=False) -> cli.Menu:
return menu return menu
def cpu_stress_tests(state, test_objects, test_mode=False) -> None: def cpu_tests_init(state) -> None:
"""CPU & cooling check using Prime95 and Sysbench.""" """Initialize CPU tests."""
LOG.info('CPU Test (Prime95)')
aborted = False
prime_log = pathlib.Path(f'{state.log_dir}/prime.log')
run_sysbench = False
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out') sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
test_minutes = cfg.hw.CPU_TEST_MINUTES state.update_title_text(state.system.cpu_description)
if test_mode:
test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT
test_mprime_obj, test_cooling_obj = test_objects
# Bail early # Start monitor
if test_cooling_obj.disabled or test_mprime_obj.disabled:
return
# Prep
state.update_title_text(test_mprime_obj.dev.cpu_description)
test_cooling_obj.set_status('Working')
test_mprime_obj.set_status('Working')
# Start sensors monitor
sensors = hw_sensors.Sensors()
sensors.start_background_monitor(
sensors_out,
thermal_action=('killall', 'mprime', '-INT'),
)
# Create monitor and worker panes
state.update_progress_file()
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=prime_log)
if PLATFORM == 'Darwin': if PLATFORM == 'Darwin':
state.ui.add_info_pane( state.ui.add_info_pane(
percent=80, cmd='./hw-sensors', update_layout=False, percent=80, cmd='./hw-sensors', update_layout=False,
) )
elif PLATFORM == 'Linux': elif PLATFORM == 'Linux':
state.ui.add_info_pane( state.ui.add_info_pane(
percent=80, watch_file=sensors_out, update_layout=False, percent=80,
watch_file=pathlib.Path(f'{state.log_dir}/sensors.out'),
update_layout=False,
) )
state.sensors.start_background_monitor(sensors_out)
state.ui.set_current_pane_height(3) state.ui.set_current_pane_height(3)
# Get idle temps # Save idle temps
cli.print_standard('Saving idle temps...') cli.print_standard('Saving idle temps...')
sensors.save_average_temps(temp_label='Idle', seconds=5) state.sensors.save_average_temps(temp_label='Idle', seconds=5)
# Stress CPU
cli.print_info('Running stress test')
hw_cpu.set_apple_fan_speed('max')
proc_mprime = hw_cpu.start_mprime(state.log_dir, prime_log)
# Show countdown
print('')
try:
print_countdown(proc=proc_mprime, seconds=test_minutes*60)
except KeyboardInterrupt:
aborted = True
# Stop Prime95
hw_cpu.stop_mprime(proc_mprime)
# Update progress if necessary
if sensors.cpu_reached_critical_temp() or aborted:
test_cooling_obj.set_status('Aborted')
test_mprime_obj.set_status('Aborted')
state.update_progress_file()
# Get cooldown temp
state.ui.clear_current_pane()
cli.print_standard('Letting CPU cooldown...')
std.sleep(5)
cli.print_standard('Saving cooldown temps...')
sensors.save_average_temps(temp_label='Cooldown', seconds=5)
# Check Prime95 results
test_mprime_obj.report.append(ansi.color_string('Prime95', 'BLUE'))
hw_cpu.check_mprime_results(
test_obj=test_mprime_obj, working_dir=state.log_dir,
)
# Run Sysbench test if necessary
run_sysbench = (
not aborted and sensors.cpu_max_temp() >= cfg.hw.CPU_FAILURE_TEMP
)
if run_sysbench:
LOG.info('CPU Test (Sysbench)')
cli.print_standard('Letting CPU cooldown more...')
std.sleep(10)
state.ui.clear_current_pane()
cli.print_info('Running alternate stress test')
print('')
sysbench_log = prime_log.with_name('sysbench.log')
sysbench_log.touch()
state.ui.remove_all_worker_panes()
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=sysbench_log)
proc_sysbench, filehandle_sysbench = hw_cpu.start_sysbench(
sensors,
sensors_out,
log_path=sysbench_log,
)
try:
print_countdown(proc=proc_sysbench, seconds=test_minutes*60)
except AttributeError:
# Assuming the sysbench process wasn't found and proc was set to None
LOG.error('Failed to find sysbench process', exc_info=True)
except KeyboardInterrupt:
aborted = True
hw_cpu.stop_sysbench(proc_sysbench, filehandle_sysbench)
# Update progress
# NOTE: CPU critical temp check isn't really necessary
# Hard to imagine it wasn't hit during Prime95 but was in sysbench
if sensors.cpu_reached_critical_temp() or aborted:
test_cooling_obj.set_status('Aborted')
test_mprime_obj.set_status('Aborted')
state.update_progress_file()
# Check Cooling results
test_cooling_obj.report.append(ansi.color_string('Temps', 'BLUE'))
hw_cpu.check_cooling_results(test_cooling_obj, sensors, run_sysbench)
def cpu_tests_end(state) -> None:
"""End CPU tests."""
# Cleanup # Cleanup
state.update_progress_file() state.sensors.stop_background_monitor()
sensors.stop_background_monitor()
state.ui.clear_current_pane_height() state.ui.clear_current_pane_height()
state.ui.remove_all_info_panes() state.ui.remove_all_info_panes()
state.ui.remove_all_worker_panes() state.ui.remove_all_worker_panes()
def cpu_test_cooling(state, test_object, test_mode=False) -> None:
"""CPU cooling test via sensor data assessment."""
LOG.info('CPU Test (Cooling)')
# Bail early
if test_object.disabled:
return
hw_cpu.check_cooling_results(state.sensors, test_object)
state.update_progress_file()
def cpu_test_mprime(state, test_object, test_mode=False) -> None:
"""CPU stress test using mprime."""
LOG.info('CPU Test (Prime95)')
aborted = False
log_path = pathlib.Path(f'{state.log_dir}/prime.log')
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
test_minutes = cfg.hw.CPU_TEST_MINUTES
if test_mode:
test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT
# Bail early
if test_object.disabled:
return
if state.sensors.cpu_reached_critical_temp():
test_object.set_status('Denied')
test_object.disabled = True
return
# Prep
test_object.set_status('Working')
state.update_progress_file()
state.ui.clear_current_pane()
cli.print_info('Running stress test')
print('')
# Start sensors monitor
state.sensors.stop_background_monitor()
state.sensors.start_background_monitor(
sensors_out,
alt_max='Prime95',
thermal_action=('killall', '-INT', 'mprime'),
)
# Run Prime95
hw_cpu.set_apple_fan_speed('max')
proc = hw_cpu.start_mprime(state.log_dir, log_path)
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=log_path)
try:
print_countdown(proc=proc, seconds=test_minutes*60)
except KeyboardInterrupt:
aborted = True
# Stop Prime95
hw_cpu.stop_mprime(proc)
# Get cooldown temp
if 'Cooldown' in state.sensors.temp_labels:
# Give Prime95 time to save the results
std.sleep(1)
else:
# Save cooldown temp
state.ui.clear_current_pane()
cli.print_standard('Letting CPU cooldown...')
std.sleep(5)
cli.print_standard('Saving cooldown temps...')
state.sensors.save_average_temps(temp_label='Cooldown', seconds=5)
# Check Prime95 results
test_object.report.append(ansi.color_string('Prime95', 'BLUE'))
hw_cpu.check_mprime_results(test_obj=test_object, working_dir=state.log_dir)
# Update progress
if state.sensors.cpu_reached_critical_temp() or aborted:
test_object.set_status('Aborted')
state.update_progress_file()
# Done # Done
state.ui.remove_all_worker_panes()
if aborted: if aborted:
cpu_tests_end(state)
raise std.GenericAbort('Aborted')
def cpu_test_sysbench(state, test_object, test_mode=False) -> None:
"""CPU stress test using Sysbench."""
LOG.info('CPU Test (Sysbench)')
aborted = False
log_path = pathlib.Path(f'{state.log_dir}/sysbench.log')
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
test_minutes = cfg.hw.CPU_TEST_MINUTES
if test_mode:
test_minutes = cfg.hw.TEST_MODE_CPU_LIMIT
# Bail early
if test_object.disabled:
return
# Prep
test_object.set_status('Working')
state.update_progress_file()
state.ui.clear_current_pane()
cli.print_info('Running stress test')
print('')
# Start sensors monitor
state.sensors.stop_background_monitor()
state.sensors.start_background_monitor(
sensors_out,
alt_max='Sysbench',
thermal_action=('killall', '-INT', 'sysbench'),
)
# Run sysbench
state.ui.add_worker_pane(lines=10, watch_cmd='tail', watch_file=log_path)
proc, filehandle = hw_cpu.start_sysbench(log_path=log_path)
try:
print_countdown(proc=proc, seconds=test_minutes*60)
except AttributeError:
# Assuming the sysbench process wasn't found and proc was set to None
LOG.error('Failed to find sysbench process', exc_info=True)
except KeyboardInterrupt:
aborted = True
hw_cpu.stop_sysbench(proc, filehandle)
# Get cooldown temp
if 'Cooldown' not in state.sensors.temp_labels:
state.ui.clear_current_pane()
cli.print_standard('Letting CPU cooldown...')
std.sleep(5)
cli.print_standard('Saving cooldown temps...')
state.sensors.save_average_temps(temp_label='Cooldown', seconds=5)
# Update progress
test_object.report.append(ansi.color_string('Sysbench', 'BLUE'))
if aborted:
test_object.set_status('Aborted')
test_object.report.append(ansi.color_string(' Aborted.', 'YELLOW'))
state.update_progress_file()
elif state.sensors.cpu_reached_critical_temp():
test_object.set_status('Aborted')
test_object.report.append(
ansi.color_string(' Aborted due to temps.', 'YELLOW'),
)
elif proc.returncode not in (-15, -2, 0):
# NOTE: Return codes:
# 0 == Completed w/out issue
# -2 == Stopped with INT signal
# -15 == Stopped with TERM signal
test_object.set_status('Failed')
test_object.report.append(f' Failed with return code: {proc.returncode}')
else:
test_object.set_status('Passed')
test_object.report.append(' Completed without issue.')
state.update_progress_file()
# Done
state.ui.remove_all_worker_panes()
if aborted:
cpu_tests_end(state)
raise std.GenericAbort('Aborted') raise std.GenericAbort('Aborted')
@ -765,6 +834,16 @@ def print_countdown(proc, seconds) -> None:
# Done # Done
print('') print('')
def run_cpu_tests(state, test_objects, test_mode=False) -> None:
"""Run selected CPU test(s)."""
state.update_progress_file()
cpu_tests_init(state)
for obj in test_objects:
func = globals()[TEST_GROUPS[obj.name]]
func(state, obj, test_mode=test_mode)
cpu_tests_end(state)
state.update_progress_file()
def run_diags(state, menu, quick_mode=False, test_mode=False) -> None: def run_diags(state, menu, quick_mode=False, test_mode=False) -> None:
"""Run selected diagnostics.""" """Run selected diagnostics."""

View file

@ -10,7 +10,7 @@ from subprocess import CalledProcessError
from threading import Thread from threading import Thread
from typing import Any from typing import Any
from wk.cfg.hw import CPU_CRITICAL_TEMP, SMC_IDS, TEMP_COLORS from wk.cfg.hw import CPU_TEMP_CRITICAL, SMC_IDS, TEMP_COLORS
from wk.exe import run_program, start_thread from wk.exe import run_program, start_thread
from wk.io import non_clobber_path from wk.io import non_clobber_path
from wk.std import PLATFORM, sleep from wk.std import PLATFORM, sleep
@ -41,6 +41,7 @@ class Sensors():
self.background_thread: Thread | None = None self.background_thread: Thread | None = None
self.data: dict[Any, Any] = get_sensor_data() self.data: dict[Any, Any] = get_sensor_data()
self.out_path: pathlib.Path | str | None = None self.out_path: pathlib.Path | str | None = None
self.temp_labels: set = set(['Current', 'Max'])
def clear_temps(self) -> None: def clear_temps(self) -> None:
"""Clear saved temps but keep structure""" """Clear saved temps but keep structure"""
@ -49,26 +50,8 @@ class Sensors():
for source_data in sources.values(): for source_data in sources.values():
source_data['Temps'] = [] source_data['Temps'] = []
def cpu_max_temp(self) -> float:
"""Get max temp from any CPU source, returns float.
NOTE: If no temps are found this returns zero.
"""
max_temp = 0.0
# Check all CPU Temps
for section, adapters in self.data.items():
if not section.startswith('CPU'):
continue
for sources in adapters.values():
for source_data in sources.values():
max_temp = max(max_temp, source_data.get('Max', 0))
# Done
return max_temp
def cpu_reached_critical_temp(self) -> bool: def cpu_reached_critical_temp(self) -> bool:
"""Check if CPU reached CPU_CRITICAL_TEMP, returns bool.""" """Check if CPU reached CPU_TEMP_CRITICAL, returns bool."""
for section, adapters in self.data.items(): for section, adapters in self.data.items():
if not section.startswith('CPU'): if not section.startswith('CPU'):
# Limit to CPU temps # Limit to CPU temps
@ -77,7 +60,7 @@ class Sensors():
# Ugly section # Ugly section
for sources in adapters.values(): for sources in adapters.values():
for source_data in sources.values(): for source_data in sources.values():
if source_data.get('Max', -1) >= CPU_CRITICAL_TEMP: if source_data.get('Max', -1) > CPU_TEMP_CRITICAL:
return True return True
# Didn't return above so temps are within the threshold # Didn't return above so temps are within the threshold
@ -119,6 +102,25 @@ class Sensors():
# Done # Done
return report return report
def get_cpu_temp(self, label) -> float:
"""Get temp for label from any CPU source, returns float.
NOTE: This returns the highest value for the label.
NOTE 2: If no temps are found this returns zero.
"""
max_temp = 0.0
# Check all CPU Temps
for section, adapters in self.data.items():
if not section.startswith('CPU'):
continue
for sources in adapters.values():
for source_data in sources.values():
max_temp = max(max_temp, source_data.get(label, 0))
# Done
return float(max_temp)
def monitor_to_file( def monitor_to_file(
self, out_path, alt_max=None, self, out_path, alt_max=None,
exit_on_thermal_limit=True, temp_labels=None, exit_on_thermal_limit=True, temp_labels=None,
@ -136,6 +138,7 @@ class Sensors():
temp_labels = ['Current', 'Max'] temp_labels = ['Current', 'Max']
if alt_max: if alt_max:
temp_labels.append(alt_max) temp_labels.append(alt_max)
self.temp_labels.add(alt_max)
# Start loop # Start loop
while True: while True:
@ -158,6 +161,7 @@ class Sensors():
def save_average_temps(self, temp_label, seconds=10) -> None: def save_average_temps(self, temp_label, seconds=10) -> None:
"""Save average temps under temp_label over provided seconds..""" """Save average temps under temp_label over provided seconds.."""
self.clear_temps() self.clear_temps()
self.temp_labels.add(temp_label)
# Get temps # Get temps
for _ in range(seconds): for _ in range(seconds):
@ -200,6 +204,10 @@ class Sensors():
def stop_background_monitor(self) -> None: def stop_background_monitor(self) -> None:
"""Stop background thread.""" """Stop background thread."""
# Bail early
if self.background_thread is None:
return
self.out_path.with_suffix('.stop').touch() self.out_path.with_suffix('.stop').touch()
self.background_thread.join() self.background_thread.join()
@ -210,6 +218,8 @@ class Sensors():
def update_sensor_data( def update_sensor_data(
self, alt_max=None, exit_on_thermal_limit=True) -> None: self, alt_max=None, exit_on_thermal_limit=True) -> None:
"""Update sensor data via OS-specific means.""" """Update sensor data via OS-specific means."""
if alt_max:
self.temp_labels.add(alt_max)
if PLATFORM == 'Darwin': if PLATFORM == 'Darwin':
self.update_sensor_data_macos(alt_max, exit_on_thermal_limit) self.update_sensor_data_macos(alt_max, exit_on_thermal_limit)
elif PLATFORM == 'Linux': elif PLATFORM == 'Linux':
@ -236,7 +246,7 @@ class Sensors():
# Raise exception if thermal limit reached # Raise exception if thermal limit reached
if exit_on_thermal_limit and section == 'CPUTemps': if exit_on_thermal_limit and section == 'CPUTemps':
if source_data['Current'] >= CPU_CRITICAL_TEMP: if source_data['Current'] > CPU_TEMP_CRITICAL:
raise ThermalLimitReachedError('CPU temps reached limit') raise ThermalLimitReachedError('CPU temps reached limit')
def update_sensor_data_macos( def update_sensor_data_macos(
@ -263,7 +273,7 @@ class Sensors():
# Raise exception if thermal limit reached # Raise exception if thermal limit reached
if exit_on_thermal_limit and section == 'CPUTemps': if exit_on_thermal_limit and section == 'CPUTemps':
if source_data['Current'] >= CPU_CRITICAL_TEMP: if source_data['Current'] > CPU_TEMP_CRITICAL:
raise ThermalLimitReachedError('CPU temps reached limit') raise ThermalLimitReachedError('CPU temps reached limit')
@ -420,7 +430,7 @@ def get_sensor_data_macos() -> dict[Any, Any]:
def get_temp_str(temp, colored=True) -> str: def get_temp_str(temp, colored=True) -> str:
"""Get colored string based on temp, returns str.""" """Get colored string based on temp, returns str."""
temp_color = None temp_color = ''
# Safety check # Safety check
try: try:

View file

@ -12,7 +12,6 @@ class Test:
name: str name: str
disabled: bool = field(init=False, default=False) disabled: bool = field(init=False, default=False)
failed: bool = field(init=False, default=False) failed: bool = field(init=False, default=False)
hidden: bool = False
passed: bool = field(init=False, default=False) passed: bool = field(init=False, default=False)
report: list[str] = field(init=False, default_factory=list) report: list[str] = field(init=False, default_factory=list)
status: str = field(init=False, default='Pending') status: str = field(init=False, default='Pending')