Handle critical temps correctly in mprime sections
* Moved ThermalLimitReachedError catches to wk.hw.sensors
  * Previously they would never be caught and would never stop the script
* Added cpu_reached_critical_temp() to wk.hw.sensors
  * This allows us to check whether a critical temp was reached without relying on exceptions
* Added thermal_action to wk.hw.sensors
  * This command is run when a ThermalLimitReachedError is caught
* Stop print_countdown if mprime is terminated
  * This is required since mprime may be killed in the background
This commit is contained in:
parent
c520b5a865
commit
4990537082
2 changed files with 81 additions and 30 deletions
|
|
@ -68,15 +68,15 @@ MENU_TOGGLES = (
|
|||
'Skip USB Benchmarks',
|
||||
)
|
||||
STATUS_COLORS = {
|
||||
'Passed': 'GREEN',
|
||||
'Aborted': 'YELLOW',
|
||||
'Denied': 'RED',
|
||||
'ERROR': 'RED',
|
||||
'FAIL': 'RED',
|
||||
'N/A': 'YELLOW',
|
||||
'PASS': 'GREEN',
|
||||
'TimedOut': 'RED',
|
||||
'Unknown': 'YELLOW',
|
||||
'Working': 'YELLOW',
|
||||
'Denied': 'RED',
|
||||
'ERROR': 'RED',
|
||||
'Failed': 'RED',
|
||||
'TimedOut': 'RED',
|
||||
}
|
||||
WK_LABEL_REGEX = re.compile(
|
||||
fr'{cfg.main.KIT_NAME_SHORT}_(LINUX|UFD)',
|
||||
|
|
@ -417,6 +417,7 @@ def check_mprime_results(test_obj, working_dir):
|
|||
def cpu_mprime_test(state, test_objects):
|
||||
"""CPU & cooling check using Prime95."""
|
||||
LOG.info('CPU Test (Prime95)')
|
||||
aborted = False
|
||||
prime_log = pathlib.Path(f'{state.log_dir}/prime.log')
|
||||
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
||||
test_mprime_obj, test_cooling_obj = test_objects
|
||||
|
|
@ -432,7 +433,10 @@ def cpu_mprime_test(state, test_objects):
|
|||
|
||||
# Start sensors monitor
|
||||
sensors = hw_sensors.Sensors()
|
||||
sensors.start_background_monitor(sensors_out)
|
||||
sensors.start_background_monitor(
|
||||
sensors_out,
|
||||
thermal_action=('killall', 'mprime'),
|
||||
)
|
||||
|
||||
# Create monitor and worker panes
|
||||
state.update_progress_pane()
|
||||
|
|
@ -450,28 +454,27 @@ def cpu_mprime_test(state, test_objects):
|
|||
|
||||
# Stress CPU
|
||||
std.print_info('Starting stress test')
|
||||
std.print_warning('If running too hot, press CTRL+c to abort the test')
|
||||
set_apple_fan_speed('max')
|
||||
proc_mprime = start_mprime_thread(state.log_dir, prime_log)
|
||||
proc_mprime = start_mprime(state.log_dir, prime_log)
|
||||
|
||||
# Show countdown
|
||||
print('')
|
||||
try:
|
||||
print_countdown(seconds=cfg.hw.CPU_TEST_MINUTES*60)
|
||||
print_countdown(proc=proc_mprime, seconds=cfg.hw.CPU_TEST_MINUTES*60)
|
||||
except KeyboardInterrupt:
|
||||
test_cooling_obj.set_status('Aborted')
|
||||
test_mprime_obj.set_status('Aborted')
|
||||
except hw_sensors.ThermalLimitReachedError:
|
||||
test_mprime_obj.set_status('Aborted')
|
||||
aborted = True
|
||||
|
||||
# Stop Prime95
|
||||
proc_mprime.terminate()
|
||||
try:
|
||||
proc_mprime.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
proc_mprime.kill()
|
||||
set_apple_fan_speed('auto')
|
||||
stop_mprime(proc_mprime)
|
||||
|
||||
# Update progress if necessary
|
||||
if sensors.cpu_reached_critical_temp() or aborted:
|
||||
test_cooling_obj.set_status('Aborted')
|
||||
test_mprime_obj.set_status('Aborted')
|
||||
state.update_progress_pane()
|
||||
|
||||
# Get cooldown temp
|
||||
std.clear_screen()
|
||||
std.print_standard('Letting CPU cooldown...')
|
||||
std.sleep(5)
|
||||
std.print_standard('Saving cooldown temps...')
|
||||
|
|
@ -705,8 +708,8 @@ def network_test():
|
|||
std.pause('Press Enter to return to main menu...')
|
||||
|
||||
|
||||
def print_countdown(seconds):
|
||||
"""Print countdown to screen."""
|
||||
def print_countdown(proc, seconds):
|
||||
"""Print countdown to screen while proc is alive."""
|
||||
for i in range(seconds):
|
||||
sec_left = (seconds - i) % 60
|
||||
min_left = int((seconds - i) / 60)
|
||||
|
|
@ -718,7 +721,17 @@ def print_countdown(seconds):
|
|||
out_str += ' remaining'
|
||||
|
||||
print(f'{out_str:<42}', end='', flush=True)
|
||||
std.sleep(1)
|
||||
try:
|
||||
proc.wait(1)
|
||||
except KeyboardInterrupt:
|
||||
# Stop countdown
|
||||
break
|
||||
except subprocess.TimeoutExpired:
|
||||
# proc still going, continue
|
||||
pass
|
||||
if proc.poll() is not None:
|
||||
# proc exited, stop countdown
|
||||
break
|
||||
|
||||
# Done
|
||||
print('')
|
||||
|
|
@ -835,8 +848,9 @@ def show_results(state):
|
|||
std.print_standard(' ')
|
||||
|
||||
|
||||
def start_mprime_thread(working_dir, log_path):
|
||||
def start_mprime(working_dir, log_path):
|
||||
"""Start mprime and save filtered output to log, returns Popen object."""
|
||||
set_apple_fan_speed('max')
|
||||
proc_mprime = subprocess.Popen(
|
||||
['mprime', '-t'],
|
||||
cwd=working_dir,
|
||||
|
|
@ -859,5 +873,15 @@ def start_mprime_thread(working_dir, log_path):
|
|||
return proc_mprime
|
||||
|
||||
|
||||
def stop_mprime(proc):
    """Stop mprime gracefully, then forcefully as needed.

    proc is the Popen object for the running mprime process (as returned
    by start_mprime()).  The Apple fan speed is set back to 'auto' once
    the process has been dealt with.
    """
    # NOTE: Use the 'proc' parameter; 'proc_mprime' is a local of the caller
    #       and is not defined in this scope.
    proc.terminate()
    try:
        # Give mprime a short grace period to exit on its own
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Still running after the grace period, force it to stop
        proc.kill()
    set_apple_fan_speed('auto')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("This file is not meant to be called directly.")
|
||||
|
|
|
|||
|
|
@ -64,6 +64,22 @@ class Sensors():
|
|||
# Done
|
||||
return max_temp
|
||||
|
||||
def cpu_reached_critical_temp(self):
    """Check if any CPU source reached CPU_CRITICAL_TEMP, returns bool."""
    # Lazily walk section -> adapter -> source, limited to CPU sections
    cpu_sources = (
        source_data
        for section, adapters in self.data.items()
        if section.startswith('CPU')
        for sources in adapters.values()
        for source_data in sources.values()
    )

    # Sources without a recorded 'Max' are treated as -1 (below threshold);
    # any() stops at the first source at or above the critical temp
    return any(
        source_data.get('Max', -1) >= CPU_CRITICAL_TEMP
        for source_data in cpu_sources
    )
|
||||
|
||||
def generate_report(self, *temp_labels, colored=True, only_cpu=False):
|
||||
"""Generate report based on given temp_labels, returns list."""
|
||||
report = []
|
||||
|
|
@ -72,7 +88,7 @@ class Sensors():
|
|||
if only_cpu and not section.startswith('CPU'):
|
||||
continue
|
||||
|
||||
# Ugly section
|
||||
# Ugly section
|
||||
for adapter, sources in sorted(adapters.items()):
|
||||
report.append(fix_sensor_name(adapter))
|
||||
for source, source_data in sorted(sources.items()):
|
||||
|
|
@ -99,15 +115,22 @@ class Sensors():
|
|||
# Done
|
||||
return report
|
||||
|
||||
def monitor_to_file(self, out_path, temp_labels=None):
|
||||
"""Write report to path every second until stopped."""
|
||||
def monitor_to_file(self, out_path, temp_labels=None, thermal_action=None):
|
||||
"""Write report to path every second until stopped.
|
||||
|
||||
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
|
||||
"""
|
||||
stop_path = pathlib.Path(out_path).resolve().with_suffix('.stop')
|
||||
if not temp_labels:
|
||||
temp_labels = ('Current', 'Max')
|
||||
|
||||
# Start loop
|
||||
while True:
|
||||
self.update_sensor_data()
|
||||
try:
|
||||
self.update_sensor_data()
|
||||
except ThermalLimitReachedError:
|
||||
if thermal_action:
|
||||
run_program(thermal_action, check=False)
|
||||
report = self.generate_report(*temp_labels)
|
||||
with open(out_path, 'w') as _f:
|
||||
_f.write('\n'.join(report))
|
||||
|
|
@ -136,15 +159,19 @@ class Sensors():
|
|||
temps = source_data['Temps']
|
||||
source_data[temp_label] = sum(temps) / len(temps)
|
||||
|
||||
def start_background_monitor(self, out_path, temp_labels=None):
|
||||
"""Start background thread to save report to file."""
|
||||
def start_background_monitor(
|
||||
self, out_path, temp_labels=None, thermal_action=None):
|
||||
"""Start background thread to save report to file.
|
||||
|
||||
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
|
||||
"""
|
||||
if self.background_thread:
|
||||
raise RuntimeError('Background thread already running')
|
||||
|
||||
self.out_path = pathlib.Path(out_path)
|
||||
self.background_thread = start_thread(
|
||||
self.monitor_to_file,
|
||||
args=(out_path, temp_labels),
|
||||
args=(out_path, temp_labels, thermal_action),
|
||||
)
|
||||
|
||||
def stop_background_monitor(self):
|
||||
|
|
|
|||
Loading…
Reference in a new issue