Handle critical temps correctly in mprime sections

* Moved ThermalLimitReachedError catches to wk.hw.sensors
  * Previously they were never caught, so they never stopped the script
* Added cpu_reached_critical_temp() to wk.hw.sensors
  * This allows us to check if it happened without exceptions
* Added thermal_action to wk.hw.sensors
  * This is run when ThermalLimitReachedError(s) are caught
* Stop print_countdown if mprime is terminated
  * This is required since it may be killed in the background
This commit is contained in:
2Shirt 2019-12-02 20:11:02 -07:00
parent c520b5a865
commit 4990537082
Signed by: 2Shirt
GPG key ID: 152FAC923B0E132C
2 changed files with 81 additions and 30 deletions

View file

@ -68,15 +68,15 @@ MENU_TOGGLES = (
'Skip USB Benchmarks',
)
STATUS_COLORS = {
'Passed': 'GREEN',
'Aborted': 'YELLOW',
'Denied': 'RED',
'ERROR': 'RED',
'FAIL': 'RED',
'N/A': 'YELLOW',
'PASS': 'GREEN',
'TimedOut': 'RED',
'Unknown': 'YELLOW',
'Working': 'YELLOW',
'Denied': 'RED',
'ERROR': 'RED',
'Failed': 'RED',
'TimedOut': 'RED',
}
WK_LABEL_REGEX = re.compile(
fr'{cfg.main.KIT_NAME_SHORT}_(LINUX|UFD)',
@ -417,6 +417,7 @@ def check_mprime_results(test_obj, working_dir):
def cpu_mprime_test(state, test_objects):
"""CPU & cooling check using Prime95."""
LOG.info('CPU Test (Prime95)')
aborted = False
prime_log = pathlib.Path(f'{state.log_dir}/prime.log')
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
test_mprime_obj, test_cooling_obj = test_objects
@ -432,7 +433,10 @@ def cpu_mprime_test(state, test_objects):
# Start sensors monitor
sensors = hw_sensors.Sensors()
sensors.start_background_monitor(sensors_out)
sensors.start_background_monitor(
sensors_out,
thermal_action=('killall', 'mprime'),
)
# Create monitor and worker panes
state.update_progress_pane()
@ -450,28 +454,27 @@ def cpu_mprime_test(state, test_objects):
# Stress CPU
std.print_info('Starting stress test')
std.print_warning('If running too hot, press CTRL+c to abort the test')
set_apple_fan_speed('max')
proc_mprime = start_mprime_thread(state.log_dir, prime_log)
proc_mprime = start_mprime(state.log_dir, prime_log)
# Show countdown
print('')
try:
print_countdown(seconds=cfg.hw.CPU_TEST_MINUTES*60)
print_countdown(proc=proc_mprime, seconds=cfg.hw.CPU_TEST_MINUTES*60)
except KeyboardInterrupt:
test_cooling_obj.set_status('Aborted')
test_mprime_obj.set_status('Aborted')
except hw_sensors.ThermalLimitReachedError:
test_mprime_obj.set_status('Aborted')
aborted = True
# Stop Prime95
proc_mprime.terminate()
try:
proc_mprime.wait(timeout=5)
except subprocess.TimeoutExpired:
proc_mprime.kill()
set_apple_fan_speed('auto')
stop_mprime(proc_mprime)
# Update progress if necessary
if sensors.cpu_reached_critical_temp() or aborted:
test_cooling_obj.set_status('Aborted')
test_mprime_obj.set_status('Aborted')
state.update_progress_pane()
# Get cooldown temp
std.clear_screen()
std.print_standard('Letting CPU cooldown...')
std.sleep(5)
std.print_standard('Saving cooldown temps...')
@ -705,8 +708,8 @@ def network_test():
std.pause('Press Enter to return to main menu...')
def print_countdown(seconds):
"""Print countdown to screen."""
def print_countdown(proc, seconds):
"""Print countdown to screen while proc is alive."""
for i in range(seconds):
sec_left = (seconds - i) % 60
min_left = int((seconds - i) / 60)
@ -718,7 +721,17 @@ def print_countdown(seconds):
out_str += ' remaining'
print(f'{out_str:<42}', end='', flush=True)
std.sleep(1)
try:
proc.wait(1)
except KeyboardInterrupt:
# Stop countdown
break
except subprocess.TimeoutExpired:
# proc still going, continue
pass
if proc.poll() is not None:
# proc exited, stop countdown
break
# Done
print('')
@ -835,8 +848,9 @@ def show_results(state):
std.print_standard(' ')
def start_mprime_thread(working_dir, log_path):
def start_mprime(working_dir, log_path):
"""Start mprime and save filtered output to log, returns Popen object."""
set_apple_fan_speed('max')
proc_mprime = subprocess.Popen(
['mprime', '-t'],
cwd=working_dir,
@ -859,5 +873,15 @@ def start_mprime_thread(working_dir, log_path):
return proc_mprime
def stop_mprime(proc):
    """Stop mprime gracefully, then forcefully as needed.

    proc is the Popen object for the running mprime process.  It is sent
    a terminate request first and is only killed if it has not exited
    within five seconds.  The Apple fan speed is set back to 'auto' once
    the process has been dealt with.
    """
    # NOTE: the body must use the parameter name (proc); referring to the
    # caller's variable name here raises NameError and leaves mprime running.
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Graceful shutdown took too long, force it
        proc.kill()
    set_apple_fan_speed('auto')
if __name__ == '__main__':
print("This file is not meant to be called directly.")

View file

@ -64,6 +64,22 @@ class Sensors():
# Done
return max_temp
def cpu_reached_critical_temp(self):
    """Check whether any CPU source hit CPU_CRITICAL_TEMP, returns bool.

    Only sections of self.data whose name starts with 'CPU' are examined.
    A source without a 'Max' entry is treated as -1.
    """
    # Limit the search to CPU sections
    cpu_adapters = (
        adapters
        for section, adapters in self.data.items()
        if section.startswith('CPU')
    )

    # any() stops at the first source whose 'Max' is at/over the threshold
    return any(
        source_data.get('Max', -1) >= CPU_CRITICAL_TEMP
        for adapters in cpu_adapters
        for sources in adapters.values()
        for source_data in sources.values()
    )
def generate_report(self, *temp_labels, colored=True, only_cpu=False):
"""Generate report based on given temp_labels, returns list."""
report = []
@ -72,7 +88,7 @@ class Sensors():
if only_cpu and not section.startswith('CPU'):
continue
# Ugly section
# Ugly section
for adapter, sources in sorted(adapters.items()):
report.append(fix_sensor_name(adapter))
for source, source_data in sorted(sources.items()):
@ -99,15 +115,22 @@ class Sensors():
# Done
return report
def monitor_to_file(self, out_path, temp_labels=None):
"""Write report to path every second until stopped."""
def monitor_to_file(self, out_path, temp_labels=None, thermal_action=None):
"""Write report to path every second until stopped.
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
"""
stop_path = pathlib.Path(out_path).resolve().with_suffix('.stop')
if not temp_labels:
temp_labels = ('Current', 'Max')
# Start loop
while True:
self.update_sensor_data()
try:
self.update_sensor_data()
except ThermalLimitReachedError:
if thermal_action:
run_program(thermal_action, check=False)
report = self.generate_report(*temp_labels)
with open(out_path, 'w') as _f:
_f.write('\n'.join(report))
@ -136,15 +159,19 @@ class Sensors():
temps = source_data['Temps']
source_data[temp_label] = sum(temps) / len(temps)
def start_background_monitor(self, out_path, temp_labels=None):
"""Start background thread to save report to file."""
def start_background_monitor(
self, out_path, temp_labels=None, thermal_action=None):
"""Start background thread to save report to file.
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
"""
if self.background_thread:
raise RuntimeError('Background thread already running')
self.out_path = pathlib.Path(out_path)
self.background_thread = start_thread(
self.monitor_to_file,
args=(out_path, temp_labels),
args=(out_path, temp_labels, thermal_action),
)
def stop_background_monitor(self):