Handle critical temps correctly in mprime sections
* Moved ThermalLimitReachedError catches to wk.hw.sensors
  * Before, they would never be caught and would never stop the script
* Added cpu_reached_critical_temp() to wk.hw.sensors
  * This allows us to check if it happened without exceptions
* Added thermal_action to wk.hw.sensors
  * This is run when ThermalLimitReachedError(s) are caught
* Stop print_countdown if mprime is terminated
  * This is required since it may be killed in the background
This commit is contained in:
parent
c520b5a865
commit
4990537082
2 changed files with 81 additions and 30 deletions
|
|
@ -68,15 +68,15 @@ MENU_TOGGLES = (
|
||||||
'Skip USB Benchmarks',
|
'Skip USB Benchmarks',
|
||||||
)
|
)
|
||||||
STATUS_COLORS = {
|
STATUS_COLORS = {
|
||||||
|
'Passed': 'GREEN',
|
||||||
'Aborted': 'YELLOW',
|
'Aborted': 'YELLOW',
|
||||||
'Denied': 'RED',
|
|
||||||
'ERROR': 'RED',
|
|
||||||
'FAIL': 'RED',
|
|
||||||
'N/A': 'YELLOW',
|
'N/A': 'YELLOW',
|
||||||
'PASS': 'GREEN',
|
|
||||||
'TimedOut': 'RED',
|
|
||||||
'Unknown': 'YELLOW',
|
'Unknown': 'YELLOW',
|
||||||
'Working': 'YELLOW',
|
'Working': 'YELLOW',
|
||||||
|
'Denied': 'RED',
|
||||||
|
'ERROR': 'RED',
|
||||||
|
'Failed': 'RED',
|
||||||
|
'TimedOut': 'RED',
|
||||||
}
|
}
|
||||||
WK_LABEL_REGEX = re.compile(
|
WK_LABEL_REGEX = re.compile(
|
||||||
fr'{cfg.main.KIT_NAME_SHORT}_(LINUX|UFD)',
|
fr'{cfg.main.KIT_NAME_SHORT}_(LINUX|UFD)',
|
||||||
|
|
@ -417,6 +417,7 @@ def check_mprime_results(test_obj, working_dir):
|
||||||
def cpu_mprime_test(state, test_objects):
|
def cpu_mprime_test(state, test_objects):
|
||||||
"""CPU & cooling check using Prime95."""
|
"""CPU & cooling check using Prime95."""
|
||||||
LOG.info('CPU Test (Prime95)')
|
LOG.info('CPU Test (Prime95)')
|
||||||
|
aborted = False
|
||||||
prime_log = pathlib.Path(f'{state.log_dir}/prime.log')
|
prime_log = pathlib.Path(f'{state.log_dir}/prime.log')
|
||||||
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
||||||
test_mprime_obj, test_cooling_obj = test_objects
|
test_mprime_obj, test_cooling_obj = test_objects
|
||||||
|
|
@ -432,7 +433,10 @@ def cpu_mprime_test(state, test_objects):
|
||||||
|
|
||||||
# Start sensors monitor
|
# Start sensors monitor
|
||||||
sensors = hw_sensors.Sensors()
|
sensors = hw_sensors.Sensors()
|
||||||
sensors.start_background_monitor(sensors_out)
|
sensors.start_background_monitor(
|
||||||
|
sensors_out,
|
||||||
|
thermal_action=('killall', 'mprime'),
|
||||||
|
)
|
||||||
|
|
||||||
# Create monitor and worker panes
|
# Create monitor and worker panes
|
||||||
state.update_progress_pane()
|
state.update_progress_pane()
|
||||||
|
|
@ -450,28 +454,27 @@ def cpu_mprime_test(state, test_objects):
|
||||||
|
|
||||||
# Stress CPU
|
# Stress CPU
|
||||||
std.print_info('Starting stress test')
|
std.print_info('Starting stress test')
|
||||||
std.print_warning('If running too hot, press CTRL+c to abort the test')
|
|
||||||
set_apple_fan_speed('max')
|
set_apple_fan_speed('max')
|
||||||
proc_mprime = start_mprime_thread(state.log_dir, prime_log)
|
proc_mprime = start_mprime(state.log_dir, prime_log)
|
||||||
|
|
||||||
# Show countdown
|
# Show countdown
|
||||||
|
print('')
|
||||||
try:
|
try:
|
||||||
print_countdown(seconds=cfg.hw.CPU_TEST_MINUTES*60)
|
print_countdown(proc=proc_mprime, seconds=cfg.hw.CPU_TEST_MINUTES*60)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
test_cooling_obj.set_status('Aborted')
|
aborted = True
|
||||||
test_mprime_obj.set_status('Aborted')
|
|
||||||
except hw_sensors.ThermalLimitReachedError:
|
|
||||||
test_mprime_obj.set_status('Aborted')
|
|
||||||
|
|
||||||
# Stop Prime95
|
# Stop Prime95
|
||||||
proc_mprime.terminate()
|
stop_mprime(proc_mprime)
|
||||||
try:
|
|
||||||
proc_mprime.wait(timeout=5)
|
# Update progress if necessary
|
||||||
except subprocess.TimeoutExpired:
|
if sensors.cpu_reached_critical_temp() or aborted:
|
||||||
proc_mprime.kill()
|
test_cooling_obj.set_status('Aborted')
|
||||||
set_apple_fan_speed('auto')
|
test_mprime_obj.set_status('Aborted')
|
||||||
|
state.update_progress_pane()
|
||||||
|
|
||||||
# Get cooldown temp
|
# Get cooldown temp
|
||||||
|
std.clear_screen()
|
||||||
std.print_standard('Letting CPU cooldown...')
|
std.print_standard('Letting CPU cooldown...')
|
||||||
std.sleep(5)
|
std.sleep(5)
|
||||||
std.print_standard('Saving cooldown temps...')
|
std.print_standard('Saving cooldown temps...')
|
||||||
|
|
@ -705,8 +708,8 @@ def network_test():
|
||||||
std.pause('Press Enter to return to main menu...')
|
std.pause('Press Enter to return to main menu...')
|
||||||
|
|
||||||
|
|
||||||
def print_countdown(seconds):
|
def print_countdown(proc, seconds):
|
||||||
"""Print countdown to screen."""
|
"""Print countdown to screen while proc is alive."""
|
||||||
for i in range(seconds):
|
for i in range(seconds):
|
||||||
sec_left = (seconds - i) % 60
|
sec_left = (seconds - i) % 60
|
||||||
min_left = int((seconds - i) / 60)
|
min_left = int((seconds - i) / 60)
|
||||||
|
|
@ -718,7 +721,17 @@ def print_countdown(seconds):
|
||||||
out_str += ' remaining'
|
out_str += ' remaining'
|
||||||
|
|
||||||
print(f'{out_str:<42}', end='', flush=True)
|
print(f'{out_str:<42}', end='', flush=True)
|
||||||
std.sleep(1)
|
try:
|
||||||
|
proc.wait(1)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
# Stop countdown
|
||||||
|
break
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
# proc still going, continue
|
||||||
|
pass
|
||||||
|
if proc.poll() is not None:
|
||||||
|
# proc exited, stop countdown
|
||||||
|
break
|
||||||
|
|
||||||
# Done
|
# Done
|
||||||
print('')
|
print('')
|
||||||
|
|
@ -835,8 +848,9 @@ def show_results(state):
|
||||||
std.print_standard(' ')
|
std.print_standard(' ')
|
||||||
|
|
||||||
|
|
||||||
def start_mprime_thread(working_dir, log_path):
|
def start_mprime(working_dir, log_path):
|
||||||
"""Start mprime and save filtered output to log, returns Popen object."""
|
"""Start mprime and save filtered output to log, returns Popen object."""
|
||||||
|
set_apple_fan_speed('max')
|
||||||
proc_mprime = subprocess.Popen(
|
proc_mprime = subprocess.Popen(
|
||||||
['mprime', '-t'],
|
['mprime', '-t'],
|
||||||
cwd=working_dir,
|
cwd=working_dir,
|
||||||
|
|
@ -859,5 +873,15 @@ def start_mprime_thread(working_dir, log_path):
|
||||||
return proc_mprime
|
return proc_mprime
|
||||||
|
|
||||||
|
|
||||||
|
def stop_mprime(proc):
    """Stop mprime gracefully, then forcefully as needed.

    proc is the Popen object of the running mprime process.

    The process is asked to terminate first; if it has not exited within
    five seconds it is killed. The Apple fan speed is then set back to
    'auto'.
    """
    # NOTE: Use the proc argument here; there is no proc_mprime name in
    #       this function's scope, so referencing it raises NameError.
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Graceful stop timed out, force it
        proc.kill()
    set_apple_fan_speed('auto')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print("This file is not meant to be called directly.")
|
print("This file is not meant to be called directly.")
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,22 @@ class Sensors():
|
||||||
# Done
|
# Done
|
||||||
return max_temp
|
return max_temp
|
||||||
|
|
||||||
|
def cpu_reached_critical_temp(self):
    """Check if any CPU source hit CPU_CRITICAL_TEMP, returns bool.

    Only sections of self.data whose name starts with 'CPU' are
    inspected. A source counts as critical when its 'Max' value is at or
    above CPU_CRITICAL_TEMP; sources without a 'Max' entry are treated
    as -1.
    """
    # Lazily limit the scan to CPU sections
    cpu_adapters = (
        adapter_map
        for section_name, adapter_map in self.data.items()
        if section_name.startswith('CPU')
    )

    # any() stops at the first source that reached the threshold
    return any(
        reading.get('Max', -1) >= CPU_CRITICAL_TEMP
        for adapter_map in cpu_adapters
        for source_map in adapter_map.values()
        for reading in source_map.values()
    )
|
||||||
|
|
||||||
def generate_report(self, *temp_labels, colored=True, only_cpu=False):
|
def generate_report(self, *temp_labels, colored=True, only_cpu=False):
|
||||||
"""Generate report based on given temp_labels, returns list."""
|
"""Generate report based on given temp_labels, returns list."""
|
||||||
report = []
|
report = []
|
||||||
|
|
@ -72,7 +88,7 @@ class Sensors():
|
||||||
if only_cpu and not section.startswith('CPU'):
|
if only_cpu and not section.startswith('CPU'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Ugly section
|
# Ugly section
|
||||||
for adapter, sources in sorted(adapters.items()):
|
for adapter, sources in sorted(adapters.items()):
|
||||||
report.append(fix_sensor_name(adapter))
|
report.append(fix_sensor_name(adapter))
|
||||||
for source, source_data in sorted(sources.items()):
|
for source, source_data in sorted(sources.items()):
|
||||||
|
|
@ -99,15 +115,22 @@ class Sensors():
|
||||||
# Done
|
# Done
|
||||||
return report
|
return report
|
||||||
|
|
||||||
def monitor_to_file(self, out_path, temp_labels=None):
|
def monitor_to_file(self, out_path, temp_labels=None, thermal_action=None):
|
||||||
"""Write report to path every second until stopped."""
|
"""Write report to path every second until stopped.
|
||||||
|
|
||||||
|
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
|
||||||
|
"""
|
||||||
stop_path = pathlib.Path(out_path).resolve().with_suffix('.stop')
|
stop_path = pathlib.Path(out_path).resolve().with_suffix('.stop')
|
||||||
if not temp_labels:
|
if not temp_labels:
|
||||||
temp_labels = ('Current', 'Max')
|
temp_labels = ('Current', 'Max')
|
||||||
|
|
||||||
# Start loop
|
# Start loop
|
||||||
while True:
|
while True:
|
||||||
self.update_sensor_data()
|
try:
|
||||||
|
self.update_sensor_data()
|
||||||
|
except ThermalLimitReachedError:
|
||||||
|
if thermal_action:
|
||||||
|
run_program(thermal_action, check=False)
|
||||||
report = self.generate_report(*temp_labels)
|
report = self.generate_report(*temp_labels)
|
||||||
with open(out_path, 'w') as _f:
|
with open(out_path, 'w') as _f:
|
||||||
_f.write('\n'.join(report))
|
_f.write('\n'.join(report))
|
||||||
|
|
@ -136,15 +159,19 @@ class Sensors():
|
||||||
temps = source_data['Temps']
|
temps = source_data['Temps']
|
||||||
source_data[temp_label] = sum(temps) / len(temps)
|
source_data[temp_label] = sum(temps) / len(temps)
|
||||||
|
|
||||||
def start_background_monitor(self, out_path, temp_labels=None):
|
def start_background_monitor(
|
||||||
"""Start background thread to save report to file."""
|
self, out_path, temp_labels=None, thermal_action=None):
|
||||||
|
"""Start background thread to save report to file.
|
||||||
|
|
||||||
|
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
|
||||||
|
"""
|
||||||
if self.background_thread:
|
if self.background_thread:
|
||||||
raise RuntimeError('Background thread already running')
|
raise RuntimeError('Background thread already running')
|
||||||
|
|
||||||
self.out_path = pathlib.Path(out_path)
|
self.out_path = pathlib.Path(out_path)
|
||||||
self.background_thread = start_thread(
|
self.background_thread = start_thread(
|
||||||
self.monitor_to_file,
|
self.monitor_to_file,
|
||||||
args=(out_path, temp_labels),
|
args=(out_path, temp_labels, thermal_action),
|
||||||
)
|
)
|
||||||
|
|
||||||
def stop_background_monitor(self):
|
def stop_background_monitor(self):
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue