Retest temps with sysbench if Prime95 runs too hot
If the CPU reaches the failing temps during Prime95 then sysbench will be run to emulate a less artificial workload. Both the overall and sysbench max temps are recorded and shown in the results. Added a new option to track an alternate max temp value in wk.hw.sensors. This was needed to show two different max temps recorded during CPU testing. Sysbench was added to the Linux package list and is compiled for macOS. Without manually compiling the package it brings in way too many dependencies to support SQL DB testing (which we don't need).
This commit is contained in:
parent
bfea4b9910
commit
8e0fdf641b
6 changed files with 123 additions and 22 deletions
|
|
@ -108,7 +108,7 @@ class State():
|
|||
self.tests = OrderedDict({
|
||||
'CPU & Cooling': {
|
||||
'Enabled': False,
|
||||
'Function': cpu_mprime_test,
|
||||
'Function': cpu_stress_tests,
|
||||
'Objects': [],
|
||||
},
|
||||
'Disk Attributes': {
|
||||
|
|
@ -302,7 +302,7 @@ class State():
|
|||
if not details['Selected']:
|
||||
continue
|
||||
if 'CPU' in name:
|
||||
# Create two Test objects which will both be used by cpu_mprime_test
|
||||
# Create two Test objects which will both be used by cpu_stress_tests
|
||||
# NOTE: Prime95 should be added first
|
||||
test_mprime_obj = hw_obj.Test(dev=self.cpu, label='Prime95')
|
||||
test_cooling_obj = hw_obj.Test(dev=self.cpu, label='Cooling')
|
||||
|
|
@ -556,9 +556,12 @@ def calc_io_dd_values(dev_size):
|
|||
}
|
||||
|
||||
|
||||
def check_cooling_results(test_obj, sensors):
|
||||
def check_cooling_results(test_obj, sensors, run_sysbench=False):
|
||||
"""Check cooling results and update test_obj."""
|
||||
max_temp = sensors.cpu_max_temp()
|
||||
temp_labels = ['Idle', 'Max', 'Cooldown']
|
||||
if run_sysbench:
|
||||
temp_labels.append('Sysbench')
|
||||
|
||||
# Check temps
|
||||
if not max_temp:
|
||||
|
|
@ -571,8 +574,7 @@ def check_cooling_results(test_obj, sensors):
|
|||
test_obj.set_status('Passed')
|
||||
|
||||
# Add temps to report
|
||||
for line in sensors.generate_report(
|
||||
'Idle', 'Max', 'Cooldown', only_cpu=True):
|
||||
for line in sensors.generate_report(*temp_labels, only_cpu=True):
|
||||
test_obj.report.append(f' {line}')
|
||||
|
||||
|
||||
|
|
@ -702,12 +704,13 @@ def check_self_test_results(test_obj, aborted=False):
|
|||
test_obj.set_status('Failed')
|
||||
|
||||
|
||||
def cpu_mprime_test(state, test_objects):
|
||||
def cpu_stress_tests(state, test_objects):
|
||||
# pylint: disable=too-many-statements
|
||||
"""CPU & cooling check using Prime95."""
|
||||
"""CPU & cooling check using Prime95 and Sysbench."""
|
||||
LOG.info('CPU Test (Prime95)')
|
||||
aborted = False
|
||||
prime_log = pathlib.Path(f'{state.log_dir}/prime.log')
|
||||
run_sysbench = False
|
||||
sensors_out = pathlib.Path(f'{state.log_dir}/sensors.out')
|
||||
test_mprime_obj, test_cooling_obj = test_objects
|
||||
|
||||
|
|
@ -777,9 +780,41 @@ def cpu_mprime_test(state, test_objects):
|
|||
test_mprime_obj.report.append(std.color_string('Prime95', 'BLUE'))
|
||||
check_mprime_results(test_obj=test_mprime_obj, working_dir=state.log_dir)
|
||||
|
||||
# Run Sysbench test if necessary
|
||||
run_sysbench = (
|
||||
not aborted and sensors.cpu_max_temp() >= cfg.hw.CPU_FAILURE_TEMP
|
||||
)
|
||||
if run_sysbench:
|
||||
LOG.info('CPU Test (Sysbench)')
|
||||
std.clear_screen()
|
||||
std.print_info('Starting alternate stress test')
|
||||
print('')
|
||||
proc_sysbench, filehandle_sysbench = start_sysbench(
|
||||
sensors,
|
||||
sensors_out,
|
||||
log_path=prime_log.with_name('sysbench.log'),
|
||||
pane='Prime95',
|
||||
)
|
||||
try:
|
||||
print_countdown(proc=proc_sysbench, seconds=cfg.hw.CPU_TEST_MINUTES*60)
|
||||
except AttributeError:
|
||||
# Assuming the sysbench process wasn't found and proc was set to None
|
||||
LOG.error('Failed to find sysbench process', exc_info=True)
|
||||
except KeyboardInterrupt:
|
||||
aborted = True
|
||||
stop_sysbench(proc_sysbench, filehandle_sysbench)
|
||||
|
||||
# Update progress
|
||||
# NOTE: CPU critical temp check isn't really necessary
|
||||
# Hard to imagine it wasn't hit during Prime95 but was in sysbench
|
||||
if sensors.cpu_reached_critical_temp() or aborted:
|
||||
test_cooling_obj.set_status('Aborted')
|
||||
test_mprime_obj.set_status('Aborted')
|
||||
state.update_progress_pane()
|
||||
|
||||
# Check Cooling results
|
||||
test_cooling_obj.report.append(std.color_string('Temps', 'BLUE'))
|
||||
check_cooling_results(test_obj=test_cooling_obj, sensors=sensors)
|
||||
check_cooling_results(test_cooling_obj, sensors, run_sysbench)
|
||||
|
||||
# Cleanup
|
||||
state.update_progress_pane()
|
||||
|
|
@ -1300,7 +1335,8 @@ def print_countdown(proc, seconds):
|
|||
except subprocess.TimeoutExpired:
|
||||
# proc still going, continue
|
||||
pass
|
||||
if proc.poll() is not None:
|
||||
if ((hasattr(proc, 'poll') and proc.poll() is not None)
|
||||
or (hasattr(proc, 'is_running') and not proc.is_running())):
|
||||
# proc exited, stop countdown
|
||||
break
|
||||
|
||||
|
|
@ -1460,6 +1496,35 @@ def start_mprime(working_dir, log_path):
|
|||
return proc_mprime
|
||||
|
||||
|
||||
def start_sysbench(sensors, sensors_out, log_path, pane):
    """Start sysbench, returns tuple with Popen object and file handle.

    sensors is the sensors object whose background monitor is restarted so
    that temps recorded from here on are also tracked under the alternate
    max label 'Sysbench'.
    sensors_out is the path passed to the background monitor.
    log_path is the file sysbench's stdout is appended to; the tmux pane
    named by pane is respawned to watch that file.

    The caller owns the returned file handle and is expected to close it
    once sysbench has been stopped. If sysbench fails to start, the handle
    is closed and the fan speed restored here before the exception is
    re-raised.
    """
    set_apple_fan_speed('max')
    sysbench_cmd = [
        'sysbench',
        # One worker thread per CPU reported by psutil
        f'--threads={exe.psutil.cpu_count()}',
        # Very large prime limit
        # NOTE(review): presumably so the run doesn't end on its own before
        # being stopped -- confirm
        '--cpu-max-prime=1000000000',
        'cpu',
        'run',
    ]

    # Restart background monitor for Sysbench
    # thermal_action is the cmd run if the thermal limit is reached
    sensors.stop_background_monitor()
    sensors.start_background_monitor(
        sensors_out,
        alt_max='Sysbench',
        thermal_action=('killall', 'sysbench', '-INT'),
    )

    # Update bottom pane
    tmux.respawn_pane(pane, watch_file=log_path)

    # Start sysbench
    # The handle must outlive this function (it is sysbench's stdout), so a
    # with-block can't be used; clean up manually if the launch fails.
    filehandle_sysbench = open(log_path, 'a')
    try:
        proc_sysbench = exe.popen_program(
            sysbench_cmd, stdout=filehandle_sysbench)
    except Exception:
        filehandle_sysbench.close()
        set_apple_fan_speed('auto')
        raise

    # Done
    return (proc_sysbench, filehandle_sysbench)
|
||||
|
||||
def stop_mprime(proc_mprime):
|
||||
"""Stop mprime gracefully, then forcefully as needed."""
|
||||
proc_mprime.terminate()
|
||||
|
|
@ -1470,6 +1535,18 @@ def stop_mprime(proc_mprime):
|
|||
set_apple_fan_speed('auto')
|
||||
|
||||
|
||||
def stop_sysbench(proc_sysbench, filehandle_sysbench):
    """Stop sysbench gracefully, then forcefully as needed.

    proc_sysbench is the process object to stop; None is tolerated because
    the caller anticipates the process not having been found.
    filehandle_sysbench is the log file handle, which is flushed and closed.

    The file handle is always closed and the fan speed always restored to
    'auto', even if stopping the process raises.
    """
    try:
        if proc_sysbench is not None:
            proc_sysbench.terminate()
            try:
                # Give sysbench a few seconds to exit on its own
                proc_sysbench.wait(timeout=5)
            except subprocess.TimeoutExpired:
                proc_sysbench.kill()
    finally:
        try:
            filehandle_sysbench.flush()
            filehandle_sysbench.close()
        finally:
            set_apple_fan_speed('auto')
|
||||
|
||||
|
||||
def sync_clock():
|
||||
"""Sync clock under macOS using sntp."""
|
||||
cmd = ['sudo', 'sntp', '-Ss', 'us.pool.ntp.org']
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from subprocess import CalledProcessError
|
|||
|
||||
from wk.cfg.hw import CPU_CRITICAL_TEMP, SMC_IDS, TEMP_COLORS
|
||||
from wk.exe import run_program, start_thread
|
||||
from wk.io import non_clobber_path
|
||||
from wk.std import PLATFORM, color_string, sleep
|
||||
|
||||
|
||||
|
|
@ -115,20 +116,27 @@ class Sensors():
|
|||
return report
|
||||
|
||||
def monitor_to_file(
|
||||
self, out_path,
|
||||
self, out_path, alt_max=None,
|
||||
exit_on_thermal_limit=True, temp_labels=None, thermal_action=None):
|
||||
# pylint: disable=too-many-arguments
|
||||
"""Write report to path every second until stopped.
|
||||
|
||||
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
|
||||
"""
|
||||
stop_path = pathlib.Path(out_path).resolve().with_suffix('.stop')
|
||||
if stop_path.exists():
|
||||
# Rename existing file to allow thread to start as expected
|
||||
# Yes this is excessive but safe
|
||||
stop_path.rename(non_clobber_path(stop_path))
|
||||
if not temp_labels:
|
||||
temp_labels = ('Current', 'Max')
|
||||
temp_labels = ['Current', 'Max']
|
||||
if alt_max:
|
||||
temp_labels.append(alt_max)
|
||||
|
||||
# Start loop
|
||||
while True:
|
||||
try:
|
||||
self.update_sensor_data(exit_on_thermal_limit)
|
||||
self.update_sensor_data(alt_max, exit_on_thermal_limit)
|
||||
except ThermalLimitReachedError:
|
||||
if thermal_action:
|
||||
run_program(thermal_action, check=False)
|
||||
|
|
@ -169,8 +177,9 @@ class Sensors():
|
|||
source_data[temp_label] = 0
|
||||
|
||||
def start_background_monitor(
|
||||
self, out_path,
|
||||
self, out_path, alt_max=None,
|
||||
exit_on_thermal_limit=True, temp_labels=None, thermal_action=None):
|
||||
# pylint: disable=too-many-arguments
|
||||
"""Start background thread to save report to file.
|
||||
|
||||
thermal_action is a cmd to run if ThermalLimitReachedError is caught.
|
||||
|
|
@ -181,7 +190,9 @@ class Sensors():
|
|||
self.out_path = pathlib.Path(out_path)
|
||||
self.background_thread = start_thread(
|
||||
self.monitor_to_file,
|
||||
args=(out_path, exit_on_thermal_limit, temp_labels, thermal_action),
|
||||
args=(
|
||||
out_path, alt_max, exit_on_thermal_limit, temp_labels, thermal_action,
|
||||
),
|
||||
)
|
||||
|
||||
def stop_background_monitor(self):
|
||||
|
|
@ -193,14 +204,14 @@ class Sensors():
|
|||
self.background_thread = None
|
||||
self.out_path = None
|
||||
|
||||
def update_sensor_data(self, exit_on_thermal_limit=True):
|
||||
def update_sensor_data(self, alt_max=None, exit_on_thermal_limit=True):
|
||||
"""Update sensor data via OS-specific means."""
|
||||
if PLATFORM == 'Darwin':
|
||||
self.update_sensor_data_macos(exit_on_thermal_limit)
|
||||
self.update_sensor_data_macos(alt_max, exit_on_thermal_limit)
|
||||
elif PLATFORM == 'Linux':
|
||||
self.update_sensor_data_linux(exit_on_thermal_limit)
|
||||
self.update_sensor_data_linux(alt_max, exit_on_thermal_limit)
|
||||
|
||||
def update_sensor_data_linux(self, exit_on_thermal_limit=True):
|
||||
def update_sensor_data_linux(self, alt_max, exit_on_thermal_limit=True):
|
||||
"""Update sensor data via lm_sensors."""
|
||||
lm_sensor_data = get_sensor_data_lm()
|
||||
for section, adapters in self.data.items():
|
||||
|
|
@ -212,6 +223,8 @@ class Sensors():
|
|||
source_data['Current'] = temp
|
||||
source_data['Max'] = max(temp, source_data['Max'])
|
||||
source_data['Temps'].append(temp)
|
||||
if alt_max:
|
||||
source_data[alt_max] = max(temp, source_data.get(alt_max, 0))
|
||||
except KeyError:
|
||||
# Dumb workaround for Dell sensors with changing source names
|
||||
pass
|
||||
|
|
@ -221,7 +234,7 @@ class Sensors():
|
|||
if source_data['Current'] >= CPU_CRITICAL_TEMP:
|
||||
raise ThermalLimitReachedError('CPU temps reached limit')
|
||||
|
||||
def update_sensor_data_macos(self, exit_on_thermal_limit=True):
|
||||
def update_sensor_data_macos(self, alt_max, exit_on_thermal_limit=True):
|
||||
"""Update sensor data via SMC."""
|
||||
for section, adapters in self.data.items():
|
||||
for sources in adapters.values():
|
||||
|
|
@ -239,6 +252,8 @@ class Sensors():
|
|||
source_data['Current'] = temp
|
||||
source_data['Max'] = max(temp, source_data['Max'])
|
||||
source_data['Temps'].append(temp)
|
||||
if alt_max:
|
||||
source_data[alt_max] = max(temp, source_data.get(alt_max, 0))
|
||||
|
||||
# Raise exception if thermal limit reached
|
||||
if exit_on_thermal_limit and section == 'CPUTemps':
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ rxvt-unicode-terminfo
|
|||
smartmontools-svn
|
||||
speedtest-cli
|
||||
sudo
|
||||
sysbench
|
||||
sysfsutils
|
||||
syslinux
|
||||
systemd-sysvcompat
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
|
||||
set -o errexit
|
||||
|
|
@ -103,3 +103,11 @@ git clone https://github.com/yuyichao/gnuplot-py gnuplot-py
|
|||
cd gnuplot-py
|
||||
git checkout 2c2218dc67
|
||||
python3 setup.py install
|
||||
|
||||
# Sysbench
|
||||
git clone https://github.com/akopytov/sysbench sysbench
|
||||
cd sysbench
|
||||
./autogen.sh LDFLAGS=-L/usr/local/opt/openssl/lib --without-mysql
|
||||
./configure LDFLAGS=-L/usr/local/opt/openssl/lib --without-mysql
|
||||
make MACOSX_DEPLOYMENT_TARGET="${OS_VERSION:0:5}" -j
|
||||
sudo mv -nv sysbench/src/sysbench /usr/local/bin/
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
## Init macOS env
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
## Update BaseImage for use as WK
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue