diff --git a/examples/plugin_log_task_usage/StressTest.wdl b/examples/plugin_log_task_usage/StressTest.wdl
new file mode 100644
index 00000000..01e49f56
--- /dev/null
+++ b/examples/plugin_log_task_usage/StressTest.wdl
@@ -0,0 +1,34 @@
+version 1.1
+# MINIWDL__LOG_TASK_USAGE__PERIOD=2 miniwdl run examples/plugin_log_task_usage/StressTest.wdl --dir /tmp --verbose
+# MINIWDL__LOG_TASK_USAGE__PERIOD=2 miniwdl-aws-submit plugin_log_task_usage/StressTest.wdl --verbose --follow
+
+task StressTest {
+    input {
+        Int cpu = 4
+        Int memory_G = 2
+        Int cpu_memory_duration_s = 10
+        Int disk_load_G = 4
+
+        String docker = "polinux/stress" # Docker image with stress tool
+    }
+
+    command <<<
+        set -euxo pipefail
+
+        >&2 ls -l /sys/fs/cgroup
+
+        stress --cpu ~{cpu} --vm 1 --vm-bytes ~{memory_G}G --vm-hang 0 --timeout ~{cpu_memory_duration_s}s || true
+        dd if=/dev/zero of=testfile bs=1G count=~{disk_load_G}
+        sync
+        cat testfile > /dev/null &
+        sleep 5
+    >>>
+
+    runtime {
+        docker: docker
+        memory: "${memory_G*2}G"
+        cpu: cpu
+    }
+
+    output {}
+}
diff --git a/examples/plugin_log_task_usage/miniwdl_log_task_usage.py b/examples/plugin_log_task_usage/miniwdl_log_task_usage.py
index 17eba487..b3babe9f 100644
--- a/examples/plugin_log_task_usage/miniwdl_log_task_usage.py
+++ b/examples/plugin_log_task_usage/miniwdl_log_task_usage.py
@@ -7,8 +7,8 @@
 set configuration [log_task_usage] period (or the environment variable
 MINIWDL__LOG_TASK_USAGE__PERIOD) to the desired logging period in seconds.
 
-YMMV because host OS version & configuration may affect availability of the counters read from
-pseudo-files under /sys/fs/cgroup
+YMMV because host OS version & configuration may affect availability of the cgroup counters read
+from pseudo-files under /sys/fs/cgroup
 """
 
 import WDL
@@ -21,7 +21,7 @@ def main(cfg, logger, run_id, run_dir, task, **recv):
     # inject logger into command script
     if cfg.has_option("log_task_usage", "period"):
         period = cfg["log_task_usage"].get_int("period")
-        recv["command"] = _logger_sh + f"log_cpu_mem_in_docker {period} &\n\n" + recv["command"]
+        recv["command"] = _logger_sh + f"_miniwdl_log_task_usage {period} &\n\n" + recv["command"]
     recv = yield recv
 
     # do nothing with outputs
@@ -29,34 +29,67 @@ def main(cfg, logger, run_id, run_dir, task, **recv):
 
 
 _logger_sh = r"""
-log_cpu_mem_in_docker() {
+_miniwdl_log_task_usage() {
     set +ex
-    PERIOD_SECS=${1:-10}  # logging period (default 10s)
-    JIFFIES_PER_SEC=100  # see http://man7.org/linux/man-pages/man7/time.7.html
-    T_0=$(date +%s)
+    local PERIOD_SECS=${1:-10}  # logging period (default 10s)
 
-    cpu_user_jiffies() {
-        cut -f2 -d ' ' /sys/fs/cgroup/cpuacct/cpuacct.stat | head -n 1
+    # detect whether host provides cgroup v2 or v1, and define helper functions to read CPU &
+    # memory usage counters from the appropriate pseudo-files
+    local cgroup_version=""
+    if [ -f /sys/fs/cgroup/cpu.stat ]; then
+        cgroup_version=2
+    elif [ -f /sys/fs/cgroup/cpuacct/cpuacct.stat ]; then
+        cgroup_version=1
+    else
+        >&2 echo "miniwdl_log_task_usage unable to report: cgroup CPU usage counters not found"
+        # return (not exit) so that a foreground call cannot terminate the task's own script
+        return 1
+    fi
+
+    # cumulative CPU time in milliseconds; sub-second resolution keeps cpu_pct meaningful when
+    # the logging period is only a few seconds
+    cpu_msecs() {
+        local ans
+        if [ $cgroup_version -eq 2 ]; then
+            ans=$(awk '/^usage_usec/ {print $2}' /sys/fs/cgroup/cpu.stat)
+            echo $(( ans / 1000 ))
+        else
+            ans=$(cut -f2 -d ' ' /sys/fs/cgroup/cpuacct/cpuacct.stat | head -n 1)
+            echo $(( ans * 10 )) # 100 "jiffies" per second, i.e. 10 ms each
+        fi
+    }
+
+    # memory usage in bytes: the "anon" counter on cgroup v2, "total_rss" on cgroup v1
+    mem_bytes() {
+        if [ $cgroup_version -eq 2 ]; then
+            awk '$1 == "anon" { print $2 }' /sys/fs/cgroup/memory.stat
+        else
+            awk -F ' ' '$1 == "total_rss" { print $2 }' /sys/fs/cgroup/memory/memory.stat
+        fi
     }
-    user_jiffies_0=$(cpu_user_jiffies)
-    user_jffies_last=$user_jiffies_0
-    t_last=$T_0
+
+    local T_0=$(date +%s)
+    local t_last=$T_0
+    local cpu_msecs_0=$(cpu_msecs)
+    local cpu_msecs_last=$cpu_msecs_0
 
     while true; do
         sleep "$PERIOD_SECS"
-        t=$(date +%s)
-        wall_secs=$(( t - T_0 ))
+        local t=$(date +%s)
+        local wall_secs=$(( t - T_0 ))
+        # measured time since the previous sample (at least 1s, to avoid dividing by zero)
+        local period_secs=$(( t > t_last ? t - t_last : 1 ))
 
-        user_jiffies=$(cpu_user_jiffies)
-        user_pct=$(( 100*(user_jiffies - user_jffies_last)/JIFFIES_PER_SEC/(t - t_last) ))
-        user_secs=$(( (user_jiffies - user_jiffies_0)/ JIFFIES_PER_SEC ))
+        local cpu_msecs_current=$(cpu_msecs)
+        local cpu_total_secs=$(( (cpu_msecs_current - cpu_msecs_0) / 1000 ))
+        local cpu_period_msecs=$(( cpu_msecs_current - cpu_msecs_last ))
 
-        user_jffies_last=$user_jiffies
-        t_last=$t
+        local mem_bytes_current=$(mem_bytes)
 
-        rss_bytes=$(awk -F ' ' '$1 == "total_rss" { print $2 }' /sys/fs/cgroup/memory/memory.stat)
+        >&2 echo "container usage :: cpu_pct: $(( cpu_period_msecs / (10 * period_secs) )), mem_MiB: $(( mem_bytes_current/1048576 )), cpu_total_s: ${cpu_total_secs}, elapsed_s: ${wall_secs}"
 
-        >&2 echo "container usage :: cpu_pct: ${user_pct}, mem_MiB: $(( rss_bytes/1048576 )), cpu_total_s: ${user_secs}, elapsed_s: ${wall_secs}"
+        cpu_msecs_last=$cpu_msecs_current
+        t_last=$t
     done
 }
 """
diff --git a/examples/plugin_log_task_usage/setup.py b/examples/plugin_log_task_usage/setup.py
index 1f57767a..9eeb4a7b 100644
--- a/examples/plugin_log_task_usage/setup.py
+++ b/examples/plugin_log_task_usage/setup.py
@@ -2,12 +2,13 @@
 
 setup(
     name='miniwdl_log_task_usage',
-    version='0.0.1',
-    description='miniwdl task plugin to log container cpu+mem usage',
+    version='0.1.0',
+    description='miniwdl task plugin to log container cpu/mem usage',
     author='Wid L. Hacker',
     py_modules=["miniwdl_log_task_usage"],
     python_requires='>=3.6',
     setup_requires=['reentry'],
+    install_requires=['miniwdl'],
     reentry_register=True,
     entry_points={
         'miniwdl.plugin.task': ['log_task_usage = miniwdl_log_task_usage:main'],