#! /bin/sh
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (c) 2012 FUJITSU LIMITED
# Copyright (c) 2014-2022 Linux Test Project
# Copyright (c) 2021 Joerg Vehlow <joerg.vehlow@aox-tech.de>
#
# Author: Peng Haitao <penght@cn.fujitsu.com>
#
# Shared helpers for the memory cgroup (v1) shell tests. A test sets
# MEMCG_TESTFUNC (and optionally MEMCG_SHMMAX=1) and sources this file.

TST_NEEDS_CHECKPOINTS=1
TST_NEEDS_ROOT=1
TST_NEEDS_TMPDIR=1
TST_NEEDS_CMDS="killall find kill"
TST_SETUP="${TST_SETUP:-memcg_setup}"
TST_CLEANUP="${TST_CLEANUP:-memcg_cleanup}"
TST_TESTFUNC=memcg_testfunc

# Skip the test (TCONF) unless mem+swap accounting is available.
# Breaks with TBROK when $mount_point does not look like a mounted
# memory controller yet.
memcg_require_memsw()
{
	if ! [ -e "$mount_point/memory.limit_in_bytes" ]; then
		tst_brk TBROK "$mount_point must be mounted before calling memcg_require_memsw"
	fi
	if ! [ -e "$mount_point/memory.memsw.limit_in_bytes" ]; then
		tst_brk TCONF "mem+swap is not enabled"
	fi
}

# Skip the test (TCONF) when the root cgroup has memory.use_hierarchy=1.
# Breaks with TBROK when $mount_point/memory.use_hierarchy does not exist.
memcg_require_hierarchy_disabled()
{
	if [ ! -e "$mount_point/memory.use_hierarchy" ]; then
		tst_brk TBROK "$mount_point must be mounted before calling memcg_require_hierarchy_disabled"
	fi
	if [ "$(cat "$mount_point/memory.use_hierarchy")" -eq 1 ]; then
		tst_brk TCONF "Test requires root cgroup memory.use_hierarchy=0"
	fi
}

# Kernel memory allocated for the process is also charged. It might depend on
# the number of CPUs and number of nodes. For example on kernel v5.11
# additionally total_cpus (plus 1 or 2) pages are charged to the group via
# kernel memory. For a two-node machine, additional 108 pages kernel memory
# are charged to the group.
#
# Adjust the limit to account for such per-CPU and per-node kernel memory.
# $1 - expected cgroup memory limit value to adjust
# Prints the adjusted limit on stdout.
memcg_adjust_limit_for_kmem()
{
	[ $# -ne 1 ] && tst_brk TBROK "memcg_adjust_limit_for_kmem expects 1 parameter"

	local limit=$1
	local total_cpus total_nodes node_dir
	local node_mem=0

	# Total number of CPUs
	total_cpus=$(tst_ncpus)

	# Total number of nodes
	if [ ! -d /sys/devices/system/node/node0 ]; then
		total_nodes=1
	else
		total_nodes=0
		for node_dir in /sys/devices/system/node/node[0-9]*; do
			if [ -e "$node_dir" ]; then
				total_nodes=$((total_nodes + 1))
			fi
		done
	fi

	# Every node beyond the first adds 128 pages of slack.
	if [ "$total_nodes" -gt 1 ]; then
		node_mem=$(((total_nodes - 1) * PAGESIZE * 128))
	fi

	limit=$((limit + 4 * PAGESIZE + total_cpus * PAGESIZE + node_mem))

	echo "$limit"
}

# Default TST_SETUP: require the v1 memory controller, resolve $mount_point
# and $test_dir, try to switch memory.use_hierarchy to 0 (remembering the
# original value for cleanup) and optionally raise shmmax.
memcg_setup()
{
	cgroup_require "memory"
	cgroup_version=$(cgroup_get_version "memory")

	# Most of the tests here are testing specific parts of the cgroup v1
	# memory interface that is not present for cgroup2, so if it is already
	# mounted on a cgroup v2 hierarchy we should skip the test.
	# Some tests still make sense in v2 and should be modified in a future
	# patch.
	if [ "$cgroup_version" = "2" ]; then
		tst_brk TCONF "memory controller mounted on cgroup v2 hierarchy, skipping test."
	fi

	mount_point=$(cgroup_get_mountpoint "memory")
	test_dir=$(cgroup_get_test_path "memory")

	# For kernels older than v5.11 the default value for
	# memory.use_hierarchy is 0 and some of the tests (memcg_stat_test.sh
	# and memcg_use_hierarchy_test.sh) expect it, while there are
	# distributions (RHEL7U0Beta for example) that set it to 1.
	# Note: If there are already subgroups created it is not possible
	# to set this back to 0.
	# This seems to be the default for all systems using systemd.
	#
	# Starting with kernel v5.11, the non-hierarchical mode is not
	# available. See Linux kernel commit bef8620cd8e0 ("mm: memcg:
	# deprecate the non-hierarchical mode").
	orig_memory_use_hierarchy=$(cat "$mount_point/memory.use_hierarchy")
	if [ -z "$orig_memory_use_hierarchy" ]; then
		tst_res TINFO "cat $mount_point failed"
	elif [ "$orig_memory_use_hierarchy" = "0" ]; then
		# Already 0: nothing to restore in cleanup.
		orig_memory_use_hierarchy=""
	else
		if ! echo 0 > "$mount_point/memory.use_hierarchy" 2>/dev/null; then
			tst_res TINFO "set $mount_point/memory.use_hierarchy to 0 failed"
		fi
	fi

	[ "$MEMCG_SHMMAX" = "1" ] && shmmax_setup
}

# Default TST_CLEANUP: kill a leftover memcg_process, remove the test
# cgroup directories, restore memory.use_hierarchy and shmmax.
memcg_cleanup()
{
	if [ -n "$MEMCG_PROCESS_PID" ]; then
		kill -9 "$MEMCG_PROCESS_PID" 2> /dev/null
	fi

	cd "$TST_TMPDIR"
	# In order to remove all subgroups, we have to remove them recursively.
	# $test_dir must be quoted: cleanup can run before setup assigned it,
	# and an unquoted empty value turns the test into the one-argument
	# form `[ -e ]`, which is true, so find would run on the current
	# directory instead.
	if [ -e "$test_dir" ]; then
		ROD find "$test_dir" -depth -type d -delete
	fi

	if [ -n "$orig_memory_use_hierarchy" ]; then
		if ! echo "$orig_memory_use_hierarchy" > "$mount_point/memory.use_hierarchy"; then
			tst_res TINFO "restore $mount_point/memory.use_hierarchy failed"
		fi
		orig_memory_use_hierarchy=""
	fi

	cgroup_cleanup

	[ "$MEMCG_SHMMAX" = "1" ] && shmmax_cleanup
}

# Save kernel.shmmax and raise it to $HUGEPAGESIZE when it is smaller.
shmmax_setup()
{
	tst_require_cmds bc

	tst_res TINFO "Setting shmmax"

	orig_shmmax=$(cat /proc/sys/kernel/shmmax)
	# bc is used for the comparison instead of shell arithmetic.
	if [ "$(echo "$orig_shmmax < $HUGEPAGESIZE" | bc)" -eq 1 ]; then
		ROD echo "$HUGEPAGESIZE" \> /proc/sys/kernel/shmmax
	fi
}

# Restore kernel.shmmax saved by shmmax_setup, if any.
shmmax_cleanup()
{
	if [ -n "$orig_shmmax" ]; then
		echo "$orig_shmmax" > /proc/sys/kernel/shmmax
	fi
}

# Check size in memcg
# $1 - Item name: either a file in the current directory or a key of
#      ./memory.stat
# $2 - Expected size lower bound (exact value when $3 is not given)
# $3 - Expected size upper bound (optional)
check_mem_stat()
{
	local item_size

	if [ -e "$1" ]; then
		item_size=$(cat "$1")
	else
		item_size=$(grep -w "$1" memory.stat | cut -d " " -f 2)
	fi

	if [ "$3" ]; then
		if [ "$item_size" -ge "$2" ] && [ "$item_size" -le "$3" ]; then
			tst_res TPASS "$1 is ${2}-${3} as expected"
		else
			tst_res TFAIL "$1 is $item_size, ${2}-${3} expected"
		fi
	elif [ "$2" = "$item_size" ]; then
		tst_res TPASS "$1 is $2 as expected"
	else
		tst_res TFAIL "$1 is $item_size, $2 expected"
	fi
}

# Start memcg_process in the background with the given arguments, store its
# PID in MEMCG_PROCESS_PID and wait on checkpoint 0.
start_memcg_process()
{
	tst_res TINFO "Running memcg_process $*"
	memcg_process "$@" &
	MEMCG_PROCESS_PID=$!
	ROD tst_checkpoint wait 10000 0
}

# Send SIGUSR1 to memcg_process and, when a size is given, wait until
# memory.usage_in_bytes has moved by at least that much in either direction.
# $1 - expected change in bytes (optional; without it only the signal is sent)
# $2 - prefix prepended directly to "memory.usage_in_bytes" (optional)
# Gives up with TBROK after 100 polls of 100ms.
signal_memcg_process()
{
	local size=$1
	local path=$2
	local usage_start usage diff_a diff_b
	local loops=100

	usage_start=$(cat "${path}memory.usage_in_bytes")

	kill -s USR1 "$MEMCG_PROCESS_PID" 2> /dev/null

	if [ -z "$size" ]; then
		return
	fi

	while kill -0 "$MEMCG_PROCESS_PID" 2> /dev/null; do
		usage=$(cat "${path}memory.usage_in_bytes")
		diff_a=$((usage_start - usage))
		diff_b=$((usage - usage_start))

		if [ "$diff_a" -ge "$size" ] || [ "$diff_b" -ge "$size" ]; then
			return
		fi

		tst_sleep 100ms

		loops=$((loops - 1))
		if [ "$loops" -le 0 ]; then
			tst_brk TBROK "timed out on memory.usage_in_bytes" $usage $usage_start $size
		fi
	done
}

# Send SIGINT to memcg_process, reap it and clear MEMCG_PROCESS_PID.
stop_memcg_process()
{
	[ -z "$MEMCG_PROCESS_PID" ] && return
	kill -s INT "$MEMCG_PROCESS_PID" 2> /dev/null
	wait "$MEMCG_PROCESS_PID"
	MEMCG_PROCESS_PID=
}

# Signal memcg_process twice and make sure it is still running afterwards.
# Returns 0 when the process is alive, 1 (after reporting TFAIL) otherwise.
warmup()
{
	tst_res TINFO "Warming up pid: $MEMCG_PROCESS_PID"
	signal_memcg_process
	signal_memcg_process
	sleep 1

	if ! kill -0 "$MEMCG_PROCESS_PID"; then
		wait "$MEMCG_PROCESS_PID"
		tst_res TFAIL "Process $MEMCG_PROCESS_PID exited with $? after warm up"
		return 1
	else
		tst_res TINFO "Process is still here after warm up: $MEMCG_PROCESS_PID"
	fi

	return 0
}

# Run test cases which check memory.stat after making some memory allocation
# $1 - memory type options passed to memcg_process (word-split on purpose)
# $2 - the -s parameter of memcg_process
# $3 - total size (accepted for the callers' sake, not used here)
# $4 - stat item to check (see check_mem_stat)
# $5 - expected lower bound
# $6 - expected upper bound (equal to $5 for an exact check)
# $7 - command run as a condition (e.g. true/false): when it succeeds the
#      item is expected to be 0 after the second signal
test_mem_stat()
{
	local memtypes="$1"
	local size=$2
	local total_size=$3
	local stat_name=$4
	local exp_stat_size_low=$5
	local exp_stat_size_up=$6
	local check_after_free=$7
	local kmem_stat_name="${stat_name##*.}"
	local kmem

	start_memcg_process $memtypes -s $size

	if ! warmup; then
		return
	fi

	echo "$MEMCG_PROCESS_PID" > tasks
	signal_memcg_process $size

	if [ "$kmem_stat_name" = "max_usage_in_bytes" ] ||
	   [ "$kmem_stat_name" = "usage_in_bytes" ]; then
		# Assign separately from `local` so that the status tested is
		# the one of cat and not the (always zero) one of `local`.
		if kmem=$(cat "memory.kmem.${kmem_stat_name}"); then
			exp_stat_size_low=$((exp_stat_size_low + kmem))
			exp_stat_size_up=$((exp_stat_size_up + kmem))
		fi
	fi

	if [ "$exp_stat_size_low" = "$exp_stat_size_up" ]; then
		check_mem_stat $stat_name $exp_stat_size_low
	else
		check_mem_stat $stat_name $exp_stat_size_low $exp_stat_size_up
	fi

	signal_memcg_process $size
	if $check_after_free; then
		check_mem_stat $stat_name 0
	fi

	stop_memcg_process
}

# Test process will be killed due to exceed memory limit
# $1 - the value of memory.limit_in_bytes
# $2 - the parameters of 'process', such as --shm
# $3 - the -s parameter of 'process', such as 4096
# $4 - use mem+swap limitation
#
# The process is polled up to 20 times, 250ms apart. When it is gone (or a
# zombie) its exit status decides: 1 is TFAIL, 2 is TPASS ("Failed to lock
# memory"), anything else is TPASS. When it is still alive it is stopped and
# TFAIL is reported.
test_proc_kill()
{
	local limit=$1
	local memtypes="$2"
	local size=$3
	local use_memsw=$4
	local tpk_iter=0
	local tpk_pid_exists=1
	local tpk_pid
	local ret

	echo "$limit" > memory.limit_in_bytes
	if [ "$use_memsw" -eq 1 ]; then
		memcg_require_memsw
		echo "$limit" > memory.memsw.limit_in_bytes
	fi

	start_memcg_process $memtypes -s $size
	echo "$MEMCG_PROCESS_PID" > tasks

	signal_memcg_process $size

	while [ "$tpk_iter" -lt 20 ]; do
		if [ ! -d "/proc/$MEMCG_PROCESS_PID" ] ||
		   grep -q 'Z (zombie)' "/proc/$MEMCG_PROCESS_PID/status"; then
			tpk_pid_exists=0
			break
		fi

		tst_sleep 250ms
		tpk_iter=$((tpk_iter + 1))
	done

	if [ "$tpk_pid_exists" -eq 0 ]; then
		wait "$MEMCG_PROCESS_PID"
		ret=$?
		if [ "$ret" -eq 1 ]; then
			tst_res TFAIL "process $MEMCG_PROCESS_PID is killed by error"
		elif [ "$ret" -eq 2 ]; then
			tst_res TPASS "Failed to lock memory"
		else
			tst_res TPASS "process $MEMCG_PROCESS_PID is killed"
		fi
	else
		# stop_memcg_process clears MEMCG_PROCESS_PID, so remember the
		# PID first to keep it in the failure message.
		tpk_pid=$MEMCG_PROCESS_PID
		stop_memcg_process
		tst_res TFAIL "process $tpk_pid is not killed"
	fi
}

# Test limit_in_bytes will be aligned to PAGESIZE
# $1 - user input value
# $2 - use mem+swap limitation
test_limit_in_bytes()
{
	local limit=$1
	local use_memsw=$2
	local elimit
	local limit_down limit_up

	EXPECT_PASS echo $limit \> memory.limit_in_bytes
	if [ "$use_memsw" -eq 1 ]; then
		memcg_require_memsw
		echo "$limit" > memory.memsw.limit_in_bytes
		elimit=$(cat memory.memsw.limit_in_bytes)
	else
		elimit=$(cat memory.limit_in_bytes)
	fi

	# Kernels prior to 3.19 were rounding up,
	# but newer kernels are rounding down.
	# Either page-aligned neighbour of the input is accepted.
	limit_down=$((PAGESIZE * (limit / PAGESIZE)))
	limit_up=$((PAGESIZE * ((limit + PAGESIZE - 1) / PAGESIZE)))
	if [ "$limit_down" -eq "$elimit" ] || [ "$limit_up" -eq "$elimit" ]; then
		tst_res TPASS "input=$limit, limit_in_bytes=$elimit"
	else
		tst_res TFAIL "input=$limit, limit_in_bytes=$elimit"
	fi
}

# TST_TESTFUNC wrapper: create a per-process cgroup below $test_dir, run the
# test's own function from inside it, then remove the cgroup again.
# When a function named "${MEMCG_TESTFUNC}1" exists the numbered variant
# "${MEMCG_TESTFUNC}$1" is called, otherwise "${MEMCG_TESTFUNC}" itself.
# $1, $2 - forwarded unchanged to the test function
memcg_testfunc()
{
	ROD mkdir "$test_dir/ltp_$$"
	cd "$test_dir/ltp_$$"

	if type ${MEMCG_TESTFUNC}1 > /dev/null 2>&1; then
		${MEMCG_TESTFUNC}$1 $1 "$2"
	else
		${MEMCG_TESTFUNC} $1 "$2"
	fi

	cd "$TST_TMPDIR"
	ROD rmdir "$test_dir/ltp_$$"
}

# Fallback used when the test did not set MEMCG_TESTFUNC.
memcg_no_testfunc()
{
	tst_brk TBROK "No testfunc specified, set MEMCG_TESTFUNC"
}

. cgroup_lib.sh

MEMCG_SHMMAX=${MEMCG_SHMMAX:-0}
MEMCG_TESTFUNC=${MEMCG_TESTFUNC:-memcg_no_testfunc}

PAGESIZE=$(tst_getconf PAGESIZE)
if [ $? -ne 0 ]; then
	tst_brk TBROK "tst_getconf PAGESIZE failed"
fi

# Post 4.16 kernel updates stat in batch (> 32 pages) every time
# Post 6.1 kernel updates stat in batch (> 64 pages) every time
# 1813e51eece0ad6 ("memcg: increase MEMCG_CHARGE_BATCH to 64")
# has been merged since 5.14.0-191.el9 and 4.18.0-438.el8.
if tst_kvcmp -lt "6.1 RHEL9:5.14.0-191 RHEL8:4.18.0-438"; then
	PAGESIZES=$((PAGESIZE * 33))
else
	PAGESIZES=$((PAGESIZE * 65))
fi

# On recent Linux kernels (at least v5.4) updating stats happens in batches
# (PAGESIZES) and also might depend on workload and number of CPUs. The kernel
# caches the data and does not prioritize stats precision. This is especially
# visible for max_usage_in_bytes where it usually exceeds
# actual memory allocation.
# When checking for usage_in_bytes and max_usage_in_bytes accept also higher
# values from given range:
MEM_USAGE_RANGE=$((PAGESIZES))

# /proc/meminfo reports Hugepagesize in kB; convert to bytes (0 when absent).
HUGEPAGESIZE=$(awk '/Hugepagesize/ {print $2}' /proc/meminfo)
[ -z "$HUGEPAGESIZE" ] && HUGEPAGESIZE=0
HUGEPAGESIZE=$((HUGEPAGESIZE * 1024))

orig_memory_use_hierarchy=
orig_shmmax=