• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#! /bin/sh
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (c) 2012 FUJITSU LIMITED
# Copyright (c) 2014-2022 Linux Test Project
# Copyright (c) 2021 Joerg Vehlow <joerg.vehlow@aox-tech.de>
#
# Author: Peng Haitao <penght@cn.fujitsu.com>
8
# Test-library configuration for every script that sources this file.
TST_NEEDS_CHECKPOINTS=1
TST_NEEDS_ROOT=1
TST_NEEDS_TMPDIR=1
TST_NEEDS_CMDS="killall find kill"
# Setup/cleanup hooks: a value already set by the including test wins,
# otherwise the implementations from this library are used.
: "${TST_SETUP:=memcg_setup}"
: "${TST_CLEANUP:=memcg_cleanup}"
TST_TESTFUNC=memcg_testfunc
16
# Require the mem+swap limit interface.
# Reports TBROK when $mount_point/memory.limit_in_bytes is absent and
# TCONF when $mount_point/memory.memsw.limit_in_bytes is absent.
memcg_require_memsw()
{
	[ -e "$mount_point/memory.limit_in_bytes" ] ||
		tst_brk TBROK "$mount_point must be mounted before calling memcg_require_memsw"

	[ -e "$mount_point/memory.memsw.limit_in_bytes" ] ||
		tst_brk TCONF "mem+swap is not enabled"
}
26
# Require that the root cgroup runs with memory.use_hierarchy=0.
# Reports TBROK when $mount_point/memory.use_hierarchy is absent and
# TCONF when the file reads as 1.
memcg_require_hierarchy_disabled()
{
	local hierarchy_file="$mount_point/memory.use_hierarchy"
	local hierarchy_state

	[ -e "$hierarchy_file" ] ||
		tst_brk TBROK "$mount_point must be mounted before calling memcg_require_hierarchy_disabled"

	hierarchy_state=$(cat "$hierarchy_file")
	if [ "$hierarchy_state" -eq 1 ]; then
		tst_brk TCONF "Test requires root cgroup memory.use_hierarchy=0"
	fi
}
36
# Kernel memory allocated for the process is also charged.  It might depend on
# the number of CPUs and number of nodes.  For example on kernel v5.11
# additionally total_cpus (plus 1 or 2) pages are charged to the group via
# kernel memory.  For a two-node machine, additional 108 pages kernel memory
# are charged to the group.
#
# Adjust the limit to account for such per-CPU and per-node kernel memory:
# 4 pages, plus one page per CPU, plus 128 pages for every node beyond the
# first one.
# $1 - expected cgroup memory limit value to adjust
# Prints the adjusted limit on stdout.
memcg_adjust_limit_for_kmem()
{
	[ $# -ne 1 ] && tst_brk TBROK "memcg_adjust_limit_for_kmem expects 1 parameter"

	local limit=$1
	# All working variables are local so that a direct (non-subshell) call
	# does not leak them into the caller's scope.
	local total_cpus total_nodes node_dir
	local node_mem=0

	# Total number of CPUs
	total_cpus=$(tst_ncpus)

	# Total number of nodes: a system without node0 counts as one node,
	# otherwise count the nodeN entries with a glob instead of parsing ls.
	if [ ! -d /sys/devices/system/node/node0 ]; then
		total_nodes=1
	else
		total_nodes=0
		for node_dir in /sys/devices/system/node/node[0-9]*; do
			total_nodes=$((total_nodes + 1))
		done
	fi

	if [ "$total_nodes" -gt 1 ]; then
		node_mem=$(((total_nodes - 1) * PAGESIZE * 128))
	fi

	limit=$((limit + 4 * PAGESIZE + total_cpus * PAGESIZE + node_mem))

	echo "$limit"
}
71
# Default setup hook: requires the memory controller, records its mount point
# and test directory in the globals mount_point / test_dir, tries to switch
# memory.use_hierarchy to 0 and optionally runs shmmax_setup.
memcg_setup()
{
	cgroup_require "memory"
	cgroup_version=$(cgroup_get_version "memory")

	# Most of the tests here are testing specific parts of the cgroup v1
	# memory interface that is not present for cgroup2, so if it is already
	# mounted on a cgroup v2 hierarchy we should skip the test.
	# Some tests still make sense in v2 and should be modified in a future
	# patch.
	[ "$cgroup_version" = "2" ] &&
		tst_brk TCONF "memory controller mounted on cgroup v2 hierarchy, skipping test."

	mount_point=$(cgroup_get_mountpoint "memory")
	test_dir=$(cgroup_get_test_path "memory")

	# For kernels older than v5.11 the default value for
	# memory.use_hierarchy is 0 and some of the tests (memcg_stat_test.sh
	# and memcg_use_hierarchy_test.sh) expect it, while there are
	# distributions (RHEL7U0Beta for example) that set it to 1.
	# Note: If there are already subgroups created it is not possible
	# to set this back to 0.
	# This seems to be the default for all systems using systemd.
	#
	# Starting with kernel v5.11, the non-hierarchical mode is not
	# available. See Linux kernel commit bef8620cd8e0 ("mm: memcg:
	# deprecate the non-hierarchical mode").
	orig_memory_use_hierarchy=$(cat "$mount_point/memory.use_hierarchy")
	case "$orig_memory_use_hierarchy" in
	"")
		tst_res TINFO "cat $mount_point failed"
		;;
	0)
		# Already 0: clear the saved value so cleanup restores nothing
		orig_memory_use_hierarchy=""
		;;
	*)
		if ! echo 0 > "$mount_point/memory.use_hierarchy" 2>/dev/null; then
			tst_res TINFO "set $mount_point/memory.use_hierarchy to 0 failed"
		fi
		;;
	esac

	[ "$MEMCG_SHMMAX" = "1" ] && shmmax_setup
}
113
# Default cleanup hook: kills the helper process, removes the test cgroup
# tree, restores memory.use_hierarchy if memcg_setup saved a value, releases
# the cgroup library state and optionally restores shmmax.
memcg_cleanup()
{
	kill -9 $MEMCG_PROCESS_PID 2> /dev/null

	cd "$TST_TMPDIR"
	# In order to remove all subgroups, we have to remove them recursively.
	# test_dir is empty when cleanup runs before setup resolved it; without
	# the explicit check an unquoted "[ -e ]" is true and find would run
	# with no start path, i.e. on the current directory.
	if [ -n "$test_dir" ] && [ -e "$test_dir" ]; then
		ROD find "$test_dir" -depth -type d -delete
	fi

	if [ -n "$orig_memory_use_hierarchy" ]; then
		if ! echo "$orig_memory_use_hierarchy" > "$mount_point/memory.use_hierarchy"; then
			tst_res TINFO "restore $mount_point/memory.use_hierarchy failed"
		fi
		orig_memory_use_hierarchy=""
	fi

	cgroup_cleanup

	[ "$MEMCG_SHMMAX" = "1" ] && shmmax_cleanup
}
136
# Save the current kernel.shmmax in orig_shmmax and raise it to
# $HUGEPAGESIZE when it is smaller than that.
shmmax_setup()
{
	local shm_too_small

	tst_require_cmds bc

	tst_res TINFO "Setting shmmax"

	orig_shmmax=$(cat /proc/sys/kernel/shmmax)

	# The comparison is delegated to bc, which prints 1 when it holds
	# (presumably because shmmax can exceed the shell's integer range).
	shm_too_small=$(echo "$orig_shmmax < $HUGEPAGESIZE" | bc)
	if [ $shm_too_small -eq 1 ]; then
		ROD echo "$HUGEPAGESIZE" \> /proc/sys/kernel/shmmax
	fi
}
148
# Write the value saved in orig_shmmax back to kernel.shmmax; does nothing
# when no value was saved.
shmmax_cleanup()
{
	[ -z "$orig_shmmax" ] && return

	echo "$orig_shmmax" > /proc/sys/kernel/shmmax
}
155
# Check size in memcg
# $1 - Item name: either a file in the current directory (its whole content
#      is the value) or a key looked up in memory.stat
# $2 - Expected size lower bound
# $3 - Expected size upper bound (optional); without it the value must be
#      exactly $2
# Reports TPASS/TFAIL through tst_res.
check_mem_stat()
{
	local item_size

	if [ -e "$1" ]; then
		item_size=$(cat "$1")
	else
		item_size=$(grep -w -- "$1" memory.stat | cut -d " " -f 2)
	fi

	if [ -n "$3" ]; then
		# An empty value (item not found) is a plain failure; checking it
		# first keeps the numeric tests from aborting with a syntax error.
		if [ -n "$item_size" ] &&
		   [ "$item_size" -ge "$2" ] && [ "$item_size" -le "$3" ]; then
			tst_res TPASS "$1 is ${2}-${3} as expected"
		else
			tst_res TFAIL "$1 is $item_size, ${2}-${3} expected"
		fi
	elif [ "$2" = "$item_size" ]; then
		tst_res TPASS "$1 is $2 as expected"
	else
		tst_res TFAIL "$1 is $item_size, $2 expected"
	fi
}
182
# Launch memcg_process in the background with the given arguments, remember
# its PID in MEMCG_PROCESS_PID and then wait on checkpoint 0 (timeout
# argument 10000).
start_memcg_process()
{
	tst_res TINFO "Running memcg_process $@"

	memcg_process "$@" &
	MEMCG_PROCESS_PID=$!

	ROD tst_checkpoint wait 10000 0
}
190
# Send SIGUSR1 to the helper process and, when a size is given, wait until
# memory.usage_in_bytes has moved by at least that much in either direction.
# $1 - expected change of memory.usage_in_bytes (optional; without it the
#      function returns right after sending the signal)
# $2 - prefix prepended to "memory.usage_in_bytes" (optional)
# Polls up to 100 times with a 100ms sleep; TBROK on timeout.  The loop also
# ends as soon as the process is gone.
signal_memcg_process()
{
	local size=$1
	local path=$2
	local usage_start=$(cat ${path}memory.usage_in_bytes)
	local attempts_left=100
	local usage_now shrunk_by grown_by

	kill -s USR1 $MEMCG_PROCESS_PID 2> /dev/null

	[ -z "$size" ] && return

	while kill -0 $MEMCG_PROCESS_PID 2> /dev/null; do
		usage_now=$(cat ${path}memory.usage_in_bytes)
		shrunk_by=$((usage_start - usage_now))
		grown_by=$((usage_now - usage_start))

		if [ "$shrunk_by" -ge "$size" ] || [ "$grown_by" -ge "$size" ]; then
			return
		fi

		tst_sleep 100ms

		attempts_left=$((attempts_left - 1))
		[ $attempts_left -gt 0 ] ||
			tst_brk TBROK "timed out on memory.usage_in_bytes" $usage_now $usage_start $size
	done
}
222
# Send SIGINT to the helper process, wait for it and clear
# MEMCG_PROCESS_PID.  Does nothing when no PID is recorded.
stop_memcg_process()
{
	if [ -n "$MEMCG_PROCESS_PID" ]; then
		kill -s INT $MEMCG_PROCESS_PID 2> /dev/null
		wait $MEMCG_PROCESS_PID
		MEMCG_PROCESS_PID=
	fi
}
230
# Signal the helper process twice, give it a second, and verify it is still
# running.
# Returns 0 when the process is alive, 1 (after reporting TFAIL with its exit
# status) when it is gone.
warmup()
{
	tst_res TINFO "Warming up pid: $MEMCG_PROCESS_PID"
	signal_memcg_process
	signal_memcg_process
	sleep 1

	if kill -0 $MEMCG_PROCESS_PID; then
		tst_res TINFO "Process is still here after warm up: $MEMCG_PROCESS_PID"
		return 0
	fi

	# Collect the exit status of the vanished process for the report
	wait $MEMCG_PROCESS_PID
	tst_res TFAIL "Process $MEMCG_PROCESS_PID exited with $? after warm up"
	return 1
}
248
# Run a test case which checks memory.stat (or a memory.* file) after making
# some memory allocation.
# $1 - memcg_process options selecting the memory type(s)
# $2 - the -s parameter of memcg_process, also the expected usage change
# $3 - total size (accepted for interface compatibility, not used here)
# $4 - item to check: a file name or a memory.stat key
# $5 - expected lower bound
# $6 - expected upper bound (equal to $5 for an exact check)
# $7 - command deciding whether the item must be 0 after the memory is freed
#      (e.g. true / false)
test_mem_stat()
{
	local memtypes="$1"
	local size=$2
	local total_size=$3
	local stat_name=$4
	local exp_stat_size_low=$5
	local exp_stat_size_up=$6
	local check_after_free=$7
	local kmem_stat_name="${stat_name##*.}"
	local kmem

	# $memtypes is intentionally unquoted: it may carry several options
	start_memcg_process $memtypes -s $size

	if ! warmup; then
		return
	fi

	echo $MEMCG_PROCESS_PID > tasks
	signal_memcg_process $size

	if [ "$kmem_stat_name" = "max_usage_in_bytes" ] ||
	   [ "$kmem_stat_name" = "usage_in_bytes" ]; then
		# Widen the expected range by the kmem counter, but only when it
		# could be read.  The assignment is kept separate from "local" so
		# the exit status of cat is what the condition tests.
		if kmem=$(cat "memory.kmem.${kmem_stat_name}"); then
			exp_stat_size_low=$((exp_stat_size_low + kmem))
			exp_stat_size_up=$((exp_stat_size_up + kmem))
		fi
	fi

	if [ "$exp_stat_size_low" = "$exp_stat_size_up" ]; then
		check_mem_stat $stat_name $exp_stat_size_low
	else
		check_mem_stat $stat_name $exp_stat_size_low $exp_stat_size_up
	fi

	# The second signal makes the helper process release the memory
	signal_memcg_process $size
	if $check_after_free; then
		check_mem_stat $stat_name 0
	fi

	stop_memcg_process
}
293
# Test process will be killed due to exceeding the memory limit
# $1 - the value of memory.limit_in_bytes
# $2 - the parameters of 'process', such as --shm
# $3 - the -s parameter of 'process', such as 4096
# $4 - use mem+swap limitation (1 to also set memory.memsw.limit_in_bytes)
test_proc_kill()
{
	local limit=$1
	local memtypes="$2"
	local size=$3
	local use_memsw=$4
	local tpk_iter=0
	local tpk_pid_exists=1
	local tpk_pid tpk_ret

	echo $limit > memory.limit_in_bytes
	if [ "$use_memsw" -eq 1 ]; then
		memcg_require_memsw
		echo $limit > memory.memsw.limit_in_bytes
	fi

	# $memtypes is intentionally unquoted: it may carry several options
	start_memcg_process $memtypes -s $size
	# Keep a private copy of the PID: stop_memcg_process clears
	# MEMCG_PROCESS_PID, and the result messages still need it.
	tpk_pid=$MEMCG_PROCESS_PID
	echo $tpk_pid > tasks

	signal_memcg_process $size

	# Poll up to 20 times (250ms apart) for the process to disappear or
	# turn into a zombie.
	while [ $tpk_iter -lt 20 ]; do
		if [ ! -d "/proc/$tpk_pid" ] ||
			grep -q 'Z (zombie)' "/proc/$tpk_pid/status" 2> /dev/null; then
			tpk_pid_exists=0
			break
		fi

		tst_sleep 250ms
		tpk_iter=$((tpk_iter + 1))
	done

	if [ $tpk_pid_exists -eq 0 ]; then
		wait $tpk_pid
		tpk_ret=$?
		if [ $tpk_ret -eq 1 ]; then
			tst_res TFAIL "process $tpk_pid is killed by error"
		elif [ $tpk_ret -eq 2 ]; then
			tst_res TPASS "Failed to lock memory"
		else
			tst_res TPASS "process $tpk_pid is killed"
		fi
	else
		stop_memcg_process
		tst_res TFAIL "process $tpk_pid is not killed"
	fi
}
344
# Test that limit_in_bytes gets aligned to PAGESIZE
# $1 - user input value
# $2 - use mem+swap limitation (1 to check memory.memsw.limit_in_bytes)
test_limit_in_bytes()
{
	local limit=$1
	local use_memsw=$2
	local elimit
	local readback=memory.limit_in_bytes
	local verdict=TFAIL

	EXPECT_PASS echo $limit \> memory.limit_in_bytes
	if [ $use_memsw -eq 1 ]; then
		memcg_require_memsw
		readback=memory.memsw.limit_in_bytes
		echo $limit > $readback
	fi
	elimit=$(cat $readback)

	# Kernels prior to 3.19 were rounding up,
	# but newer kernels are rounding down; accept either result.
	local aligned_floor=$(( PAGESIZE * (limit / PAGESIZE) ))
	local aligned_ceil=$(( PAGESIZE * ((limit + PAGESIZE - 1) / PAGESIZE) ))
	if [ $aligned_floor -eq $elimit ] || [ $aligned_ceil -eq $elimit ]; then
		verdict=TPASS
	fi

	tst_res $verdict "input=$limit, limit_in_bytes=$elimit"
}
373
# Test function wrapper: creates the per-process cgroup $test_dir/ltp_$$,
# runs the test from inside it and removes it afterwards.
# When a function named ${MEMCG_TESTFUNC}1 exists, the numbered variant
# ${MEMCG_TESTFUNC}$1 is called, otherwise ${MEMCG_TESTFUNC} itself; both
# receive $1 and $2.
memcg_testfunc()
{
	local memcg_tf_handler="${MEMCG_TESTFUNC}"

	ROD mkdir $test_dir/ltp_$$
	cd $test_dir/ltp_$$

	if type ${MEMCG_TESTFUNC}1 > /dev/null 2>&1; then
		memcg_tf_handler="${MEMCG_TESTFUNC}$1"
	fi
	$memcg_tf_handler $1 "$2"

	cd $TST_TMPDIR
	ROD rmdir $test_dir/ltp_$$
}
388
# Placeholder used as MEMCG_TESTFUNC when the including test did not set
# one; it only reports the misconfiguration as TBROK.
memcg_no_testfunc()
{
	tst_brk TBROK "No testfunc specified, set MEMCG_TESTFUNC"
}
393
. cgroup_lib.sh

# Defaults for the knobs an including test may set before sourcing this file
: "${MEMCG_SHMMAX:=0}"
: "${MEMCG_TESTFUNC:=memcg_no_testfunc}"

PAGESIZE=$(tst_getconf PAGESIZE) || tst_brk TBROK "tst_getconf PAGESIZE failed"

# Post 4.16 kernel updates stat in batch (> 32 pages) every time
# Post 6.1 kernel updates stat in batch (> 64 pages) every time
# 1813e51eece0ad6 ("memcg: increase MEMCG_CHARGE_BATCH to 64")
# has been merged since 5.14.0-191.el9 and 4.18.0-438.el8.
if tst_kvcmp -lt "6.1 RHEL9:5.14.0-191 RHEL8:4.18.0-438" ; then
	PAGESIZES=$((PAGESIZE * 33))
else
	PAGESIZES=$((PAGESIZE * 65))
fi

# On recent Linux kernels (at least v5.4) updating stats happens in batches
# (PAGESIZES) and also might depend on workload and number of CPUs.  The kernel
# caches the data and does not prioritize stats precision.  This is especially
# visible for max_usage_in_bytes where it usually exceeds
# actual memory allocation.
# When checking for usage_in_bytes and max_usage_in_bytes accept also higher
# values from given range:
MEM_USAGE_RANGE=$PAGESIZES

# Hugepagesize is taken from /proc/meminfo and multiplied by 1024;
# a missing entry counts as 0.
HUGEPAGESIZE=$(awk '/Hugepagesize/ {print $2}' /proc/meminfo)
HUGEPAGESIZE=$((${HUGEPAGESIZE:-0} * 1024))

# State saved by memcg_setup / shmmax_setup for the cleanup functions
orig_memory_use_hierarchy=
orig_shmmax=
429