1 /* SPDX-License-Identifier: GPL-2.0 */
2 #define _GNU_SOURCE
3
4 #include <linux/limits.h>
5 #include <linux/oom.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <sys/socket.h>
14 #include <sys/wait.h>
15 #include <arpa/inet.h>
16 #include <netinet/in.h>
17 #include <netdb.h>
18 #include <errno.h>
19 #include <sys/mman.h>
20
21 #include "../kselftest.h"
22 #include "cgroup_util.h"
23
/* Set once in main() from /proc mount options; read by OOM-group tests. */
static bool has_localevents;
/* NOTE(review): set in main() but not read anywhere in this chunk — verify use elsewhere. */
static bool has_recursiveprot;
26
27 /*
28 * This test creates two nested cgroups with and without enabling
29 * the memory controller.
30 */
static int test_memcg_subtree_control(const char *root)
{
	char *parent, *child, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;
	char buf[PAGE_SIZE];

	/* Create two nested cgroups with the memory controller enabled */
	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");
	if (!parent || !child)
		goto cleanup_free;

	if (cg_create(parent))
		goto cleanup_free;

	/* Delegating +memory makes it appear in the child's cgroup.controllers. */
	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup_parent;

	if (cg_create(child))
		goto cleanup_parent;

	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
		goto cleanup_child;

	/* Create two nested cgroups without enabling memory controller */
	parent2 = cg_name(root, "memcg_test_1");
	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
	if (!parent2 || !child2)
		goto cleanup_free2;

	if (cg_create(parent2))
		goto cleanup_free2;

	if (cg_create(child2))
		goto cleanup_parent2;

	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
		goto cleanup_all;

	/* Without delegation, "memory" must NOT be listed in the child. */
	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
		goto cleanup_all;

	ret = KSFT_PASS;

	/*
	 * Teardown is in reverse creation order; the second hierarchy's
	 * labels intentionally fall through into the first hierarchy's.
	 */
cleanup_all:
	cg_destroy(child2);
cleanup_parent2:
	cg_destroy(parent2);
cleanup_free2:
	free(parent2);
	free(child2);
cleanup_child:
	cg_destroy(child);
cleanup_parent:
	cg_destroy(parent);
cleanup_free:
	free(parent);
	free(child);

	return ret;
}
92
/*
 * Runs inside a test cgroup (via cg_run): allocates 50M of anonymous
 * memory, touching every page, then verifies that both memory.current
 * and the "anon" stat reflect the usage within 3%.
 * Returns 0 on success, -1 on failure.
 */
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	char *buf, *ptr;
	long anon, current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	/* Touch one byte per page so the pages are actually faulted in. */
	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	/*
	 * NOTE(review): current (long) is implicitly converted to size_t for
	 * this comparison, so a -1 read error slips past it — it is caught by
	 * values_close() below instead.
	 */
	current = cg_read_long(cgroup, "memory.current");
	if (current < size)
		goto cleanup;

	if (!values_close(size, current, 3))
		goto cleanup;

	/* Trailing space distinguishes the "anon " key from e.g. "anon_thp". */
	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
	if (anon < 0)
		goto cleanup;

	if (!values_close(anon, current, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}
128
/*
 * Runs inside a test cgroup (via cg_run): populates 50M of page cache
 * through a temporary file and verifies that memory.current and the
 * "file " stat agree within 10%. Returns 0 on success, -1 on failure.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	const size_t target = MB(50);
	long usage, file_bytes;
	int tmp_fd, err = -1;

	tmp_fd = get_temp_fd();
	if (tmp_fd < 0)
		return -1;

	if (alloc_pagecache(tmp_fd, target))
		goto out;

	usage = cg_read_long(cgroup, "memory.current");
	if (usage < target)
		goto out;

	file_bytes = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file_bytes < 0)
		goto out;

	if (!values_close(file_bytes, usage, 10))
		goto out;

	err = 0;

out:
	close(tmp_fd);
	return err;
}
160
161 /*
162 * This test create a memory cgroup, allocates
163 * some anonymous memory and some pagecache
164 * and check memory.current and some memory.stat values.
165 */
test_memcg_current(const char * root)166 static int test_memcg_current(const char *root)
167 {
168 int ret = KSFT_FAIL;
169 long current;
170 char *memcg;
171
172 memcg = cg_name(root, "memcg_test");
173 if (!memcg)
174 goto cleanup;
175
176 if (cg_create(memcg))
177 goto cleanup;
178
179 current = cg_read_long(memcg, "memory.current");
180 if (current != 0)
181 goto cleanup;
182
183 if (cg_run(memcg, alloc_anon_50M_check, NULL))
184 goto cleanup;
185
186 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
187 goto cleanup;
188
189 ret = KSFT_PASS;
190
191 cleanup:
192 cg_destroy(memcg);
193 free(memcg);
194
195 return ret;
196 }
197
/*
 * Populates 50M of page cache via the fd passed in arg, then stays
 * resident until the parent process exits (detected by a ppid change
 * after reparenting). Used to keep memory charged to a cgroup.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int tmp_fd = (long)arg;
	int parent = getppid();

	if (alloc_pagecache(tmp_fd, MB(50)))
		return -1;

	for (;;) {
		if (getppid() != parent)
			break;
		sleep(1);
	}

	return 0;
}
211
alloc_anon_noexit(const char * cgroup,void * arg)212 static int alloc_anon_noexit(const char *cgroup, void *arg)
213 {
214 int ppid = getppid();
215 size_t size = (unsigned long)arg;
216 char *buf, *ptr;
217
218 buf = malloc(size);
219 if (buf == NULL) {
220 fprintf(stderr, "malloc() failed\n");
221 return -1;
222 }
223
224 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
225 *ptr = 0;
226
227 while (getppid() == ppid)
228 sleep(1);
229
230 free(buf);
231 return 0;
232 }
233
234 /*
235 * Wait until processes are killed asynchronously by the OOM killer
236 * If we exceed a timeout, fail.
237 */
/*
 * Wait until all processes in the cgroup are killed asynchronously by
 * the OOM killer: poll cgroup.procs for emptiness, up to ten times with
 * a 100ms pause between attempts (~1s total). Returns 0 once empty,
 * -1 on timeout.
 */
static int cg_test_proc_killed(const char *cgroup)
{
	int attempt;

	for (attempt = 0; attempt < 10; attempt++) {
		if (!cg_read_strcmp(cgroup, "cgroup.procs", ""))
			return 0;

		usleep(100000);
	}
	return -1;
}
250
251 /*
252 * First, this test creates the following hierarchy:
253 * A memory.min = 0, memory.max = 200M
254 * A/B memory.min = 50M
255 * A/B/C memory.min = 75M, memory.current = 50M
256 * A/B/D memory.min = 25M, memory.current = 50M
257 * A/B/E memory.min = 0, memory.current = 50M
258 * A/B/F memory.min = 500M, memory.current = 0
259 *
260 * (or memory.low if we test soft protection)
261 *
262 * Usages are pagecache and the test keeps a running
263 * process in every leaf cgroup.
264 * Then it creates A/G and creates a significant
265 * memory pressure in A.
266 *
267 * Then it checks actual memory usages and expects that:
268 * A/B memory.current ~= 50M
269 * A/B/C memory.current ~= 29M
270 * A/B/D memory.current ~= 21M
271 * A/B/E memory.current ~= 0
272 * A/B/F memory.current = 0
273 * (for origin of the numbers, see model in memcg_protection.m.)
274 *
275 * After that it tries to allocate more than there is
276 * unprotected memory in A available, and checks that:
277 * a) memory.min protects pagecache even in this case,
278 * b) memory.low allows reclaiming page cache with low events.
279 */
static int test_memcg_protection(const char *root, bool min)
{
	int ret = KSFT_FAIL, rc;
	char *parent[3] = {NULL};
	char *children[4] = {NULL};
	/* min==true tests hard protection, otherwise soft protection. */
	const char *attribute = min ? "memory.min" : "memory.low";
	long c[4];
	long current;
	int i, attempts;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	/* Build the A (parent[0]) / A/B (parent[1]) / A/G (parent[2]) layout. */
	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	if (cg_read_long(parent[0], attribute)) {
		/* No memory.min on older kernels is fine */
		if (min)
			ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* Disable swap so reclaim pressure stays on pagecache. */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* The last child (F in the diagram above) stays empty. */
		if (i > 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	/* Protections matching the diagram in the comment above. */
	if (cg_write(parent[1], attribute, "50M"))
		goto cleanup;
	if (cg_write(children[0], attribute, "75M"))
		goto cleanup;
	if (cg_write(children[1], attribute, "25M"))
		goto cleanup;
	if (cg_write(children[2], attribute, "0"))
		goto cleanup;
	if (cg_write(children[3], attribute, "500M"))
		goto cleanup;

	/* Wait (up to ~6s) for the background allocations to reach ~150M. */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Create memory pressure in A by allocating in the sibling A/G. */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	/* B's usage should have been reclaimed down to its 50M protection. */
	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	/* Expected distribution per the memcg_protection.m model. */
	if (!values_close(c[0], MB(29), 10))
		goto cleanup;

	if (!values_close(c[1], MB(21), 10))
		goto cleanup;

	/* F never allocated anything, so its protection changes nothing. */
	if (c[3] != 0)
		goto cleanup;

	/*
	 * Over-allocate beyond the unprotected memory: memory.min must make
	 * this fail (OOM), memory.low must let it succeed.
	 */
	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
	if (min && !rc)
		goto cleanup;
	else if (!min && rc) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	current = min ? MB(50) : MB(30);
	if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
		goto cleanup;

	if (min) {
		ret = KSFT_PASS;
		goto cleanup;
	}

	/* Soft protection only: verify low events fired where expected. */
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int no_low_events_index = 1;
		long low, oom;

		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		if (oom)
			goto cleanup;
		/* Protected children (0, 1) must have seen low events ... */
		if (i <= no_low_events_index && low <= 0)
			goto cleanup;
		/* ... unprotected ones (2, 3) must not. */
		if (i > no_low_events_index && low)
			goto cleanup;

	}

	ret = KSFT_PASS;

cleanup:
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}
442
/* Thin wrapper: exercise the hard-protection (memory.min) variant. */
static int test_memcg_min(const char *root)
{
	return test_memcg_protection(root, true);
}
447
/* Thin wrapper: exercise the soft-protection (memory.low) variant. */
static int test_memcg_low(const char *root)
{
	return test_memcg_protection(root, false);
}
452
/*
 * Runs inside a test cgroup limited to 30M (by memory.high or
 * memory.max): tries to create 50M of page cache and verifies that the
 * resulting memory.current stays close to the 30M limit (within 5%).
 * Returns 0 on success, -1 on failure.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, high, max;
	int fd;

	/* Sanity check: at least one of the two limits must be set to 30M. */
	high = cg_read_long(cgroup, "memory.high");
	max = cg_read_long(cgroup, "memory.max");
	if (high != MB(30) && max != MB(30))
		return -1;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	if (!values_close(current, MB(30), 5))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}
483
484 /*
485 * This test checks that memory.high limits the amount of
486 * memory which can be consumed by either anonymous memory
487 * or pagecache.
488 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A new cgroup must start with memory.high == "max" (no limit). */
	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	/* Disable swap so exceeding memory.high must reclaim, not swap. */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	/* memory.high throttles but does not OOM: 31M of anon must succeed. */
	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
		goto cleanup;

	/* Page cache above the high limit gets reclaimed, so the 50M check
	 * (which expects ~50M resident) must fail. */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Breaching the limit must have generated "high" events. */
	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
532
alloc_anon_mlock(const char * cgroup,void * arg)533 static int alloc_anon_mlock(const char *cgroup, void *arg)
534 {
535 size_t size = (size_t)arg;
536 void *buf;
537
538 buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
539 0, 0);
540 if (buf == MAP_FAILED)
541 return -1;
542
543 mlock(buf, size);
544 munmap(buf, size);
545 return 0;
546 }
547
548 /*
549 * This test checks that memory.high is able to throttle big single shot
550 * allocation i.e. large allocation within one kernel entry.
551 */
static int test_memcg_high_sync(const char *root)
{
	int ret = KSFT_FAIL, pid, fd = -1;
	char *memcg;
	long pre_high, pre_max;
	long post_high, post_max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Snapshot the event counters before applying any pressure. */
	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (pre_high < 0 || pre_max < 0)
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	/* max is set well above high so only high-throttling should trigger. */
	if (cg_write(memcg, "memory.max", "140M"))
		goto cleanup;

	/* Arm a memory.events notification before starting the allocation. */
	fd = memcg_prepare_for_wait(memcg);
	if (fd < 0)
		goto cleanup;

	/* 200M single-shot mlock allocation against a 30M high limit. */
	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
	if (pid < 0)
		goto cleanup;

	cg_wait_for(fd);

	post_high = cg_read_key_long(memcg, "memory.events", "high ");
	post_max = cg_read_key_long(memcg, "memory.events", "max ");
	if (post_high < 0 || post_max < 0)
		goto cleanup;

	/* "high" events must have fired; "max" events must not have. */
	if (pre_high == post_high || pre_max != post_max)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (fd >= 0)
		close(fd);
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
608
609 /*
610 * This test checks that memory.max limits the amount of
611 * memory which can be consumed by either anonymous memory
612 * or pagecache.
613 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* A new cgroup must start with memory.max == "max" (no limit). */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	/* Disable swap so memory.max cannot be dodged by swapping out. */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Residual usage must be nonzero but capped by the 30M limit. */
	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	/* Hitting the hard limit must have generated "max" events. */
	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
659
660 /*
661 * This test checks that memory.reclaim reclaims the given
662 * amount of memory (from both anon and file, if possible).
663 */
test_memcg_reclaim(const char * root)664 static int test_memcg_reclaim(const char *root)
665 {
666 int ret = KSFT_FAIL, fd, retries;
667 char *memcg;
668 long current, expected_usage, to_reclaim;
669 char buf[64];
670
671 memcg = cg_name(root, "memcg_test");
672 if (!memcg)
673 goto cleanup;
674
675 if (cg_create(memcg))
676 goto cleanup;
677
678 current = cg_read_long(memcg, "memory.current");
679 if (current != 0)
680 goto cleanup;
681
682 fd = get_temp_fd();
683 if (fd < 0)
684 goto cleanup;
685
686 cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
687
688 /*
689 * If swap is enabled, try to reclaim from both anon and file, else try
690 * to reclaim from file only.
691 */
692 if (is_swap_enabled()) {
693 cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
694 expected_usage = MB(100);
695 } else
696 expected_usage = MB(50);
697
698 /*
699 * Wait until current usage reaches the expected usage (or we run out of
700 * retries).
701 */
702 retries = 5;
703 while (!values_close(cg_read_long(memcg, "memory.current"),
704 expected_usage, 10)) {
705 if (retries--) {
706 sleep(1);
707 continue;
708 } else {
709 fprintf(stderr,
710 "failed to allocate %ld for memcg reclaim test\n",
711 expected_usage);
712 goto cleanup;
713 }
714 }
715
716 /*
717 * Reclaim until current reaches 30M, this makes sure we hit both anon
718 * and file if swap is enabled.
719 */
720 retries = 5;
721 while (true) {
722 int err;
723
724 current = cg_read_long(memcg, "memory.current");
725 to_reclaim = current - MB(30);
726
727 /*
728 * We only keep looping if we get EAGAIN, which means we could
729 * not reclaim the full amount.
730 */
731 if (to_reclaim <= 0)
732 goto cleanup;
733
734
735 snprintf(buf, sizeof(buf), "%ld", to_reclaim);
736 err = cg_write(memcg, "memory.reclaim", buf);
737 if (!err) {
738 /*
739 * If writing succeeds, then the written amount should have been
740 * fully reclaimed (and maybe more).
741 */
742 current = cg_read_long(memcg, "memory.current");
743 if (!values_close(current, MB(30), 3) && current > MB(30))
744 goto cleanup;
745 break;
746 }
747
748 /* The kernel could not reclaim the full amount, try again. */
749 if (err == -EAGAIN && retries--)
750 continue;
751
752 /* We got an unexpected error or ran out of retries. */
753 goto cleanup;
754 }
755
756 ret = KSFT_PASS;
757 cleanup:
758 cg_destroy(memcg);
759 free(memcg);
760 close(fd);
761
762 return ret;
763 }
764
/*
 * Runs inside a test cgroup whose memory.max is the value passed in arg:
 * allocates and touches 50M of anon memory, then checks that RAM usage
 * sits at the limit and the overflow went to swap, i.e. RAM + swap adds
 * up to the 50M allocated (each within 3%).
 * Returns 0 on success, -1 on failure.
 */
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
	long mem_max = (long)arg;
	size_t size = MB(50);
	char *buf, *ptr;
	long mem_current, swap_current;
	int ret = -1;

	buf = malloc(size);
	if (buf == NULL) {
		fprintf(stderr, "malloc() failed\n");
		return -1;
	}

	/* Touch one byte per page so the pages are actually faulted in. */
	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
		*ptr = 0;

	mem_current = cg_read_long(cgroup, "memory.current");
	if (!mem_current || !values_close(mem_current, mem_max, 3))
		goto cleanup;

	swap_current = cg_read_long(cgroup, "memory.swap.current");
	if (!swap_current ||
	    !values_close(mem_current + swap_current, size, 3))
		goto cleanup;

	ret = 0;
cleanup:
	free(buf);
	return ret;
}
796
797 /*
798 * This test checks that memory.swap.max limits the amount of
799 * anonymous memory which can be swapped out.
800 */
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	/* The whole test is meaningless without swap. */
	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Pre-existing swap usage would skew all the checks below. */
	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	/* Both limits must start unset ("max") in a fresh cgroup. */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Exactly one OOM with one kill is expected from the run above. */
	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	/* 50M of anon must fit as 30M RAM + ~20M swap under these limits. */
	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
859
860 /*
861 * This test disables swapping and tries to allocate anonymous memory
862 * up to OOM. Then it checks for oom and oom_kill events in
863 * memory.events.
864 */
static int test_memcg_oom_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* With swap off, exceeding memory.max has to end in an OOM kill. */
	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	/* cg_run() must report a non-zero (killed) child exit. */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* The cgroup must be empty again after the kill. */
	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
		goto cleanup;

	/* Exactly one oom event with exactly one victim is expected. */
	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
903
/* Arguments handed to the forked tcp_server() helper. */
struct tcp_server_args {
	unsigned short port;	/* TCP port the server should bind to */
	int ctl[2];		/* control pipe; server reports status on ctl[1] */
};
908
/*
 * Forked server half of the socket-accounting test: binds an IPv6
 * listener on the requested port, reports bind status to the parent
 * over the control pipe (errno on bind failure, 0 on success), then
 * streams 1M-sized writes to the first accepted client until the
 * connection drops. Returns 0 only if the client resets the connection.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	/* The read end belongs to the parent; we only write status. */
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		/* Tell the parent why bind failed (e.g. EADDRINUSE). */
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Signal successful bind+listen with a zero status. */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];

		/* ECONNRESET means the client finished and closed: success. */
		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
965
/*
 * Client half of the socket-accounting test: connects to the local
 * tcp_server(), then interleaves 1M reads with checks that the cgroup's
 * memory.current and the "sock " stat track each other (within 10%).
 * Returns KSFT_PASS once they agree, KSFT_FAIL after 16 reads without
 * agreement, or an error code from getaddrinfo()/connect().
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/*
	 * %hu, not %hd: ports above SHRT_MAX would otherwise print as
	 * negative numbers, which both overflows the 6-byte buffer and
	 * makes getaddrinfo() fail to parse the service string.
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
1013
1014 /*
1015 * This test checks socket memory accounting.
1016 * The test forks a TCP server listens on a random port between 1000
1017 * and 61000. Once it gets a client connection, it starts writing to
1018 * its socket.
1019 * The TCP client interleaves reads from the socket with check whether
1020 * memory.current and memory.stat.sock are similar.
1021 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Retry a few random ports in case the chosen one is taken. */
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		/* The server reports bind status (0 or errno) over the pipe. */
		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		/* Only a busy port is worth retrying; anything else is fatal. */
		if (err != EADDRINUSE)
			goto cleanup;

		waitpid(pid, NULL, 0);
	}

	/* All retries lost the race for a free port: inconclusive, skip. */
	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	/* The server must have exited cleanly (client reset its socket). */
	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* With both ends gone, socket memory must be fully uncharged. */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
1086
1087 /*
1088 * This test disables swapping and tries to allocate anonymous memory
1089 * up to OOM with memory.group.oom set. Then it checks that all
1090 * processes in the leaf were killed. It also checks that oom_events
1091 * were propagated to the parent level.
1092 */
static int test_memcg_oom_group_leaf_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;
	long parent_oom_events;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(child, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(child, "memory.swap.max", "0"))
		goto cleanup;

	/* Group OOM: one OOM in the child kills everything in it. */
	if (cg_write(child, "memory.oom.group", "1"))
		goto cleanup;

	/* Parent task survives; the two child tasks become group victims. */
	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_test_proc_killed(child))
		goto cleanup;

	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
		goto cleanup;

	parent_oom_events = cg_read_key_long(
			parent, "memory.events", "oom_kill ");
	/*
	 * If memory_localevents is not enabled (the default), the parent should
	 * count OOM events in its children groups. Otherwise, it should not
	 * have observed any events.
	 */
	if (has_localevents && parent_oom_events != 0)
		goto cleanup;
	else if (!has_localevents && parent_oom_events <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}
1159
1160 /*
1161 * This test disables swapping and tries to allocate anonymous memory
1162 * up to OOM with memory.group.oom set. Then it checks that all
1163 * processes in the parent and leaf were killed.
1164 */
static int test_memcg_oom_group_parent_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent, *child;

	parent = cg_name(root, "memcg_test_0");
	child = cg_name(root, "memcg_test_0/memcg_test_1");

	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "memory.max", "80M"))
		goto cleanup;

	if (cg_write(parent, "memory.swap.max", "0"))
		goto cleanup;

	/* oom.group on the PARENT: a child OOM must kill the whole subtree. */
	if (cg_write(parent, "memory.oom.group", "1"))
		goto cleanup;

	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));

	if (!cg_run(child, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Both the child and the parent cgroup must end up empty. */
	if (cg_test_proc_killed(child))
		goto cleanup;
	if (cg_test_proc_killed(parent))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (child)
		cg_destroy(child);
	if (parent)
		cg_destroy(parent);
	free(child);
	free(parent);

	return ret;
}
1215
1216 /*
1217 * This test disables swapping and tries to allocate anonymous memory
1218 * up to OOM with memory.group.oom set. Then it checks that all
1219 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1220 */
static int test_memcg_oom_group_score_events(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	int safe_pid;

	memcg = cg_name(root, "memcg_test_0");

	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "50M"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.oom.group", "1"))
		goto cleanup;

	/* OOM_SCORE_ADJ_MIN exempts this task from the group kill. */
	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
		goto cleanup;

	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/*
	 * NOTE(review): expects exactly 3 oom_kill events — presumably the
	 * 100M allocator plus the unprotected noexit task counted per the
	 * group kill; confirm against the kernel's event accounting.
	 */
	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
		goto cleanup;

	/*
	 * NOTE(review): kill()/SIGKILL are declared in <signal.h>, which this
	 * file does not include directly — verify it is pulled in transitively.
	 */
	if (kill(safe_pid, SIGKILL))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	if (memcg)
		cg_destroy(memcg);
	free(memcg);

	return ret;
}
1267
/* T(x) pairs a test function with its stringified name for reporting. */
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);	/* test entry point, gets cgroup root */
	const char *name;		/* name printed in kselftest output */
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_high_sync),
	T(test_memcg_max),
	T(test_memcg_reclaim),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
	T(test_memcg_oom_group_leaf_events),
	T(test_memcg_oom_group_parent_events),
	T(test_memcg_oom_group_score_events),
};
#undef T
1289
int main(int argc, char **argv)
{
	char root[PATH_MAX];
	int i, proc_status, ret = EXIT_SUCCESS;

	/* All tests require the unified (cgroup v2) hierarchy. */
	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	/*
	 * Check that memory controller is available:
	 * memory is listed in cgroup.controllers
	 */
	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
		ksft_exit_skip("memory controller isn't available\n");

	/* Enable memory delegation at the root if it isn't already. */
	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
		if (cg_write(root, "cgroup.subtree_control", "+memory"))
			ksft_exit_skip("Failed to set memory controller\n");

	/* Record the mount options that change event-propagation semantics. */
	proc_status = proc_mount_contains("memory_recursiveprot");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_recursiveprot = proc_status;

	proc_status = proc_mount_contains("memory_localevents");
	if (proc_status < 0)
		ksft_exit_skip("Failed to query cgroup mount option\n");
	has_localevents = proc_status;

	/* Run every test; any failure flips the overall exit status. */
	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}
1336