1 /* SPDX-License-Identifier: GPL-2.0 */
2
3 #define _GNU_SOURCE
4
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18
19 #include "cgroup_util.h"
20 #include "../clone3/clone3_selftests.h"
21
/*
 * Read the start of the file at @path into @buf as a NUL-terminated string.
 *
 * A single read() of at most @max_len - 1 bytes is issued, so longer files
 * are silently truncated to what fits.
 *
 * Returns the number of bytes stored (terminator not counted), or a negative
 * value if the file cannot be opened or read, or if @max_len is 0 and there
 * is no room for the terminator.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd;

	/* With max_len == 0 the "max_len - 1" below would wrap to SIZE_MAX. */
	if (max_len == 0) {
		errno = EINVAL;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return fd;

	len = read(fd, buf, max_len - 1);
	if (len >= 0)
		buf[len] = 0;

	close(fd);
	return len;
}
40
/*
 * Append @len bytes from @buf to the existing file at @path with a single
 * write() call. The file is not created if it is missing.
 *
 * Returns what write() returned (the byte count, possibly short), or a
 * negative value if the file could not be opened or the write failed.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_APPEND);

	if (fd < 0)
		return fd;

	written = write(fd, buf, len);

	/* The descriptor is released on both the success and error paths. */
	close(fd);

	return written;
}
59
/*
 * Build the string "@root/@name" in a freshly allocated buffer.
 *
 * The caller owns the result and must free() it. Returns NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* One extra byte for the '/' separator and one for the NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
69
/*
 * Build the string "@root/@name_@index" in a freshly allocated buffer.
 *
 * The buffer is sized from the formatted length, so any int value of @index
 * (including negative ones) is rendered in full rather than truncated.
 *
 * The caller owns the result and must free() it. Returns NULL if formatting
 * or the allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int len;

	/* Dry run: ask snprintf() how many characters the result needs. */
	len = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (len < 0)
		return NULL;

	ret = malloc((size_t)len + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)len + 1, "%s/%s_%d", root, name, index);

	return ret;
}
79
/*
 * Build the path of a control file, "@cgroup/@control", in a freshly
 * allocated buffer.
 *
 * The caller owns the result and must free() it. Returns NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* One extra byte for the '/' separator and one for the NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
89
/*
 * Read the control file "@cgroup/@control" into @buf (capacity @len) via
 * read_text().
 *
 * Returns 0 when read_text() reports a non-negative length, -1 otherwise.
 * The number of bytes read is not reported to the caller.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);

	return read_text(file, buf, len) < 0 ? -1 : 0;
}
101
/*
 * Compare the content of "@cgroup/@control" with @expected.
 *
 * The read buffer holds strlen(@expected) + 1 bytes, so only that many
 * characters of the file take part in the comparison.
 *
 * Returns the strcmp() result (0 on a match), or -1 if @expected is NULL,
 * the buffer cannot be allocated, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t buflen;
	char *content;
	int diff = -1;

	/* A NULL expectation is rejected outright. */
	if (!expected)
		return -1;

	buflen = strlen(expected) + 1;
	content = malloc(buflen);
	if (!content)
		return -1;

	if (cg_read(cgroup, control, content, buflen) == 0)
		diff = strcmp(expected, content);

	free(content);
	return diff;
}
128
cg_read_strstr(const char * cgroup,const char * control,const char * needle)129 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
130 {
131 char buf[PAGE_SIZE];
132
133 if (cg_read(cgroup, control, buf, sizeof(buf)))
134 return -1;
135
136 return strstr(buf, needle) ? 0 : -1;
137 }
138
/*
 * Read "@cgroup/@control" and convert its content with atol().
 *
 * Returns the parsed value, or -1 if the file cannot be read. A stored
 * value of -1 is therefore indistinguishable from a read failure.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	return err ? -1 : atol(text);
}
148
cg_read_key_long(const char * cgroup,const char * control,const char * key)149 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
150 {
151 char buf[PAGE_SIZE];
152 char *ptr;
153
154 if (cg_read(cgroup, control, buf, sizeof(buf)))
155 return -1;
156
157 ptr = strstr(buf, key);
158 if (!ptr)
159 return -1;
160
161 return atol(ptr + strlen(key));
162 }
163
cg_read_lc(const char * cgroup,const char * control)164 long cg_read_lc(const char *cgroup, const char *control)
165 {
166 char buf[PAGE_SIZE];
167 const char delim[] = "\n";
168 char *line;
169 long cnt = 0;
170
171 if (cg_read(cgroup, control, buf, sizeof(buf)))
172 return -1;
173
174 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
175 cnt++;
176
177 return cnt;
178 }
179
/*
 * Write the string @buf to the control file "@cgroup/@control" via
 * write_text().
 *
 * Returns 0 only if the whole string was written in one go; a short write
 * or any error yields -1.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char file[PATH_MAX];
	ssize_t want = strlen(buf);

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);

	return write_text(file, buf, want) == want ? 0 : -1;
}
192
cg_find_unified_root(char * root,size_t len)193 int cg_find_unified_root(char *root, size_t len)
194 {
195 char buf[10 * PAGE_SIZE];
196 char *fs, *mount, *type;
197 const char delim[] = "\n\t ";
198
199 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
200 return -1;
201
202 /*
203 * Example:
204 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
205 */
206 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
207 mount = strtok(NULL, delim);
208 type = strtok(NULL, delim);
209 strtok(NULL, delim);
210 strtok(NULL, delim);
211 strtok(NULL, delim);
212
213 if (strcmp(type, "cgroup2") == 0) {
214 strncpy(root, mount, len);
215 return 0;
216 }
217 }
218
219 return -1;
220 }
221
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;

	return mkdir(cgroup, mode);
}
226
cg_wait_for_proc_count(const char * cgroup,int count)227 int cg_wait_for_proc_count(const char *cgroup, int count)
228 {
229 char buf[10 * PAGE_SIZE] = {0};
230 int attempts;
231 char *ptr;
232
233 for (attempts = 10; attempts >= 0; attempts--) {
234 int nr = 0;
235
236 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
237 break;
238
239 for (ptr = buf; *ptr; ptr++)
240 if (*ptr == '\n')
241 nr++;
242
243 if (nr >= count)
244 return 0;
245
246 usleep(100000);
247 }
248
249 return -1;
250 }
251
cg_killall(const char * cgroup)252 int cg_killall(const char *cgroup)
253 {
254 char buf[PAGE_SIZE];
255 char *ptr = buf;
256
257 /* If cgroup.kill exists use it. */
258 if (!cg_write(cgroup, "cgroup.kill", "1"))
259 return 0;
260
261 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
262 return -1;
263
264 while (ptr < buf + sizeof(buf)) {
265 int pid = strtol(ptr, &ptr, 10);
266
267 if (pid == 0)
268 break;
269 if (*ptr)
270 ptr++;
271 else
272 break;
273 if (kill(pid, SIGKILL))
274 return -1;
275 }
276
277 return 0;
278 }
279
/*
 * Remove the directory @cgroup.
 *
 * While rmdir() fails with EBUSY, cg_killall() is called on the cgroup, the
 * caller sleeps for 100 us and the removal is retried; there is no upper
 * bound on the number of retries. A directory that does not exist counts
 * as success.
 *
 * Returns 0 on success (or ENOENT), otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	for (;;) {
		ret = rmdir(cgroup);
		if (ret == 0 || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	/* Nothing left to remove is as good as having removed it. */
	return (ret && errno == ENOENT) ? 0 : ret;
}
297
/*
 * Write @pid, formatted as decimal text, to "@cgroup/cgroup.procs".
 *
 * Returns the cg_write() result: 0 on success, -1 on failure.
 */
int cg_enter(const char *cgroup, int pid)
{
	char text[64];

	snprintf(text, sizeof(text), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", text);
}
305
/*
 * Write "0" to "@cgroup/cgroup.procs".
 *
 * NOTE(review): going by the function name, PID 0 is presumably taken by
 * the kernel to mean the writing process itself - confirm against the
 * cgroup documentation.
 *
 * Returns the cg_write() result: 0 on success, -1 on failure.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
310
/*
 * Write "0" to "@cgroup/cgroup.threads".
 *
 * NOTE(review): going by the function name, ID 0 is presumably taken by the
 * kernel to mean the writing thread itself - confirm against the cgroup
 * documentation.
 *
 * Returns the cg_write() result: 0 on success, -1 on failure.
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
315
/*
 * Run @fn(@cgroup, @arg) in a forked child that first moves itself into
 * @cgroup, then wait for the child to finish.
 *
 * The child writes its own PID to "@cgroup/cgroup.procs"; if that fails it
 * exits with EXIT_FAILURE without calling @fn. Otherwise the return value
 * of @fn becomes the child's exit status.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if the fork failed, or -1 if waiting failed or the child
 * did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only meaningful after a successful waitpid(); retry
	 * when interrupted by a signal and bail out on any other error
	 * instead of inspecting an uninitialized status.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
340
/*
 * Create a child process directly inside the cgroup referred to by
 * @cgroup_fd, using clone3() with CLONE_INTO_CGROUP.
 *
 * Returns what sys_clone3() returned (0 in the child, the child's PID in
 * the parent, negative on a real failure). When the feature is unavailable
 * - CLONE_ARGS_SIZE_VER2 is not defined at build time, or the call fails
 * with ENOSYS or E2BIG - errno is set to ENOSYS and -ENOSYS is returned so
 * callers can tell "not supported" apart from a genuine failure.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t child = sys_clone3(&args, sizeof(struct __clone_args));

	/*
	 * Anything other than these two errors is a genuine result:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (child >= 0 || (errno != ENOSYS && errno != E2BIG))
		return child;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
368
/*
 * Wait for a state change of process @pid with waitid(), restarting the
 * call whenever it is interrupted by a signal.
 *
 * @options is passed to waitid() together with __WALL and __WNOTHREAD.
 * After a successful wait, info.si_status is examined with the wait-status
 * macros, checking only the kinds of event requested in @options, in this
 * order: WEXITED, WSTOPPED, WCONTINUED.
 *
 * Returns the exit status or stop signal extracted from si_status, 0 for a
 * continued process, or -1 if waitid() fails or no requested event matches.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int ret;

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	if ((options & WEXITED) && WIFEXITED(info.si_status))
		return WEXITSTATUS(info.si_status);

	if ((options & WSTOPPED) && WIFSTOPPED(info.si_status))
		return WSTOPSIG(info.si_status);

	if ((options & WCONTINUED) && WIFCONTINUED(info.si_status))
		return 0;

	return -1;
}
401
/*
 * Open the directory @dir with O_PATH, refusing to follow a symlink in the
 * final path component and marking the descriptor close-on-exec.
 *
 * Returns the open() result: a descriptor on success, -1 on failure.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_NOFOLLOW | O_CLOEXEC | O_DIRECTORY;

	return open(dir, flags);
}
406
/*
 * Close @fd if it is non-negative while leaving errno exactly as it was, so
 * an error code from an earlier call is not overwritten by close().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used in an unbraced if/else. @fd is evaluated
 * more than once, so pass a plain variable without side effects.
 */
#define close_prot_errno(fd) \
	do { \
		if ((fd) >= 0) { \
			int _e_ = errno; \
			close(fd); \
			errno = _e_; \
		} \
	} while (0)
413
/*
 * Start @fn(@cgroup, @arg) in a child created directly inside @cgroup via
 * clone_into_cgroup(), without waiting for it.
 *
 * The child exits with the return value of @fn. The directory descriptor
 * used for the clone is closed again with errno preserved, so the caller
 * still sees the errno left by clone_into_cgroup().
 *
 * Returns the child's PID in the parent, -1 if the cgroup directory cannot
 * be opened, or the negative result of clone_into_cgroup().
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	/* Only the child gets 0; it never returns from here. */
	if (child == 0)
		exit(fn(cgroup, arg));

	return child;
}
432
/*
 * Start @fn(@cgroup, @arg) in a child process inside @cgroup and return
 * without waiting for it.
 *
 * clone_into_cgroup_run_nowait() is tried first. If it reports ENOSYS, the
 * function falls back to fork(): the child writes its own PID to
 * "@cgroup/cgroup.procs" (exiting with EXIT_FAILURE if that fails) and then
 * exits with the return value of @fn.
 *
 * Returns the child's PID on success, -1 if the clone attempt failed for a
 * reason other than ENOSYS, or the negative fork() result.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char pidstr[64];
	int pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);

	if (pid > 0)
		return pid;

	/* Anything but "not supported" is a genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid == 0) {
		snprintf(pidstr, sizeof(pidstr), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", pidstr))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	return pid;
}
459
/*
 * Open an unnamed temporary file in the current directory for reading and
 * writing.
 *
 * open() requires a mode argument whenever O_TMPFILE is given; without it
 * the file mode is taken from whatever happens to be on the stack, and
 * fortified builds reject the call at compile time. The file is made
 * accessible to its owner only.
 *
 * Returns the open() result: a descriptor on success, -1 on failure.
 */
int get_temp_fd(void)
{
	return open(".", O_TMPFILE | O_RDWR | O_EXCL, S_IRUSR | S_IWUSR);
}
464
alloc_pagecache(int fd,size_t size)465 int alloc_pagecache(int fd, size_t size)
466 {
467 char buf[PAGE_SIZE];
468 struct stat st;
469 int i;
470
471 if (fstat(fd, &st))
472 goto cleanup;
473
474 size += st.st_size;
475
476 if (ftruncate(fd, size))
477 goto cleanup;
478
479 for (i = 0; i < size; i += sizeof(buf))
480 read(fd, buf, sizeof(buf));
481
482 return 0;
483
484 cleanup:
485 return -1;
486 }
487
alloc_anon(const char * cgroup,void * arg)488 int alloc_anon(const char *cgroup, void *arg)
489 {
490 size_t size = (unsigned long)arg;
491 char *buf, *ptr;
492
493 buf = malloc(size);
494 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
495 *ptr = 0;
496
497 free(buf);
498 return 0;
499 }
500
is_swap_enabled(void)501 int is_swap_enabled(void)
502 {
503 char buf[PAGE_SIZE];
504 const char delim[] = "\n";
505 int cnt = 0;
506 char *line;
507
508 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
509 return -1;
510
511 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
512 cnt++;
513
514 return cnt > 1;
515 }
516
/*
 * Write @score as decimal text to /proc/@pid/oom_score_adj.
 *
 * Returns 0 on success, or the negative result of open() or dprintf() on
 * failure.
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, written;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	written = dprintf(fd, "%d", score);

	/* The descriptor is released whether or not the write worked. */
	close(fd);

	return written < 0 ? written : 0;
}
537
/*
 * Read the procfs file @item of a process into @buf (capacity @size).
 *
 * For a non-zero @pid the file is /proc/@pid/@item. For @pid == 0 it is
 * /proc/thread-self/@item when @thread is true and /proc/self/@item
 * otherwise. @thread is ignored for a non-zero @pid.
 *
 * Returns the read_text() result: the number of bytes read, or a negative
 * value on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	return read_text(path, buf, size);
}
550
proc_read_strstr(int pid,bool thread,const char * item,const char * needle)551 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
552 {
553 char buf[PAGE_SIZE];
554
555 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
556 return -1;
557
558 return strstr(buf, needle) ? 0 : -1;
559 }
560
clone_into_cgroup_run_wait(const char * cgroup)561 int clone_into_cgroup_run_wait(const char *cgroup)
562 {
563 int cgroup_fd;
564 pid_t pid;
565
566 cgroup_fd = dirfd_open_opath(cgroup);
567 if (cgroup_fd < 0)
568 return -1;
569
570 pid = clone_into_cgroup(cgroup_fd);
571 close_prot_errno(cgroup_fd);
572 if (pid < 0)
573 return -1;
574
575 if (pid == 0)
576 exit(EXIT_SUCCESS);
577
578 /*
579 * We don't care whether this fails. We only care whether the initial
580 * clone succeeded.
581 */
582 (void)clone_reap(pid, WEXITED);
583 return 0;
584 }
585
/*
 * Create an inotify instance that watches "@cgroup/cgroup.events" for
 * modifications (IN_MODIFY).
 *
 * The path string built by cg_control() is only needed while the watch is
 * being added and is freed before returning.
 *
 * Returns the inotify descriptor on success; the caller is responsible for
 * closing it. Returns -1 if the instance, the path or the watch cannot be
 * set up.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	char *events;
	int fd, wd;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	events = cg_control(cgroup, "cgroup.events");
	if (!events) {
		close(fd);
		return -1;
	}

	wd = inotify_add_watch(fd, events, IN_MODIFY);
	free(events);
	if (wd == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
603
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout. A timeout or an interruption
 * by a signal simply restarts the wait, so the function only returns once
 * data is available or the wait can no longer succeed.
 *
 * Returns 0 when POLLIN is reported. Returns -1 if @fd is negative, if
 * poll() fails with anything other than EINTR, or if the descriptor reports
 * POLLERR, POLLHUP or POLLNVAL without POLLIN.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};

	/* poll() ignores negative descriptors and would never report them. */
	if (fd < 0)
		return -1;

	for (;;) {
		int ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		/* Timeout: nothing happened yet, keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN)
			return 0;

		/*
		 * These conditions are reported immediately on every call;
		 * looping on them would spin forever without ever seeing
		 * POLLIN.
		 */
		if (fds.revents & (POLLERR | POLLHUP | POLLNVAL))
			return -1;
	}
}
630