1 // SPDX-License-Identifier: GPL-2.0
2
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <linux/kernel.h>
7 #include <limits.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <sys/resource.h>
15
16 #include "../kselftest_harness.h"
17 #include "../clone3/clone3_selftests.h"
18
/*
 * Fallback syscall number, used only when no included header has already
 * defined __NR_close_range: Alpha uses 546, the MIPS ABIs and ia64 add an
 * ABI-specific base to 436, and every other architecture uses 436 as is.
 * On MIPS nothing is defined if _MIPS_SIM matches none of the three ABIs.
 */
#if !defined(__NR_close_range)
#  if defined(__alpha__)
#    define __NR_close_range 546
#  elif defined(_MIPS_SIM)
#    if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
#      define __NR_close_range (436 + 4000)
#    endif
#    if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
#      define __NR_close_range (436 + 6000)
#    endif
#    if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
#      define __NR_close_range (436 + 5000)
#    endif
#  elif defined(__ia64__)
#    define __NR_close_range (436 + 1024)
#  else
#    define __NR_close_range 436
#  endif
#endif /* !defined(__NR_close_range) */
38
/*
 * close_range() flag bits, supplied here only when no included header has
 * defined them already.
 */
#if !defined(CLOSE_RANGE_UNSHARE)
#  define CLOSE_RANGE_UNSHARE (1U << 1)
#endif

#if !defined(CLOSE_RANGE_CLOEXEC)
#  define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif
46
/*
 * Issue the raw close_range() system call through syscall().
 *
 * @fd:     lowest descriptor of the range
 * @max_fd: highest descriptor of the range (the tests below treat the
 *          range as inclusive)
 * @flags:  CLOSE_RANGE_* flag bits, or 0
 *
 * Returns the value handed back by syscall(), narrowed to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc = syscall(__NR_close_range, fd, max_fd, flags);

	return (int)rc;
}
52
TEST(core_close_range)53 TEST(core_close_range)
54 {
55 int i, ret;
56 int open_fds[101];
57
58 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
59 int fd;
60
61 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
62 ASSERT_GE(fd, 0) {
63 if (errno == ENOENT)
64 SKIP(return, "Skipping test since /dev/null does not exist");
65 }
66
67 open_fds[i] = fd;
68 }
69
70 EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
71 if (errno == ENOSYS)
72 SKIP(return, "close_range() syscall not supported");
73 }
74
75 EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
76
77 for (i = 0; i <= 50; i++)
78 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
79
80 for (i = 51; i <= 100; i++)
81 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
82
83 /* create a couple of gaps */
84 close(57);
85 close(78);
86 close(81);
87 close(82);
88 close(84);
89 close(90);
90
91 EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
92
93 for (i = 51; i <= 92; i++)
94 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
95
96 for (i = 93; i <= 100; i++)
97 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
98
99 /* test that the kernel caps and still closes all fds */
100 EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
101
102 for (i = 93; i <= 99; i++)
103 EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
104
105 EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
106
107 EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
108
109 EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
110 }
111
TEST(close_range_unshare)112 TEST(close_range_unshare)
113 {
114 int i, ret, status;
115 pid_t pid;
116 int open_fds[101];
117 struct __clone_args args = {
118 .flags = CLONE_FILES,
119 .exit_signal = SIGCHLD,
120 };
121
122 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
123 int fd;
124
125 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
126 ASSERT_GE(fd, 0) {
127 if (errno == ENOENT)
128 SKIP(return, "Skipping test since /dev/null does not exist");
129 }
130
131 open_fds[i] = fd;
132 }
133
134 pid = sys_clone3(&args, sizeof(args));
135 ASSERT_GE(pid, 0);
136
137 if (pid == 0) {
138 ret = sys_close_range(open_fds[0], open_fds[50],
139 CLOSE_RANGE_UNSHARE);
140 if (ret)
141 exit(EXIT_FAILURE);
142
143 for (i = 0; i <= 50; i++)
144 if (fcntl(open_fds[i], F_GETFL) != -1)
145 exit(EXIT_FAILURE);
146
147 for (i = 51; i <= 100; i++)
148 if (fcntl(open_fds[i], F_GETFL) == -1)
149 exit(EXIT_FAILURE);
150
151 /* create a couple of gaps */
152 close(57);
153 close(78);
154 close(81);
155 close(82);
156 close(84);
157 close(90);
158
159 ret = sys_close_range(open_fds[51], open_fds[92],
160 CLOSE_RANGE_UNSHARE);
161 if (ret)
162 exit(EXIT_FAILURE);
163
164 for (i = 51; i <= 92; i++)
165 if (fcntl(open_fds[i], F_GETFL) != -1)
166 exit(EXIT_FAILURE);
167
168 for (i = 93; i <= 100; i++)
169 if (fcntl(open_fds[i], F_GETFL) == -1)
170 exit(EXIT_FAILURE);
171
172 /* test that the kernel caps and still closes all fds */
173 ret = sys_close_range(open_fds[93], open_fds[99],
174 CLOSE_RANGE_UNSHARE);
175 if (ret)
176 exit(EXIT_FAILURE);
177
178 for (i = 93; i <= 99; i++)
179 if (fcntl(open_fds[i], F_GETFL) != -1)
180 exit(EXIT_FAILURE);
181
182 if (fcntl(open_fds[100], F_GETFL) == -1)
183 exit(EXIT_FAILURE);
184
185 ret = sys_close_range(open_fds[100], open_fds[100],
186 CLOSE_RANGE_UNSHARE);
187 if (ret)
188 exit(EXIT_FAILURE);
189
190 if (fcntl(open_fds[100], F_GETFL) != -1)
191 exit(EXIT_FAILURE);
192
193 exit(EXIT_SUCCESS);
194 }
195
196 EXPECT_EQ(waitpid(pid, &status, 0), pid);
197 EXPECT_EQ(true, WIFEXITED(status));
198 EXPECT_EQ(0, WEXITSTATUS(status));
199 }
200
TEST(close_range_unshare_capped)201 TEST(close_range_unshare_capped)
202 {
203 int i, ret, status;
204 pid_t pid;
205 int open_fds[101];
206 struct __clone_args args = {
207 .flags = CLONE_FILES,
208 .exit_signal = SIGCHLD,
209 };
210
211 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
212 int fd;
213
214 fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
215 ASSERT_GE(fd, 0) {
216 if (errno == ENOENT)
217 SKIP(return, "Skipping test since /dev/null does not exist");
218 }
219
220 open_fds[i] = fd;
221 }
222
223 pid = sys_clone3(&args, sizeof(args));
224 ASSERT_GE(pid, 0);
225
226 if (pid == 0) {
227 ret = sys_close_range(open_fds[0], UINT_MAX,
228 CLOSE_RANGE_UNSHARE);
229 if (ret)
230 exit(EXIT_FAILURE);
231
232 for (i = 0; i <= 100; i++)
233 if (fcntl(open_fds[i], F_GETFL) != -1)
234 exit(EXIT_FAILURE);
235
236 exit(EXIT_SUCCESS);
237 }
238
239 EXPECT_EQ(waitpid(pid, &status, 0), pid);
240 EXPECT_EQ(true, WIFEXITED(status));
241 EXPECT_EQ(0, WEXITSTATUS(status));
242 }
243
TEST(close_range_cloexec)244 TEST(close_range_cloexec)
245 {
246 int i, ret;
247 int open_fds[101];
248 struct rlimit rlimit;
249
250 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
251 int fd;
252
253 fd = open("/dev/null", O_RDONLY);
254 ASSERT_GE(fd, 0) {
255 if (errno == ENOENT)
256 SKIP(return, "Skipping test since /dev/null does not exist");
257 }
258
259 open_fds[i] = fd;
260 }
261
262 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
263 if (ret < 0) {
264 if (errno == ENOSYS)
265 SKIP(return, "close_range() syscall not supported");
266 if (errno == EINVAL)
267 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
268 }
269
270 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
271 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
272 rlimit.rlim_cur = 25;
273 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
274
275 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
276 ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
277 ASSERT_EQ(0, ret);
278 ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
279 ASSERT_EQ(0, ret);
280
281 for (i = 0; i <= 50; i++) {
282 int flags = fcntl(open_fds[i], F_GETFD);
283
284 EXPECT_GT(flags, -1);
285 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
286 }
287
288 for (i = 51; i <= 74; i++) {
289 int flags = fcntl(open_fds[i], F_GETFD);
290
291 EXPECT_GT(flags, -1);
292 EXPECT_EQ(flags & FD_CLOEXEC, 0);
293 }
294
295 for (i = 75; i <= 100; i++) {
296 int flags = fcntl(open_fds[i], F_GETFD);
297
298 EXPECT_GT(flags, -1);
299 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
300 }
301
302 /* Test a common pattern. */
303 ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
304 for (i = 0; i <= 100; i++) {
305 int flags = fcntl(open_fds[i], F_GETFD);
306
307 EXPECT_GT(flags, -1);
308 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
309 }
310 }
311
TEST(close_range_cloexec_unshare)312 TEST(close_range_cloexec_unshare)
313 {
314 int i, ret;
315 int open_fds[101];
316 struct rlimit rlimit;
317
318 for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
319 int fd;
320
321 fd = open("/dev/null", O_RDONLY);
322 ASSERT_GE(fd, 0) {
323 if (errno == ENOENT)
324 SKIP(return, "Skipping test since /dev/null does not exist");
325 }
326
327 open_fds[i] = fd;
328 }
329
330 ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
331 if (ret < 0) {
332 if (errno == ENOSYS)
333 SKIP(return, "close_range() syscall not supported");
334 if (errno == EINVAL)
335 SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
336 }
337
338 /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
339 ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
340 rlimit.rlim_cur = 25;
341 ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
342
343 /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
344 ret = sys_close_range(open_fds[0], open_fds[50],
345 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
346 ASSERT_EQ(0, ret);
347 ret = sys_close_range(open_fds[75], open_fds[100],
348 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
349 ASSERT_EQ(0, ret);
350
351 for (i = 0; i <= 50; i++) {
352 int flags = fcntl(open_fds[i], F_GETFD);
353
354 EXPECT_GT(flags, -1);
355 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
356 }
357
358 for (i = 51; i <= 74; i++) {
359 int flags = fcntl(open_fds[i], F_GETFD);
360
361 EXPECT_GT(flags, -1);
362 EXPECT_EQ(flags & FD_CLOEXEC, 0);
363 }
364
365 for (i = 75; i <= 100; i++) {
366 int flags = fcntl(open_fds[i], F_GETFD);
367
368 EXPECT_GT(flags, -1);
369 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
370 }
371
372 /* Test a common pattern. */
373 ret = sys_close_range(3, UINT_MAX,
374 CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
375 for (i = 0; i <= 100; i++) {
376 int flags = fcntl(open_fds[i], F_GETFD);
377
378 EXPECT_GT(flags, -1);
379 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
380 }
381 }
382
383 /*
384 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
385 */
TEST(close_range_cloexec_syzbot)386 TEST(close_range_cloexec_syzbot)
387 {
388 int fd1, fd2, fd3, flags, ret, status;
389 pid_t pid;
390 struct __clone_args args = {
391 .flags = CLONE_FILES,
392 .exit_signal = SIGCHLD,
393 };
394
395 /* Create a huge gap in the fd table. */
396 fd1 = open("/dev/null", O_RDWR);
397 EXPECT_GT(fd1, 0);
398
399 fd2 = dup2(fd1, 1000);
400 EXPECT_GT(fd2, 0);
401
402 pid = sys_clone3(&args, sizeof(args));
403 ASSERT_GE(pid, 0);
404
405 if (pid == 0) {
406 ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
407 if (ret)
408 exit(EXIT_FAILURE);
409
410 /*
411 * We now have a private file descriptor table and all
412 * our open fds should still be open but made
413 * close-on-exec.
414 */
415 flags = fcntl(fd1, F_GETFD);
416 EXPECT_GT(flags, -1);
417 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
418
419 flags = fcntl(fd2, F_GETFD);
420 EXPECT_GT(flags, -1);
421 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
422
423 fd3 = dup2(fd1, 42);
424 EXPECT_GT(fd3, 0);
425
426 /*
427 * Duplicating the file descriptor must remove the
428 * FD_CLOEXEC flag.
429 */
430 flags = fcntl(fd3, F_GETFD);
431 EXPECT_GT(flags, -1);
432 EXPECT_EQ(flags & FD_CLOEXEC, 0);
433
434 exit(EXIT_SUCCESS);
435 }
436
437 EXPECT_EQ(waitpid(pid, &status, 0), pid);
438 EXPECT_EQ(true, WIFEXITED(status));
439 EXPECT_EQ(0, WEXITSTATUS(status));
440
441 /*
442 * We had a shared file descriptor table before along with requesting
443 * close-on-exec so the original fds must not be close-on-exec.
444 */
445 flags = fcntl(fd1, F_GETFD);
446 EXPECT_GT(flags, -1);
447 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
448
449 flags = fcntl(fd2, F_GETFD);
450 EXPECT_GT(flags, -1);
451 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
452
453 fd3 = dup2(fd1, 42);
454 EXPECT_GT(fd3, 0);
455
456 flags = fcntl(fd3, F_GETFD);
457 EXPECT_GT(flags, -1);
458 EXPECT_EQ(flags & FD_CLOEXEC, 0);
459
460 EXPECT_EQ(close(fd1), 0);
461 EXPECT_EQ(close(fd2), 0);
462 EXPECT_EQ(close(fd3), 0);
463 }
464
465 /*
466 * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
467 */
TEST(close_range_cloexec_unshare_syzbot)468 TEST(close_range_cloexec_unshare_syzbot)
469 {
470 int i, fd1, fd2, fd3, flags, ret, status;
471 pid_t pid;
472 struct __clone_args args = {
473 .flags = CLONE_FILES,
474 .exit_signal = SIGCHLD,
475 };
476
477 /*
478 * Create a huge gap in the fd table. When we now call
479 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
480 * bound the kernel will only copy up to fd1 file descriptors into the
481 * new fd table. If the kernel is buggy and doesn't handle
482 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
483 * descriptors and we will oops!
484 *
485 * On a buggy kernel this should immediately oops. But let's loop just
486 * to be sure.
487 */
488 fd1 = open("/dev/null", O_RDWR);
489 EXPECT_GT(fd1, 0);
490
491 fd2 = dup2(fd1, 1000);
492 EXPECT_GT(fd2, 0);
493
494 for (i = 0; i < 100; i++) {
495
496 pid = sys_clone3(&args, sizeof(args));
497 ASSERT_GE(pid, 0);
498
499 if (pid == 0) {
500 ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
501 CLOSE_RANGE_CLOEXEC);
502 if (ret)
503 exit(EXIT_FAILURE);
504
505 /*
506 * We now have a private file descriptor table and all
507 * our open fds should still be open but made
508 * close-on-exec.
509 */
510 flags = fcntl(fd1, F_GETFD);
511 EXPECT_GT(flags, -1);
512 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
513
514 flags = fcntl(fd2, F_GETFD);
515 EXPECT_GT(flags, -1);
516 EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
517
518 fd3 = dup2(fd1, 42);
519 EXPECT_GT(fd3, 0);
520
521 /*
522 * Duplicating the file descriptor must remove the
523 * FD_CLOEXEC flag.
524 */
525 flags = fcntl(fd3, F_GETFD);
526 EXPECT_GT(flags, -1);
527 EXPECT_EQ(flags & FD_CLOEXEC, 0);
528
529 EXPECT_EQ(close(fd1), 0);
530 EXPECT_EQ(close(fd2), 0);
531 EXPECT_EQ(close(fd3), 0);
532
533 exit(EXIT_SUCCESS);
534 }
535
536 EXPECT_EQ(waitpid(pid, &status, 0), pid);
537 EXPECT_EQ(true, WIFEXITED(status));
538 EXPECT_EQ(0, WEXITSTATUS(status));
539 }
540
541 /*
542 * We created a private file descriptor table before along with
543 * requesting close-on-exec so the original fds must not be
544 * close-on-exec.
545 */
546 flags = fcntl(fd1, F_GETFD);
547 EXPECT_GT(flags, -1);
548 EXPECT_EQ(flags & FD_CLOEXEC, 0);
549
550 flags = fcntl(fd2, F_GETFD);
551 EXPECT_GT(flags, -1);
552 EXPECT_EQ(flags & FD_CLOEXEC, 0);
553
554 fd3 = dup2(fd1, 42);
555 EXPECT_GT(fd3, 0);
556
557 flags = fcntl(fd3, F_GETFD);
558 EXPECT_GT(flags, -1);
559 EXPECT_EQ(flags & FD_CLOEXEC, 0);
560
561 EXPECT_EQ(close(fd1), 0);
562 EXPECT_EQ(close(fd2), 0);
563 EXPECT_EQ(close(fd3), 0);
564 }
565
TEST(close_range_bitmap_corruption)566 TEST(close_range_bitmap_corruption)
567 {
568 pid_t pid;
569 int status;
570 struct __clone_args args = {
571 .flags = CLONE_FILES,
572 .exit_signal = SIGCHLD,
573 };
574
575 /* get the first 128 descriptors open */
576 for (int i = 2; i < 128; i++)
577 EXPECT_GE(dup2(0, i), 0);
578
579 /* get descriptor table shared */
580 pid = sys_clone3(&args, sizeof(args));
581 ASSERT_GE(pid, 0);
582
583 if (pid == 0) {
584 /* unshare and truncate descriptor table down to 64 */
585 if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
586 exit(EXIT_FAILURE);
587
588 ASSERT_EQ(fcntl(64, F_GETFD), -1);
589 /* ... and verify that the range 64..127 is not
590 stuck "fully used" according to secondary bitmap */
591 EXPECT_EQ(dup(0), 64)
592 exit(EXIT_FAILURE);
593 exit(EXIT_SUCCESS);
594 }
595
596 EXPECT_EQ(waitpid(pid, &status, 0), pid);
597 EXPECT_EQ(true, WIFEXITED(status));
598 EXPECT_EQ(0, WEXITSTATUS(status));
599 }
600
601 TEST_HARNESS_MAIN
602