1 /* Authors: Gregory P. Smith & Jeffrey Yasskin */
2 #include "Python.h"
3 #include "pycore_fileutils.h"
4 #if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)
5 # define _GNU_SOURCE
6 #endif
7 #include <unistd.h>
8 #include <fcntl.h>
9 #ifdef HAVE_SYS_TYPES_H
10 #include <sys/types.h>
11 #endif
12 #if defined(HAVE_SYS_STAT_H)
13 #include <sys/stat.h>
14 #endif
15 #ifdef HAVE_SYS_SYSCALL_H
16 #include <sys/syscall.h>
17 #endif
18 #if defined(HAVE_SYS_RESOURCE_H)
19 #include <sys/resource.h>
20 #endif
21 #ifdef HAVE_DIRENT_H
22 #include <dirent.h>
23 #endif
24 #ifdef HAVE_GRP_H
25 #include <grp.h>
26 #endif /* HAVE_GRP_H */
27
28 #include "posixmodule.h"
29
30 #ifdef _Py_MEMORY_SANITIZER
31 # include <sanitizer/msan_interface.h>
32 #endif
33
34 #if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
35 # include <sys/linux-syscalls.h>
36 # define SYS_getdents64 __NR_getdents64
37 #endif
38
39 #if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
40 defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
41 /* If this is ever expanded to non-Linux platforms, verify what calls are
42 * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */
43 # include <signal.h>
44 # define VFORK_USABLE 1
45 #endif
46
47 #if defined(__sun) && defined(__SVR4)
48 /* readdir64 is used to work around Solaris 9 bug 6395699. */
49 # define readdir readdir64
50 # define dirent dirent64
51 # if !defined(HAVE_DIRFD)
52 /* Some versions of Solaris lack dirfd(). */
53 # define dirfd(dirp) ((dirp)->dd_fd)
54 # define HAVE_DIRFD
55 # endif
56 #endif
57
58 #if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
59 # define FD_DIR "/dev/fd"
60 #else
61 # define FD_DIR "/proc/self/fd"
62 #endif
63
64 #ifdef NGROUPS_MAX
65 #define MAX_GROUPS NGROUPS_MAX
66 #else
67 #define MAX_GROUPS 64
68 #endif
69
70 #define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0)
71
72 static struct PyModuleDef _posixsubprocessmodule;
73
/* Convert a NUL-terminated string of ASCII decimal digits to a non-negative
 * int.  No libc call is made, so this is usable between fork() and exec().
 *
 * Returns the parsed value on success.  Returns -1 if the string contains
 * any character that is not a decimal digit (including a sign), or if the
 * value would not fit in an int.  An empty string parses as 0.
 */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    while (*name >= '0' && *name <= '9') {
        int digit = *name - '0';
        /* Signed overflow is undefined behaviour; refuse the input before
         * num * 10 + digit could exceed INT_MAX. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;
        }
        num = num * 10 + digit;
        ++name;
    }
    if (*name) {
        return -1;  /* Non digit found, not a number. */
    }
    return num;
}
87
88
89 #if defined(__FreeBSD__) || defined(__DragonFly__)
90 /* When /dev/fd isn't mounted it is often a static directory populated
91 * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD.
92 * NetBSD and OpenBSD have a /proc fs available (though not necessarily
93 * mounted) and do not have fdescfs for /dev/fd. MacOS X has a devfs
94 * that properly supports /dev/fd.
95 */
96 static int
_is_fdescfs_mounted_on_dev_fd(void)97 _is_fdescfs_mounted_on_dev_fd(void)
98 {
99 struct stat dev_stat;
100 struct stat dev_fd_stat;
101 if (stat("/dev", &dev_stat) != 0)
102 return 0;
103 if (stat(FD_DIR, &dev_fd_stat) != 0)
104 return 0;
105 if (dev_stat.st_dev == dev_fd_stat.st_dev)
106 return 0; /* / == /dev == /dev/fd means it is static. #fail */
107 return 1;
108 }
109 #endif
110
111
112 /* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
113 static int
_sanity_check_python_fd_sequence(PyObject * fd_sequence)114 _sanity_check_python_fd_sequence(PyObject *fd_sequence)
115 {
116 Py_ssize_t seq_idx;
117 long prev_fd = -1;
118 for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {
119 PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);
120 long iter_fd;
121 if (!PyLong_Check(py_fd)) {
122 return 1;
123 }
124 iter_fd = PyLong_AsLong(py_fd);
125 if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {
126 /* Negative, overflow, unsorted, too big for a fd. */
127 return 1;
128 }
129 prev_fd = iter_fd;
130 }
131 return 0;
132 }
133
134
135 /* Is fd found in the sorted Python Sequence? */
136 static int
_is_fd_in_sorted_fd_sequence(int fd,PyObject * fd_sequence)137 _is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
138 {
139 /* Binary search. */
140 Py_ssize_t search_min = 0;
141 Py_ssize_t search_max = PyTuple_GET_SIZE(fd_sequence) - 1;
142 if (search_max < 0)
143 return 0;
144 do {
145 long middle = (search_min + search_max) / 2;
146 long middle_fd = PyLong_AsLong(PyTuple_GET_ITEM(fd_sequence, middle));
147 if (fd == middle_fd)
148 return 1;
149 if (fd > middle_fd)
150 search_min = middle + 1;
151 else
152 search_max = middle - 1;
153 } while (search_min <= search_max);
154 return 0;
155 }
156
157 static int
make_inheritable(PyObject * py_fds_to_keep,int errpipe_write)158 make_inheritable(PyObject *py_fds_to_keep, int errpipe_write)
159 {
160 Py_ssize_t i, len;
161
162 len = PyTuple_GET_SIZE(py_fds_to_keep);
163 for (i = 0; i < len; ++i) {
164 PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
165 long fd = PyLong_AsLong(fdobj);
166 assert(!PyErr_Occurred());
167 assert(0 <= fd && fd <= INT_MAX);
168 if (fd == errpipe_write) {
169 /* errpipe_write is part of py_fds_to_keep. It must be closed at
170 exec(), but kept open in the child process until exec() is
171 called. */
172 continue;
173 }
174 if (_Py_set_inheritable_async_safe((int)fd, 1, NULL) < 0)
175 return -1;
176 }
177 return 0;
178 }
179
180
/* Get the maximum file descriptor that could be opened by this process.
 * This function is async signal safe for use between fork() and exec().
 *
 * The platform specific sources are tried in order and the first one that
 * answers wins; 256 is the last-resort value.
 */
static long
safe_get_max_fd(void)
{
#if defined(__NetBSD__)
    {
        long netbsd_max = fcntl(0, F_MAXFD);
        if (netbsd_max >= 0) {
            return netbsd_max;
        }
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    {
        struct rlimit limits;
        /* Not on the POSIX async signal safe functions list but likely
         * safe.  TODO - Someone should audit OpenBSD to make sure. */
        if (getrlimit(RLIMIT_NOFILE, &limits) >= 0) {
            return (long) limits.rlim_max;
        }
    }
#endif
#ifdef _SC_OPEN_MAX
    {
        long sc_max = sysconf(_SC_OPEN_MAX);
        if (sc_max != -1) {
            return sc_max;
        }
    }
#endif
    return 256;  /* Matches legacy Lib/subprocess.py behavior. */
}
207
208
209 /* Close all file descriptors in the range from start_fd and higher
210 * except for those in py_fds_to_keep. If the range defined by
211 * [start_fd, safe_get_max_fd()) is large this will take a long
212 * time as it calls close() on EVERY possible fd.
213 *
214 * It isn't possible to know for sure what the max fd to go up to
215 * is for processes with the capability of raising their maximum.
216 */
217 static void
_close_fds_by_brute_force(long start_fd,PyObject * py_fds_to_keep)218 _close_fds_by_brute_force(long start_fd, PyObject *py_fds_to_keep)
219 {
220 long end_fd = safe_get_max_fd();
221 Py_ssize_t num_fds_to_keep = PyTuple_GET_SIZE(py_fds_to_keep);
222 Py_ssize_t keep_seq_idx;
223 /* As py_fds_to_keep is sorted we can loop through the list closing
224 * fds in between any in the keep list falling within our range. */
225 for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) {
226 PyObject* py_keep_fd = PyTuple_GET_ITEM(py_fds_to_keep, keep_seq_idx);
227 int keep_fd = PyLong_AsLong(py_keep_fd);
228 if (keep_fd < start_fd)
229 continue;
230 _Py_closerange(start_fd, keep_fd - 1);
231 start_fd = keep_fd + 1;
232 }
233 if (start_fd <= end_fd) {
234 _Py_closerange(start_fd, end_fd);
235 }
236 }
237
238
239 #if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* Record layout used to interpret the buffer filled in by the
 * SYS_getdents64 system call in _close_open_fds_safe().
 *
 * It doesn't matter if d_name has room for NAME_MAX chars; we're using this
 * only to read a directory of short file descriptor number names.  The kernel
 * will return an error if we didn't give it enough space.  Highly Unlikely.
 * This structure is very old and stable: It will not change unless the kernel
 * chooses to break compatibility with all existing binaries.  Highly Unlikely.
 */
struct linux_dirent64 {
    unsigned long long d_ino;
    long long d_off;
    /* The reader advances through the buffer by this many bytes per entry. */
    unsigned short d_reclen;  /* Length of this linux_dirent */
    unsigned char d_type;
    /* Parsed as a decimal fd number by the reader. */
    char d_name[256];  /* Filename (null-terminated) */
};
253
254 /* Close all open file descriptors in the range from start_fd and higher
255 * Do not close any in the sorted py_fds_to_keep list.
256 *
257 * This version is async signal safe as it does not make any unsafe C library
258 * calls, malloc calls or handle any locks. It is _unfortunate_ to be forced
259 * to resort to making a kernel system call directly but this is the ONLY api
260 * available that does no harm. opendir/readdir/closedir perform memory
261 * allocation and locking so while they usually work they are not guaranteed
262 * to (especially if you have replaced your malloc implementation). A version
263 * of this function that uses those can be found in the _maybe_unsafe variant.
264 *
265 * This is Linux specific because that is all I am ready to test it on. It
266 * should be easy to add OS specific dirent or dirent64 structures and modify
267 * it with some cpp #define magic to work on other OSes as well if you want.
268 */
269 static void
_close_open_fds_safe(int start_fd,PyObject * py_fds_to_keep)270 _close_open_fds_safe(int start_fd, PyObject* py_fds_to_keep)
271 {
272 int fd_dir_fd;
273
274 fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
275 if (fd_dir_fd == -1) {
276 /* No way to get a list of open fds. */
277 _close_fds_by_brute_force(start_fd, py_fds_to_keep);
278 return;
279 } else {
280 char buffer[sizeof(struct linux_dirent64)];
281 int bytes;
282 while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
283 (struct linux_dirent64 *)buffer,
284 sizeof(buffer))) > 0) {
285 struct linux_dirent64 *entry;
286 int offset;
287 #ifdef _Py_MEMORY_SANITIZER
288 __msan_unpoison(buffer, bytes);
289 #endif
290 for (offset = 0; offset < bytes; offset += entry->d_reclen) {
291 int fd;
292 entry = (struct linux_dirent64 *)(buffer + offset);
293 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
294 continue; /* Not a number. */
295 if (fd != fd_dir_fd && fd >= start_fd &&
296 !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
297 close(fd);
298 }
299 }
300 }
301 close(fd_dir_fd);
302 }
303 }
304
305 #define _close_open_fds _close_open_fds_safe
306
307 #else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
308
309
310 /* Close all open file descriptors from start_fd and higher.
311 * Do not close any in the sorted py_fds_to_keep tuple.
312 *
313 * This function violates the strict use of async signal safe functions. :(
314 * It calls opendir(), readdir() and closedir(). Of these, the one most
315 * likely to ever cause a problem is opendir() as it performs an internal
316 * malloc(). Practically this should not be a problem. The Java VM makes the
317 * same calls between fork and exec in its own UNIXProcess_md.c implementation.
318 *
319 * readdir_r() is not used because it provides no benefit. It is typically
320 * implemented as readdir() followed by memcpy(). See also:
321 * http://womble.decadent.org.uk/readdir_r-advisory.html
322 */
323 static void
_close_open_fds_maybe_unsafe(long start_fd,PyObject * py_fds_to_keep)324 _close_open_fds_maybe_unsafe(long start_fd, PyObject* py_fds_to_keep)
325 {
326 DIR *proc_fd_dir;
327 #ifndef HAVE_DIRFD
328 while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep)) {
329 ++start_fd;
330 }
331 /* Close our lowest fd before we call opendir so that it is likely to
332 * reuse that fd otherwise we might close opendir's file descriptor in
333 * our loop. This trick assumes that fd's are allocated on a lowest
334 * available basis. */
335 close(start_fd);
336 ++start_fd;
337 #endif
338
339 #if defined(__FreeBSD__) || defined(__DragonFly__)
340 if (!_is_fdescfs_mounted_on_dev_fd())
341 proc_fd_dir = NULL;
342 else
343 #endif
344 proc_fd_dir = opendir(FD_DIR);
345 if (!proc_fd_dir) {
346 /* No way to get a list of open fds. */
347 _close_fds_by_brute_force(start_fd, py_fds_to_keep);
348 } else {
349 struct dirent *dir_entry;
350 #ifdef HAVE_DIRFD
351 int fd_used_by_opendir = dirfd(proc_fd_dir);
352 #else
353 int fd_used_by_opendir = start_fd - 1;
354 #endif
355 errno = 0;
356 while ((dir_entry = readdir(proc_fd_dir))) {
357 int fd;
358 if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
359 continue; /* Not a number. */
360 if (fd != fd_used_by_opendir && fd >= start_fd &&
361 !_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
362 close(fd);
363 }
364 errno = 0;
365 }
366 if (errno) {
367 /* readdir error, revert behavior. Highly Unlikely. */
368 _close_fds_by_brute_force(start_fd, py_fds_to_keep);
369 }
370 closedir(proc_fd_dir);
371 }
372 }
373
374 #define _close_open_fds _close_open_fds_maybe_unsafe
375
376 #endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
377
378
379 #ifdef VFORK_USABLE
380 /* Reset dispositions for all signals to SIG_DFL except for ignored
381 * signals. This way we ensure that no signal handlers can run
382 * after we unblock signals in a child created by vfork().
383 */
384 static void
reset_signal_handlers(const sigset_t * child_sigmask)385 reset_signal_handlers(const sigset_t *child_sigmask)
386 {
387 struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
388 for (int sig = 1; sig < _NSIG; sig++) {
389 /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
390 if (sig == SIGKILL || sig == SIGSTOP) {
391 continue;
392 }
393
394 /* There is no need to reset the disposition of signals that will
395 * remain blocked across execve() since the kernel will do it. */
396 if (sigismember(child_sigmask, sig) == 1) {
397 continue;
398 }
399
400 struct sigaction sa;
401 /* C libraries usually return EINVAL for signals used
402 * internally (e.g. for thread cancellation), so simply
403 * skip errors here. */
404 if (sigaction(sig, NULL, &sa) == -1) {
405 continue;
406 }
407
408 /* void *h works as these fields are both pointer types already. */
409 void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
410 (void *)sa.sa_handler);
411 if (h == SIG_IGN || h == SIG_DFL) {
412 continue;
413 }
414
415 /* This call can't reasonably fail, but if it does, terminating
416 * the child seems to be too harsh, so ignore errors. */
417 (void) sigaction(sig, &sa_dfl, NULL);
418 }
419 }
420 #endif /* VFORK_USABLE */
421
422
/*
 * This function is code executed in the child process immediately after
 * (v)fork to set things up and call exec().
 *
 * All of the code in this function must only use async-signal-safe functions,
 * listed at `man 7 signal` or
 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
 *
 * This restriction is documented at
 * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
 *
 * If this function is called after vfork(), even more care must be taken.
 * The lack of preparations that C libraries normally take on fork(),
 * as well as sharing the address space with the parent, might make even
 * async-signal-safe functions vfork-unsafe. In particular, on Linux,
 * set*id() and setgroups() library functions must not be called, since
 * they have to interact with the library-level thread list and send
 * library-internal signals to implement per-process credentials semantics
 * required by POSIX but not supported natively on Linux. Another reason to
 * avoid this family of functions is that sharing an address space between
 * processes running with different privileges is inherently insecure.
 * See bpo-35823 for further discussion and references.
 *
 * In some C libraries, setrlimit() has the same thread list/signalling
 * behavior since resource limits were per-thread attributes before
 * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
 * (https://www.openwall.com/lists/musl/2020/10/15/6).
 *
 * If vfork-unsafe functionality is desired after vfork(), consider using
 * syscall() to obtain it.
 *
 * Parameters, in the order they are consumed below:
 *   py_fds_to_keep, errpipe_write  - passed to make_inheritable() first.
 *   p2cwrite, c2pread, errread, errpipe_read
 *                                  - parent-side pipe ends, closed here
 *                                    (the first three only when != -1).
 *   p2cread, c2pwrite, errwrite    - installed as fds 0, 1 and 2 when != -1.
 *   cwd                            - chdir() target when non-NULL.
 *   child_umask                    - passed to umask() when >= 0.
 *   restore_signals                - when true, _Py_RestoreSignals() is called.
 *   child_sigmask                  - when non-NULL (VFORK_USABLE builds only),
 *                                    signal handlers are reset and this mask
 *                                    is installed.
 *   call_setsid, call_setgroups/groups_size/groups, call_setgid/gid,
 *   call_setuid/uid                - each flag enables the matching call.
 *   preexec_fn, preexec_fn_args_tuple
 *                                  - Python callable invoked before exec
 *                                    when it is not None and the tuple is
 *                                    non-NULL.
 *   close_fds                      - when true, _close_open_fds(3, ...) runs.
 *   exec_array, argv, envp         - candidates tried in order with
 *                                    execve() (or execv() when envp is NULL).
 *
 * This function only returns when something failed.  Before returning it
 * writes a report to errpipe_write: "OSError:<errno in hex>:" (followed by
 * "noexec" when the failure happened before the preexec stage was reached),
 * or "SubprocessError:0:<message>" when errno is 0.
 */
_Py_NO_INLINE static void
child_exec(char *const exec_array[],
           char *const argv[],
           char *const envp[],
           const char *cwd,
           int p2cread, int p2cwrite,
           int c2pread, int c2pwrite,
           int errread, int errwrite,
           int errpipe_read, int errpipe_write,
           int close_fds, int restore_signals,
           int call_setsid,
           int call_setgid, gid_t gid,
           int call_setgroups, size_t groups_size, const gid_t *groups,
           int call_setuid, uid_t uid, int child_umask,
           const void *child_sigmask,
           PyObject *py_fds_to_keep,
           PyObject *preexec_fn,
           PyObject *preexec_fn_args_tuple)
{
    int i, saved_errno, reached_preexec = 0;
    PyObject *result;
    const char* err_msg = "";
    /* Buffer large enough to hold a hex integer.  We can't malloc. */
    char hex_errno[sizeof(saved_errno)*2+1];

    if (make_inheritable(py_fds_to_keep, errpipe_write) < 0)
        goto error;

    /* Close parent's pipe ends.  POSIX_CALL jumps to the error label when
       the wrapped call returns -1. */
    if (p2cwrite != -1)
        POSIX_CALL(close(p2cwrite));
    if (c2pread != -1)
        POSIX_CALL(close(c2pread));
    if (errread != -1)
        POSIX_CALL(close(errread));
    POSIX_CALL(close(errpipe_read));

    /* When duping fds, if there arises a situation where one of the fds is
       either 0, 1 or 2, it is possible that it is overwritten (#12607).
       Move such fds out of the way with dup() before the dup2() calls. */
    if (c2pwrite == 0) {
        POSIX_CALL(c2pwrite = dup(c2pwrite));
        /* issue32270: the duplicate gets its inheritable flag set to 0. */
        if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
            goto error;
        }
    }
    /* Loop because the first dup() of fd 0 may itself land on fd 1. */
    while (errwrite == 0 || errwrite == 1) {
        POSIX_CALL(errwrite = dup(errwrite));
        /* issue32270: the duplicate gets its inheritable flag set to 0. */
        if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
            goto error;
        }
    }

    /* Dup fds for child.
       dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
       would be a no-op (issue #10806). */
    if (p2cread == 0) {
        if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
            goto error;
    }
    else if (p2cread != -1)
        POSIX_CALL(dup2(p2cread, 0));  /* stdin */

    if (c2pwrite == 1) {
        if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
            goto error;
    }
    else if (c2pwrite != -1)
        POSIX_CALL(dup2(c2pwrite, 1));  /* stdout */

    if (errwrite == 2) {
        if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
            goto error;
    }
    else if (errwrite != -1)
        POSIX_CALL(dup2(errwrite, 2));  /* stderr */

    /* We no longer manually close p2cread, c2pwrite, and errwrite here as
     * _close_open_fds takes care when it is not already non-inheritable. */

    if (cwd)
        POSIX_CALL(chdir(cwd));

    if (child_umask >= 0)
        umask(child_umask);  /* umask() always succeeds. */

    if (restore_signals)
        _Py_RestoreSignals();

#ifdef VFORK_USABLE
    if (child_sigmask) {
        /* Handlers are reset before the mask is installed so that none of
           them can run once signals become unblocked. */
        reset_signal_handlers(child_sigmask);
        /* pthread_sigmask() returns the error number; store it in errno so
           the error path below reports it. */
        if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
            goto error;
        }
    }
#endif

#ifdef HAVE_SETSID
    if (call_setsid)
        POSIX_CALL(setsid());
#endif

#ifdef HAVE_SETGROUPS
    if (call_setgroups)
        POSIX_CALL(setgroups(groups_size, groups));
#endif /* HAVE_SETGROUPS */

#ifdef HAVE_SETREGID
    if (call_setgid)
        POSIX_CALL(setregid(gid, gid));
#endif /* HAVE_SETREGID */

#ifdef HAVE_SETREUID
    if (call_setuid)
        POSIX_CALL(setreuid(uid, uid));
#endif /* HAVE_SETREUID */


    /* From here on, a failure is no longer reported with the "noexec"
       suffix. */
    reached_preexec = 1;
    if (preexec_fn != Py_None && preexec_fn_args_tuple) {
        /* This is where the user has asked us to deadlock their program. */
        result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
        if (result == NULL) {
            /* Stringifying the exception or traceback would involve
             * memory allocation and thus potential for deadlock.
             * We've already faced potential deadlock by calling back
             * into Python in the first place, so it probably doesn't
             * matter but we avoid it to minimize the possibility. */
            err_msg = "Exception occurred in preexec_fn.";
            errno = 0;  /* We don't want to report an OSError. */
            goto error;
        }
        /* Py_DECREF(result); - We're about to exec so why bother? */
    }

    /* close FDs after executing preexec_fn, which might open FDs */
    if (close_fds) {
        /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
        _close_open_fds(3, py_fds_to_keep);
    }

    /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
    /* given the executable_list generated by Lib/subprocess.py.     */
    saved_errno = 0;
    for (i = 0; exec_array[i] != NULL; ++i) {
        const char *executable = exec_array[i];
        if (envp) {
            execve(executable, argv, envp);
        } else {
            execv(executable, argv);
        }
        /* Reaching this point means the exec call failed.  Remember the
           first errno that is neither ENOENT nor ENOTDIR. */
        if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
            saved_errno = errno;
        }
    }
    /* Report the first exec error, not the last. */
    if (saved_errno)
        errno = saved_errno;

error:
    saved_errno = errno;
    /* Report the posix error to our parent process. */
    /* We ignore all write() return values as the total size of our writes is
       less than PIPE_BUF and we cannot do anything about an error anyways.
       Use _Py_write_noraise() to retry write() if it is interrupted by a
       signal (fails with EINTR). */
    if (saved_errno) {
        char *cur;
        _Py_write_noraise(errpipe_write, "OSError:", 8);
        /* Build the hex digits from the least significant end, filling
           hex_errno backwards from its last byte. */
        cur = hex_errno + sizeof(hex_errno);
        while (saved_errno != 0 && cur != hex_errno) {
            *--cur = Py_hexdigits[saved_errno % 16];
            saved_errno /= 16;
        }
        _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
        _Py_write_noraise(errpipe_write, ":", 1);
        if (!reached_preexec) {
            /* Indicate to the parent that the error happened before exec(). */
            _Py_write_noraise(errpipe_write, "noexec", 6);
        }
        /* We can't call strerror(saved_errno).  It is not async signal safe.
         * The parent process will look the error message up. */
    } else {
        /* errno == 0: not an OS level failure (e.g. preexec_fn raised). */
        _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
        _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
    }
}
643
644
/* The main purpose of this wrapper function is to isolate vfork() from both
 * subprocess_fork_exec() and child_exec().  A child process created via
 * vfork() executes on the same stack as the parent process while the latter is
 * suspended, so this function should not be inlined to avoid compiler bugs
 * that might clobber data needed by the parent later.  Additionally,
 * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
 * GCC (see bpo-35823).
 *
 * vfork() is used only in VFORK_USABLE builds and only when child_sigmask
 * is non-NULL; otherwise fork() is used.  All other arguments are passed
 * through unchanged to child_exec().
 *
 * In the parent this returns whatever (v)fork() returned: the child's pid,
 * or a non-zero failure value with no child created.  In the child it never
 * returns: if child_exec() comes back, the child terminates with
 * _exit(255).
 */
_Py_NO_INLINE static pid_t
do_fork_exec(char *const exec_array[],
             char *const argv[],
             char *const envp[],
             const char *cwd,
             int p2cread, int p2cwrite,
             int c2pread, int c2pwrite,
             int errread, int errwrite,
             int errpipe_read, int errpipe_write,
             int close_fds, int restore_signals,
             int call_setsid,
             int call_setgid, gid_t gid,
             int call_setgroups, size_t groups_size, const gid_t *groups,
             int call_setuid, uid_t uid, int child_umask,
             const void *child_sigmask,
             PyObject *py_fds_to_keep,
             PyObject *preexec_fn,
             PyObject *preexec_fn_args_tuple)
{

    pid_t pid;

#ifdef VFORK_USABLE
    if (child_sigmask) {
        /* These are checked by our caller; verify them in debug builds. */
        assert(!call_setuid);
        assert(!call_setgid);
        assert(!call_setgroups);
        assert(preexec_fn == Py_None);

        pid = vfork();
    } else
#endif
    {
        pid = fork();
    }

    /* Non-zero means we are the parent (or the fork failed). */
    if (pid != 0) {
        return pid;
    }

    /* Child process.
     * See the comment above child_exec() for restrictions imposed on
     * the code below.
     */

    if (preexec_fn != Py_None) {
        /* We'll be calling back into Python later so we need to do this.
         * This call may not be async-signal-safe but neither is calling
         * back into Python.  The user asked us to use hope as a strategy
         * to avoid deadlock... */
        PyOS_AfterFork_Child();
    }

    child_exec(exec_array, argv, envp, cwd,
               p2cread, p2cwrite, c2pread, c2pwrite,
               errread, errwrite, errpipe_read, errpipe_write,
               close_fds, restore_signals, call_setsid,
               call_setgid, gid, call_setgroups, groups_size, groups,
               call_setuid, uid, child_umask, child_sigmask,
               py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
    /* child_exec() only comes back when setup or every exec attempt
     * failed; it has already reported the error on errpipe_write. */
    _exit(255);
    return 0;  /* Dead code to avoid a potential compiler warning. */
}
717
718
719 static PyObject *
subprocess_fork_exec(PyObject * module,PyObject * args)720 subprocess_fork_exec(PyObject *module, PyObject *args)
721 {
722 PyObject *gc_module = NULL;
723 PyObject *executable_list, *py_fds_to_keep;
724 PyObject *env_list, *preexec_fn;
725 PyObject *process_args, *converted_args = NULL, *fast_args = NULL;
726 PyObject *preexec_fn_args_tuple = NULL;
727 PyObject *groups_list;
728 PyObject *uid_object, *gid_object;
729 int p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite;
730 int errpipe_read, errpipe_write, close_fds, restore_signals;
731 int call_setsid;
732 int call_setgid = 0, call_setgroups = 0, call_setuid = 0;
733 uid_t uid;
734 gid_t gid, *groups = NULL;
735 int child_umask;
736 PyObject *cwd_obj, *cwd_obj2 = NULL;
737 const char *cwd;
738 pid_t pid = -1;
739 int need_to_reenable_gc = 0;
740 char *const *exec_array, *const *argv = NULL, *const *envp = NULL;
741 Py_ssize_t arg_num, num_groups = 0;
742 int need_after_fork = 0;
743 int saved_errno = 0;
744
745 if (!PyArg_ParseTuple(
746 args, "OOpO!OOiiiiiiiiiiOOOiO:fork_exec",
747 &process_args, &executable_list,
748 &close_fds, &PyTuple_Type, &py_fds_to_keep,
749 &cwd_obj, &env_list,
750 &p2cread, &p2cwrite, &c2pread, &c2pwrite,
751 &errread, &errwrite, &errpipe_read, &errpipe_write,
752 &restore_signals, &call_setsid,
753 &gid_object, &groups_list, &uid_object, &child_umask,
754 &preexec_fn))
755 return NULL;
756
757 if ((preexec_fn != Py_None) &&
758 (PyInterpreterState_Get() != PyInterpreterState_Main())) {
759 PyErr_SetString(PyExc_RuntimeError,
760 "preexec_fn not supported within subinterpreters");
761 return NULL;
762 }
763
764 if (close_fds && errpipe_write < 3) { /* precondition */
765 PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
766 return NULL;
767 }
768 if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
769 PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
770 return NULL;
771 }
772
773 PyInterpreterState *interp = PyInterpreterState_Get();
774 const PyConfig *config = _PyInterpreterState_GetConfig(interp);
775 if (config->_isolated_interpreter) {
776 PyErr_SetString(PyExc_RuntimeError,
777 "subprocess not supported for isolated subinterpreters");
778 return NULL;
779 }
780
781 /* We need to call gc.disable() when we'll be calling preexec_fn */
782 if (preexec_fn != Py_None) {
783 need_to_reenable_gc = PyGC_Disable();
784 }
785
786 exec_array = _PySequence_BytesToCharpArray(executable_list);
787 if (!exec_array)
788 goto cleanup;
789
790 /* Convert args and env into appropriate arguments for exec() */
791 /* These conversions are done in the parent process to avoid allocating
792 or freeing memory in the child process. */
793 if (process_args != Py_None) {
794 Py_ssize_t num_args;
795 /* Equivalent to: */
796 /* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */
797 fast_args = PySequence_Fast(process_args, "argv must be a tuple");
798 if (fast_args == NULL)
799 goto cleanup;
800 num_args = PySequence_Fast_GET_SIZE(fast_args);
801 converted_args = PyTuple_New(num_args);
802 if (converted_args == NULL)
803 goto cleanup;
804 for (arg_num = 0; arg_num < num_args; ++arg_num) {
805 PyObject *borrowed_arg, *converted_arg;
806 if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {
807 PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");
808 goto cleanup;
809 }
810 borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);
811 if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)
812 goto cleanup;
813 PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);
814 }
815
816 argv = _PySequence_BytesToCharpArray(converted_args);
817 Py_CLEAR(converted_args);
818 Py_CLEAR(fast_args);
819 if (!argv)
820 goto cleanup;
821 }
822
823 if (env_list != Py_None) {
824 envp = _PySequence_BytesToCharpArray(env_list);
825 if (!envp)
826 goto cleanup;
827 }
828
829 if (cwd_obj != Py_None) {
830 if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)
831 goto cleanup;
832 cwd = PyBytes_AsString(cwd_obj2);
833 } else {
834 cwd = NULL;
835 }
836
837 if (groups_list != Py_None) {
838 #ifdef HAVE_SETGROUPS
839 Py_ssize_t i;
840 gid_t gid;
841
842 if (!PyList_Check(groups_list)) {
843 PyErr_SetString(PyExc_TypeError,
844 "setgroups argument must be a list");
845 goto cleanup;
846 }
847 num_groups = PySequence_Size(groups_list);
848
849 if (num_groups < 0)
850 goto cleanup;
851
852 if (num_groups > MAX_GROUPS) {
853 PyErr_SetString(PyExc_ValueError, "too many groups");
854 goto cleanup;
855 }
856
857 if ((groups = PyMem_RawMalloc(num_groups * sizeof(gid_t))) == NULL) {
858 PyErr_SetString(PyExc_MemoryError,
859 "failed to allocate memory for group list");
860 goto cleanup;
861 }
862
863 for (i = 0; i < num_groups; i++) {
864 PyObject *elem;
865 elem = PySequence_GetItem(groups_list, i);
866 if (!elem)
867 goto cleanup;
868 if (!PyLong_Check(elem)) {
869 PyErr_SetString(PyExc_TypeError,
870 "groups must be integers");
871 Py_DECREF(elem);
872 goto cleanup;
873 } else {
874 if (!_Py_Gid_Converter(elem, &gid)) {
875 Py_DECREF(elem);
876 PyErr_SetString(PyExc_ValueError, "invalid group id");
877 goto cleanup;
878 }
879 groups[i] = gid;
880 }
881 Py_DECREF(elem);
882 }
883 call_setgroups = 1;
884
885 #else /* HAVE_SETGROUPS */
886 PyErr_BadInternalCall();
887 goto cleanup;
888 #endif /* HAVE_SETGROUPS */
889 }
890
891 if (gid_object != Py_None) {
892 #ifdef HAVE_SETREGID
893 if (!_Py_Gid_Converter(gid_object, &gid))
894 goto cleanup;
895
896 call_setgid = 1;
897
898 #else /* HAVE_SETREGID */
899 PyErr_BadInternalCall();
900 goto cleanup;
901 #endif /* HAVE_SETREUID */
902 }
903
904 if (uid_object != Py_None) {
905 #ifdef HAVE_SETREUID
906 if (!_Py_Uid_Converter(uid_object, &uid))
907 goto cleanup;
908
909 call_setuid = 1;
910
911 #else /* HAVE_SETREUID */
912 PyErr_BadInternalCall();
913 goto cleanup;
914 #endif /* HAVE_SETREUID */
915 }
916
917 /* This must be the last thing done before fork() because we do not
918 * want to call PyOS_BeforeFork() if there is any chance of another
919 * error leading to the cleanup: code without calling fork(). */
920 if (preexec_fn != Py_None) {
921 preexec_fn_args_tuple = PyTuple_New(0);
922 if (!preexec_fn_args_tuple)
923 goto cleanup;
924 PyOS_BeforeFork();
925 need_after_fork = 1;
926 }
927
928 /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
929 const void *old_sigmask = NULL;
930 #ifdef VFORK_USABLE
931 /* Use vfork() only if it's safe. See the comment above child_exec(). */
932 sigset_t old_sigs;
933 if (preexec_fn == Py_None &&
934 !call_setuid && !call_setgid && !call_setgroups) {
935 /* Block all signals to ensure that no signal handlers are run in the
936 * child process while it shares memory with us. Note that signals
937 * used internally by C libraries won't be blocked by
938 * pthread_sigmask(), but signal handlers installed by C libraries
939 * normally service only signals originating from *within the process*,
940 * so it should be sufficient to consider any library function that
941 * might send such a signal to be vfork-unsafe and do not call it in
942 * the child.
943 */
944 sigset_t all_sigs;
945 sigfillset(&all_sigs);
946 if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {
947 goto cleanup;
948 }
949 old_sigmask = &old_sigs;
950 }
951 #endif
952
953 pid = do_fork_exec(exec_array, argv, envp, cwd,
954 p2cread, p2cwrite, c2pread, c2pwrite,
955 errread, errwrite, errpipe_read, errpipe_write,
956 close_fds, restore_signals, call_setsid,
957 call_setgid, gid, call_setgroups, num_groups, groups,
958 call_setuid, uid, child_umask, old_sigmask,
959 py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
960
961 /* Parent (original) process */
962 if (pid == -1) {
963 /* Capture errno for the exception. */
964 saved_errno = errno;
965 }
966
967 #ifdef VFORK_USABLE
968 if (old_sigmask) {
969 /* vfork() semantics guarantees that the parent is blocked
970 * until the child performs _exit() or execve(), so it is safe
971 * to unblock signals once we're here.
972 * Note that in environments where vfork() is implemented as fork(),
973 * such as QEMU user-mode emulation, the parent won't be blocked,
974 * but it won't share the address space with the child,
975 * so it's still safe to unblock the signals.
976 *
977 * We don't handle errors here because this call can't fail
978 * if valid arguments are given, and because there is no good
979 * way for the caller to deal with a failure to restore
980 * the thread signal mask. */
981 (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
982 }
983 #endif
984
985 if (need_after_fork)
986 PyOS_AfterFork_Parent();
987
988 cleanup:
989 if (saved_errno != 0) {
990 errno = saved_errno;
991 /* We can't call this above as PyOS_AfterFork_Parent() calls back
992 * into Python code which would see the unreturned error. */
993 PyErr_SetFromErrno(PyExc_OSError);
994 }
995
996 Py_XDECREF(preexec_fn_args_tuple);
997 PyMem_RawFree(groups);
998 Py_XDECREF(cwd_obj2);
999 if (envp)
1000 _Py_FreeCharPArray(envp);
1001 Py_XDECREF(converted_args);
1002 Py_XDECREF(fast_args);
1003 if (argv)
1004 _Py_FreeCharPArray(argv);
1005 if (exec_array)
1006 _Py_FreeCharPArray(exec_array);
1007
1008 if (need_to_reenable_gc) {
1009 PyGC_Enable();
1010 }
1011 Py_XDECREF(gc_module);
1012
1013 return pid == -1 ? NULL : PyLong_FromPid(pid);
1014 }
1015
1016
1017 PyDoc_STRVAR(subprocess_fork_exec_doc,
1018 "fork_exec(args, executable_list, close_fds, pass_fds, cwd, env,\n\
1019 p2cread, p2cwrite, c2pread, c2pwrite,\n\
1020 errread, errwrite, errpipe_read, errpipe_write,\n\
1021 restore_signals, call_setsid,\n\
1022 gid, groups_list, uid,\n\
1023 preexec_fn)\n\
1024 \n\
1025 Forks a child process, closes parent file descriptors as appropriate in the\n\
1026 child and dups the few that are needed before calling exec() in the child\n\
1027 process.\n\
1028 \n\
1029 If close_fds is true, close file descriptors 3 and higher, except those listed\n\
1030 in the sorted tuple pass_fds.\n\
1031 \n\
1032 The preexec_fn, if supplied, will be called immediately before closing file\n\
1033 descriptors and exec.\n\
1034 WARNING: preexec_fn is NOT SAFE if your application uses threads.\n\
1035 It may trigger infrequent, difficult to debug deadlocks.\n\
1036 \n\
1037 If an error occurs in the child process before the exec, it is\n\
1038 serialized and written to the errpipe_write fd per subprocess.py.\n\
1039 \n\
1040 Returns: the child process's PID.\n\
1041 \n\
1042 Raises: Only on an error in the parent process.\n\
1043 ");
1044
1045 /* module level code ********************************************************/
1046
1047 PyDoc_STRVAR(module_doc,
1048 "A POSIX helper for the subprocess module.");
1049
1050 static PyMethodDef module_methods[] = {
1051 {"fork_exec", subprocess_fork_exec, METH_VARARGS, subprocess_fork_exec_doc},
1052 {NULL, NULL} /* sentinel */
1053 };
1054
1055 static PyModuleDef_Slot _posixsubprocess_slots[] = {
1056 {0, NULL}
1057 };
1058
1059 static struct PyModuleDef _posixsubprocessmodule = {
1060 PyModuleDef_HEAD_INIT,
1061 .m_name = "_posixsubprocess",
1062 .m_doc = module_doc,
1063 .m_size = 0,
1064 .m_methods = module_methods,
1065 .m_slots = _posixsubprocess_slots,
1066 };
1067
1068 PyMODINIT_FUNC
PyInit__posixsubprocess(void)1069 PyInit__posixsubprocess(void)
1070 {
1071 return PyModuleDef_Init(&_posixsubprocessmodule);
1072 }
1073