• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Authors: Gregory P. Smith & Jeffrey Yasskin */
2 #ifndef Py_BUILD_CORE_BUILTIN
3 #  define Py_BUILD_CORE_MODULE 1
4 #endif
5 
6 #include "Python.h"
7 #include "pycore_fileutils.h"
8 #include "pycore_pystate.h"
9 #include "pycore_signal.h"        // _Py_RestoreSignals()
10 #if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)
11 #  define _GNU_SOURCE
12 #endif
13 #include <unistd.h>               // close()
14 #include <fcntl.h>                // fcntl()
15 #ifdef HAVE_SYS_TYPES_H
16 #  include <sys/types.h>
17 #endif
18 #if defined(HAVE_SYS_STAT_H)
19 #  include <sys/stat.h>           // stat()
20 #endif
21 #ifdef HAVE_SYS_SYSCALL_H
22 #  include <sys/syscall.h>
23 #endif
24 #if defined(HAVE_SYS_RESOURCE_H)
25 #  include <sys/resource.h>
26 #endif
27 #ifdef HAVE_DIRENT_H
28 #  include <dirent.h>             // opendir()
29 #endif
30 #if defined(HAVE_SETGROUPS)
31 #  include <grp.h>                // setgroups()
32 #endif
33 
34 #include "posixmodule.h"
35 
36 #ifdef _Py_MEMORY_SANITIZER
37 # include <sanitizer/msan_interface.h>
38 #endif
39 
40 #if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
41 # include <sys/linux-syscalls.h>
42 # define SYS_getdents64  __NR_getdents64
43 #endif
44 
45 #if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
46     defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
47 /* If this is ever expanded to non-Linux platforms, verify what calls are
48  * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */
49 # include <signal.h>
50 # define VFORK_USABLE 1
51 #endif
52 
53 #if defined(__sun) && defined(__SVR4)
54 /* readdir64 is used to work around Solaris 9 bug 6395699. */
55 # define readdir readdir64
56 # define dirent dirent64
57 # if !defined(HAVE_DIRFD)
58 /* Some versions of Solaris lack dirfd(). */
59 #  define dirfd(dirp) ((dirp)->dd_fd)
60 #  define HAVE_DIRFD
61 # endif
62 #endif
63 
64 #if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
65 # define FD_DIR "/dev/fd"
66 #else
67 # define FD_DIR "/proc/self/fd"
68 #endif
69 
70 #ifdef NGROUPS_MAX
71 #define MAX_GROUPS NGROUPS_MAX
72 #else
73 #define MAX_GROUPS 64
74 #endif
75 
76 #define POSIX_CALL(call)   do { if ((call) == -1) goto error; } while (0)
77 
78 static struct PyModuleDef _posixsubprocessmodule;
79 
80 /*[clinic input]
81 module _posixsubprocess
82 [clinic start generated code]*/
83 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c62211df27cf7334]*/
84 
85 /*[python input]
86 class pid_t_converter(CConverter):
87     type = 'pid_t'
88     format_unit = '" _Py_PARSE_PID "'
89 
90     def parse_arg(self, argname, displayname, *, limited_capi):
91         return self.format_code("""
92             {paramname} = PyLong_AsPid({argname});
93             if ({paramname} == -1 && PyErr_Occurred()) {{{{
94                 goto exit;
95             }}}}
96             """,
97             argname=argname)
98 [python start generated code]*/
99 /*[python end generated code: output=da39a3ee5e6b4b0d input=c94349aa1aad151d]*/
100 
101 #include "clinic/_posixsubprocess.c.h"
102 
/* Convert an ASCII decimal string to a non-negative int without calling
 * into libc (so it stays usable between fork() and exec()).
 *
 * Returns the parsed value, or -1 when `name` contains a non-digit character
 * or when the value does not fit in an int.  An empty string parses as 0,
 * which is the long-standing behavior of this helper.
 */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    while (*name >= '0' && *name <= '9') {
        int digit = *name - '0';
        /* Check before the multiply-add: signed overflow is undefined
         * behavior and could otherwise produce a bogus fd number. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;
        }
        num = num * 10 + digit;
        ++name;
    }
    if (*name) {
        return -1;  /* Non digit found, not a number. */
    }
    return num;
}
116 
117 
118 #if defined(__FreeBSD__) || defined(__DragonFly__)
119 /* When /dev/fd isn't mounted it is often a static directory populated
120  * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD.
121  * NetBSD and OpenBSD have a /proc fs available (though not necessarily
122  * mounted) and do not have fdescfs for /dev/fd.  MacOS X has a devfs
123  * that properly supports /dev/fd.
124  */
125 static int
_is_fdescfs_mounted_on_dev_fd(void)126 _is_fdescfs_mounted_on_dev_fd(void)
127 {
128     struct stat dev_stat;
129     struct stat dev_fd_stat;
130     if (stat("/dev", &dev_stat) != 0)
131         return 0;
132     if (stat(FD_DIR, &dev_fd_stat) != 0)
133         return 0;
134     if (dev_stat.st_dev == dev_fd_stat.st_dev)
135         return 0;  /* / == /dev == /dev/fd means it is static. #fail */
136     return 1;
137 }
138 #endif
139 
140 
141 /* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
142 static int
_sanity_check_python_fd_sequence(PyObject * fd_sequence)143 _sanity_check_python_fd_sequence(PyObject *fd_sequence)
144 {
145     Py_ssize_t seq_idx;
146     long prev_fd = -1;
147     for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {
148         PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);
149         long iter_fd;
150         if (!PyLong_Check(py_fd)) {
151             return 1;
152         }
153         iter_fd = PyLong_AsLong(py_fd);
154         if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {
155             /* Negative, overflow, unsorted, too big for a fd. */
156             return 1;
157         }
158         prev_fd = iter_fd;
159     }
160     return 0;
161 }
162 
163 
164 /* Is fd found in the sorted Python Sequence? */
165 static int
_is_fd_in_sorted_fd_sequence(int fd,int * fd_sequence,Py_ssize_t fd_sequence_len)166 _is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,
167                              Py_ssize_t fd_sequence_len)
168 {
169     /* Binary search. */
170     Py_ssize_t search_min = 0;
171     Py_ssize_t search_max = fd_sequence_len - 1;
172     if (search_max < 0)
173         return 0;
174     do {
175         long middle = (search_min + search_max) / 2;
176         long middle_fd = fd_sequence[middle];
177         if (fd == middle_fd)
178             return 1;
179         if (fd > middle_fd)
180             search_min = middle + 1;
181         else
182             search_max = middle - 1;
183     } while (search_min <= search_max);
184     return 0;
185 }
186 
187 
188 // Forward declaration
189 static void _Py_FreeCharPArray(char *const array[]);
190 
191 /*
192  * Flatten a sequence of bytes() objects into a C array of
193  * NULL terminated string pointers with a NULL char* terminating the array.
194  * (ie: an argv or env list)
195  *
196  * Memory allocated for the returned list is allocated using PyMem_Malloc()
197  * and MUST be freed by _Py_FreeCharPArray().
198  */
199 static char *const *
_PySequence_BytesToCharpArray(PyObject * self)200 _PySequence_BytesToCharpArray(PyObject* self)
201 {
202     char **array;
203     Py_ssize_t i, argc;
204     PyObject *item = NULL;
205     Py_ssize_t size;
206 
207     argc = PySequence_Size(self);
208     if (argc == -1)
209         return NULL;
210 
211     assert(argc >= 0);
212 
213     if ((size_t)argc > (PY_SSIZE_T_MAX-sizeof(char *)) / sizeof(char *)) {
214         PyErr_NoMemory();
215         return NULL;
216     }
217 
218     array = PyMem_Malloc((argc + 1) * sizeof(char *));
219     if (array == NULL) {
220         PyErr_NoMemory();
221         return NULL;
222     }
223     for (i = 0; i < argc; ++i) {
224         char *data;
225         item = PySequence_GetItem(self, i);
226         if (item == NULL) {
227             /* NULL terminate before freeing. */
228             array[i] = NULL;
229             goto fail;
230         }
231         /* check for embedded null bytes */
232         if (PyBytes_AsStringAndSize(item, &data, NULL) < 0) {
233             /* NULL terminate before freeing. */
234             array[i] = NULL;
235             goto fail;
236         }
237         size = PyBytes_GET_SIZE(item) + 1;
238         array[i] = PyMem_Malloc(size);
239         if (!array[i]) {
240             PyErr_NoMemory();
241             goto fail;
242         }
243         memcpy(array[i], data, size);
244         Py_DECREF(item);
245     }
246     array[argc] = NULL;
247 
248     return array;
249 
250 fail:
251     Py_XDECREF(item);
252     _Py_FreeCharPArray(array);
253     return NULL;
254 }
255 
256 
257 /* Free's a NULL terminated char** array of C strings. */
258 static void
_Py_FreeCharPArray(char * const array[])259 _Py_FreeCharPArray(char *const array[])
260 {
261     Py_ssize_t i;
262     for (i = 0; array[i] != NULL; ++i) {
263         PyMem_Free(array[i]);
264     }
265     PyMem_Free((void*)array);
266 }
267 
268 
269 /*
270  * Do all the Python C API calls in the parent process to turn the pass_fds
271  * "py_fds_to_keep" tuple into a C array.  The caller owns allocation and
272  * freeing of the array.
273  *
274  * On error an unknown number of array elements may have been filled in.
275  * A Python exception has been set when an error is returned.
276  *
277  * Returns: -1 on error, 0 on success.
278  */
279 static int
convert_fds_to_keep_to_c(PyObject * py_fds_to_keep,int * c_fds_to_keep)280 convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)
281 {
282     Py_ssize_t i, len;
283 
284     len = PyTuple_GET_SIZE(py_fds_to_keep);
285     for (i = 0; i < len; ++i) {
286         PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
287         long fd = PyLong_AsLong(fdobj);
288         if (fd == -1 && PyErr_Occurred()) {
289             return -1;
290         }
291         if (fd < 0 || fd > INT_MAX) {
292             PyErr_SetString(PyExc_ValueError,
293                             "fd out of range in fds_to_keep.");
294             return -1;
295         }
296         c_fds_to_keep[i] = (int)fd;
297     }
298     return 0;
299 }
300 
301 
302 /* This function must be async-signal-safe as it is called from child_exec()
303  * after fork() or vfork().
304  */
305 static int
make_inheritable(int * c_fds_to_keep,Py_ssize_t len,int errpipe_write)306 make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)
307 {
308     Py_ssize_t i;
309 
310     for (i = 0; i < len; ++i) {
311         int fd = c_fds_to_keep[i];
312         if (fd == errpipe_write) {
313             /* errpipe_write is part of fds_to_keep. It must be closed at
314                exec(), but kept open in the child process until exec() is
315                called. */
316             continue;
317         }
318         if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)
319             return -1;
320     }
321     return 0;
322 }
323 
324 
/* Best guess at the highest file descriptor this process could have open.
 *
 * Tries, in order: fcntl(F_MAXFD) on NetBSD, getrlimit(RLIMIT_NOFILE) on
 * OpenBSD, then sysconf(_SC_OPEN_MAX); falls back to 256.
 *
 * Intended to be async signal safe for use between fork() and exec().
 */
static long
safe_get_max_fd(void)
{
#if defined(__NetBSD__)
    {
        long netbsd_max = fcntl(0, F_MAXFD);
        if (netbsd_max >= 0) {
            return netbsd_max;
        }
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    {
        struct rlimit limits;
        /* getrlimit() is not on the POSIX async signal safe list but is
         * likely safe.  TODO - Someone should audit OpenBSD to make sure. */
        if (getrlimit(RLIMIT_NOFILE, &limits) >= 0) {
            return (long) limits.rlim_max;
        }
    }
#endif
#ifdef _SC_OPEN_MAX
    {
        long sysconf_max = sysconf(_SC_OPEN_MAX);
        if (sysconf_max != -1) {
            return sysconf_max;
        }
    }
#endif
    return 256;  /* Matches legacy Lib/subprocess.py behavior. */
}
351 
352 
353 /* Close all file descriptors in the given range except for those in
354  * fds_to_keep by invoking closer on each subrange.
355  *
356  * If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
357  * possible to know for sure what the max fd to go up to is for
358  * processes with the capability of raising their maximum, or in case
359  * a process opened a high fd and then lowered its maximum.
360  */
361 static int
_close_range_except(int start_fd,int end_fd,int * fds_to_keep,Py_ssize_t fds_to_keep_len,int (* closer)(int,int))362 _close_range_except(int start_fd,
363                     int end_fd,
364                     int *fds_to_keep,
365                     Py_ssize_t fds_to_keep_len,
366                     int (*closer)(int, int))
367 {
368     if (end_fd == -1) {
369         end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
370     }
371     Py_ssize_t keep_seq_idx;
372     /* As fds_to_keep is sorted we can loop through the list closing
373      * fds in between any in the keep list falling within our range. */
374     for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {
375         int keep_fd = fds_to_keep[keep_seq_idx];
376         if (keep_fd < start_fd)
377             continue;
378         if (closer(start_fd, keep_fd - 1) != 0)
379             return -1;
380         start_fd = keep_fd + 1;
381     }
382     if (start_fd <= end_fd) {
383         if (closer(start_fd, end_fd) != 0)
384             return -1;
385     }
386     return 0;
387 }
388 
389 #if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* Record layout used to decode the buffer filled by the SYS_getdents64
 * system call in _close_open_fds_safe().
 *
 * It doesn't matter if d_name has room for NAME_MAX chars; we're using this
 * only to read a directory of short file descriptor number names.  The kernel
 * will return an error if we didn't give it enough space.  Highly Unlikely.
 * This structure is very old and stable: It will not change unless the kernel
 * chooses to break compatibility with all existing binaries.  Highly Unlikely.
 */
struct linux_dirent64 {
   unsigned long long d_ino;
   long long d_off;
   unsigned short d_reclen;     /* Length of this linux_dirent; used to step to the next record */
   unsigned char  d_type;
   char           d_name[256];  /* Filename (null-terminated); parsed as an fd number */
};
403 
/* closer callback for _close_range_except(): close() every descriptor in
 * the inclusive range [first, last], ignoring individual close() errors.
 *
 * An empty range (first > last) is a no-op.  Always returns 0.
 */
static int
_brute_force_closer(int first, int last)
{
    if (first > last) {
        return 0;
    }
    /* Test for the end of the range before incrementing so that
     * last == INT_MAX cannot overflow the counter (undefined behavior,
     * and in practice a loop that never terminates). */
    for (int fd = first; ; fd++) {
        /* Ignore errors */
        (void)close(fd);
        if (fd == last) {
            break;
        }
    }
    return 0;
}
413 
414 /* Close all open file descriptors in the range from start_fd and higher
415  * Do not close any in the sorted fds_to_keep list.
416  *
417  * This version is async signal safe as it does not make any unsafe C library
418  * calls, malloc calls or handle any locks.  It is _unfortunate_ to be forced
419  * to resort to making a kernel system call directly but this is the ONLY api
420  * available that does no harm.  opendir/readdir/closedir perform memory
421  * allocation and locking so while they usually work they are not guaranteed
422  * to (especially if you have replaced your malloc implementation).  A version
423  * of this function that uses those can be found in the _maybe_unsafe variant.
424  *
425  * This is Linux specific because that is all I am ready to test it on.  It
426  * should be easy to add OS specific dirent or dirent64 structures and modify
427  * it with some cpp #define magic to work on other OSes as well if you want.
428  */
429 static void
_close_open_fds_safe(int start_fd,int * fds_to_keep,Py_ssize_t fds_to_keep_len)430 _close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
431 {
432     int fd_dir_fd;
433 
434     fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
435     if (fd_dir_fd == -1) {
436         /* No way to get a list of open fds. */
437         _close_range_except(start_fd, -1,
438                             fds_to_keep, fds_to_keep_len,
439                             _brute_force_closer);
440         return;
441     } else {
442         char buffer[sizeof(struct linux_dirent64)];
443         int bytes;
444         while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
445                                 (struct linux_dirent64 *)buffer,
446                                 sizeof(buffer))) > 0) {
447             struct linux_dirent64 *entry;
448             int offset;
449 #ifdef _Py_MEMORY_SANITIZER
450             __msan_unpoison(buffer, bytes);
451 #endif
452             for (offset = 0; offset < bytes; offset += entry->d_reclen) {
453                 int fd;
454                 entry = (struct linux_dirent64 *)(buffer + offset);
455                 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
456                     continue;  /* Not a number. */
457                 if (fd != fd_dir_fd && fd >= start_fd &&
458                     !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
459                                                   fds_to_keep_len)) {
460                     close(fd);
461                 }
462             }
463         }
464         close(fd_dir_fd);
465     }
466 }
467 
468 #define _close_open_fds_fallback _close_open_fds_safe
469 
470 #else  /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
471 
/* closer callback for _close_range_except() that hands the inclusive range
 * [first, last] to _Py_closerange().  No error is reported to the caller:
 * the result is always 0. */
static int
_unsafe_closer(int first, int last)
{
    const int status = 0;

    _Py_closerange(first, last);
    return status;
}
478 
479 /* Close all open file descriptors from start_fd and higher.
480  * Do not close any in the sorted fds_to_keep tuple.
481  *
482  * This function violates the strict use of async signal safe functions. :(
483  * It calls opendir(), readdir() and closedir().  Of these, the one most
484  * likely to ever cause a problem is opendir() as it performs an internal
485  * malloc().  Practically this should not be a problem.  The Java VM makes the
486  * same calls between fork and exec in its own UNIXProcess_md.c implementation.
487  *
488  * readdir_r() is not used because it provides no benefit.  It is typically
489  * implemented as readdir() followed by memcpy().  See also:
490  *   http://womble.decadent.org.uk/readdir_r-advisory.html
491  */
492 static void
_close_open_fds_maybe_unsafe(int start_fd,int * fds_to_keep,Py_ssize_t fds_to_keep_len)493 _close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,
494                              Py_ssize_t fds_to_keep_len)
495 {
496     DIR *proc_fd_dir;
497 #ifndef HAVE_DIRFD
498     while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,
499                                         fds_to_keep_len)) {
500         ++start_fd;
501     }
502     /* Close our lowest fd before we call opendir so that it is likely to
503      * reuse that fd otherwise we might close opendir's file descriptor in
504      * our loop.  This trick assumes that fd's are allocated on a lowest
505      * available basis. */
506     close(start_fd);
507     ++start_fd;
508 #endif
509 
510 #if defined(__FreeBSD__) || defined(__DragonFly__)
511     if (!_is_fdescfs_mounted_on_dev_fd())
512         proc_fd_dir = NULL;
513     else
514 #endif
515         proc_fd_dir = opendir(FD_DIR);
516     if (!proc_fd_dir) {
517         /* No way to get a list of open fds. */
518         _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
519                             _unsafe_closer);
520     } else {
521         struct dirent *dir_entry;
522 #ifdef HAVE_DIRFD
523         int fd_used_by_opendir = dirfd(proc_fd_dir);
524 #else
525         int fd_used_by_opendir = start_fd - 1;
526 #endif
527         errno = 0;
528         while ((dir_entry = readdir(proc_fd_dir))) {
529             int fd;
530             if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
531                 continue;  /* Not a number. */
532             if (fd != fd_used_by_opendir && fd >= start_fd &&
533                 !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
534                                               fds_to_keep_len)) {
535                 close(fd);
536             }
537             errno = 0;
538         }
539         if (errno) {
540             /* readdir error, revert behavior. Highly Unlikely. */
541             _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
542                                 _unsafe_closer);
543         }
544         closedir(proc_fd_dir);
545     }
546 }
547 
548 #define _close_open_fds_fallback _close_open_fds_maybe_unsafe
549 
550 #endif  /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
551 
552 /* We can use close_range() library function only if it's known to be
553  * async-signal-safe.
554  *
555  * On Linux, glibc explicitly documents it to be a thin wrapper over
556  * the system call, and other C libraries are likely to follow glibc.
557  */
558 #if defined(HAVE_CLOSE_RANGE) && \
559     (defined(__linux__) || defined(__FreeBSD__))
560 #define HAVE_ASYNC_SAFE_CLOSE_RANGE
561 
/* closer callback for _close_range_except(): forwards the inclusive range
 * [first, last] to close_range() with no flags and returns its result. */
static int
_close_range_closer(int first, int last)
{
    const int no_flags = 0;

    return close_range(first, last, no_flags);
}
567 #endif
568 
569 static void
_close_open_fds(int start_fd,int * fds_to_keep,Py_ssize_t fds_to_keep_len)570 _close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
571 {
572 #ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
573     if (_close_range_except(
574             start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,
575             _close_range_closer) == 0) {
576         return;
577     }
578 #endif
579     _close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);
580 }
581 
582 #ifdef VFORK_USABLE
583 /* Reset dispositions for all signals to SIG_DFL except for ignored
584  * signals. This way we ensure that no signal handlers can run
585  * after we unblock signals in a child created by vfork().
586  */
587 static void
reset_signal_handlers(const sigset_t * child_sigmask)588 reset_signal_handlers(const sigset_t *child_sigmask)
589 {
590     struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
591     for (int sig = 1; sig < _NSIG; sig++) {
592         /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
593         if (sig == SIGKILL || sig == SIGSTOP) {
594             continue;
595         }
596 
597         /* There is no need to reset the disposition of signals that will
598          * remain blocked across execve() since the kernel will do it. */
599         if (sigismember(child_sigmask, sig) == 1) {
600             continue;
601         }
602 
603         struct sigaction sa;
604         /* C libraries usually return EINVAL for signals used
605          * internally (e.g. for thread cancellation), so simply
606          * skip errors here. */
607         if (sigaction(sig, NULL, &sa) == -1) {
608             continue;
609         }
610 
611         /* void *h works as these fields are both pointer types already. */
612         void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
613                                               (void *)sa.sa_handler);
614         if (h == SIG_IGN || h == SIG_DFL) {
615             continue;
616         }
617 
618         /* This call can't reasonably fail, but if it does, terminating
619          * the child seems to be too harsh, so ignore errors. */
620         (void) sigaction(sig, &sa_dfl, NULL);
621     }
622 }
623 #endif /* VFORK_USABLE */
624 
625 
626 /*
627  * This function is code executed in the child process immediately after
628  * (v)fork to set things up and call exec().
629  *
630  * All of the code in this function must only use async-signal-safe functions,
631  * listed at `man 7 signal` or
632  * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
633  *
634  * This restriction is documented at
635  * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
636  *
637  * If this function is called after vfork(), even more care must be taken.
638  * The lack of preparations that C libraries normally take on fork(),
639  * as well as sharing the address space with the parent, might make even
640  * async-signal-safe functions vfork-unsafe. In particular, on Linux,
641  * set*id() and setgroups() library functions must not be called, since
642  * they have to interact with the library-level thread list and send
643  * library-internal signals to implement per-process credentials semantics
644  * required by POSIX but not supported natively on Linux. Another reason to
645  * avoid this family of functions is that sharing an address space between
646  * processes running with different privileges is inherently insecure.
647  * See https://bugs.python.org/issue35823 for discussion and references.
648  *
649  * In some C libraries, setrlimit() has the same thread list/signalling
650  * behavior since resource limits were per-thread attributes before
651  * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
652  * (https://www.openwall.com/lists/musl/2020/10/15/6).
653  *
654  * If vfork-unsafe functionality is desired after vfork(), consider using
655  * syscall() to obtain it.
656  */
/* Arguments, as they are used by the body below:
 *   exec_array            NULL terminated list of executable paths; each is
 *                         tried in order until one exec call succeeds.
 *   argv, envp            passed to execve(); when envp is NULL, execv() is
 *                         used instead.
 *   cwd                   if non-NULL, directory to chdir() into.
 *   p2cread, c2pwrite,    unless -1, installed as fd 0, 1 and 2
 *   errwrite              respectively.
 *   p2cwrite, c2pread,    unless -1, closed here ("parent's pipe ends").
 *   errread
 *   errpipe_read          closed here.
 *   errpipe_write         receives the error report written at the end.
 *   close_fds             if true, fds >= 3 not in fds_to_keep are closed.
 *   restore_signals       if true, _Py_RestoreSignals() is called.
 *   call_setsid           if true (and HAVE_SETSID), setsid() is called.
 *   pgid_to_set           if >= 0 (and HAVE_SETPGID), setpgid(0, pgid_to_set).
 *   gid, uid              unless equal to (gid_t)-1 / (uid_t)-1, applied via
 *                         setregid() / setreuid() where available.
 *   extra_group_size,     if extra_group_size >= 0 (and HAVE_SETGROUPS),
 *   extra_groups          passed to setgroups().
 *   child_umask           if >= 0, passed to umask().
 *   child_sigmask         only used when VFORK_USABLE: if non-NULL, signal
 *                         handlers are reset and this mask is installed.
 *   fds_to_keep,          sorted array of fds made inheritable and exempted
 *   fds_to_keep_len       from close_fds.
 *   preexec_fn,           called as preexec_fn(*preexec_fn_args_tuple) unless
 *   preexec_fn_args_tuple preexec_fn is Py_None or the tuple is NULL.
 *
 * Control only reaches the end of this function when a setup step or every
 * exec attempt failed; in that case a report of the form
 * "OSError:<hex errno>:<err_msg>" or "SubprocessError:0:<err_msg>" is written
 * to errpipe_write.
 */
Py_NO_INLINE static void
child_exec(char *const exec_array[],
           char *const argv[],
           char *const envp[],
           const char *cwd,
           int p2cread, int p2cwrite,
           int c2pread, int c2pwrite,
           int errread, int errwrite,
           int errpipe_read, int errpipe_write,
           int close_fds, int restore_signals,
           int call_setsid, pid_t pgid_to_set,
           gid_t gid,
           Py_ssize_t extra_group_size, const gid_t *extra_groups,
           uid_t uid, int child_umask,
           const void *child_sigmask,
           int *fds_to_keep, Py_ssize_t fds_to_keep_len,
           PyObject *preexec_fn,
           PyObject *preexec_fn_args_tuple)
{
    int i, saved_errno;
    PyObject *result;
    /* Indicate to the parent that the error happened before exec(). */
    const char *err_msg = "noexec";
    /* Buffer large enough to hold a hex integer.  We can't malloc. */
    char hex_errno[sizeof(saved_errno)*2+1];

    /* Failures from here on jump to the error label, which reports errno
     * and err_msg through errpipe_write. */
    if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
        goto error;

    /* Close parent's pipe ends. */
    if (p2cwrite != -1)
        POSIX_CALL(close(p2cwrite));
    if (c2pread != -1)
        POSIX_CALL(close(c2pread));
    if (errread != -1)
        POSIX_CALL(close(errread));
    POSIX_CALL(close(errpipe_read));

    /* When duping fds, if there arises a situation where one of the fds is
       either 0, 1 or 2, it is possible that it is overwritten (#12607). */
    if (c2pwrite == 0) {
        POSIX_CALL(c2pwrite = dup(c2pwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
            goto error;
        }
    }
    /* Keep duplicating until errwrite no longer occupies fd 0 or fd 1. */
    while (errwrite == 0 || errwrite == 1) {
        POSIX_CALL(errwrite = dup(errwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
            goto error;
        }
    }

    /* Dup fds for child.
       dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
       would be a no-op (issue #10806). */
    if (p2cread == 0) {
        if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
            goto error;
    }
    else if (p2cread != -1)
        POSIX_CALL(dup2(p2cread, 0));  /* stdin */

    if (c2pwrite == 1) {
        if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
            goto error;
    }
    else if (c2pwrite != -1)
        POSIX_CALL(dup2(c2pwrite, 1));  /* stdout */

    if (errwrite == 2) {
        if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
            goto error;
    }
    else if (errwrite != -1)
        POSIX_CALL(dup2(errwrite, 2));  /* stderr */

    /* We no longer manually close p2cread, c2pwrite, and errwrite here as
     * _close_open_fds takes care when it is not already non-inheritable. */

    if (cwd) {
        if (chdir(cwd) == -1) {
            /* Lets the parent tell a chdir failure from other errors. */
            err_msg = "noexec:chdir";
            goto error;
        }
    }

    if (child_umask >= 0)
        umask(child_umask);  /* umask() always succeeds. */

    if (restore_signals) {
        _Py_RestoreSignals();
    }

#ifdef VFORK_USABLE
    if (child_sigmask) {
        /* Handlers are reset before the mask is installed so that none can
         * run once signals become unblocked. */
        reset_signal_handlers(child_sigmask);
        /* pthread_sigmask() returns the error number rather than setting
         * errno, so store it there for the error report. */
        if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
            goto error;
        }
    }
#endif

#ifdef HAVE_SETSID
    if (call_setsid)
        POSIX_CALL(setsid());
#endif

#ifdef HAVE_SETPGID
    /* The ">= 0" test below relies on pid_t being a signed type. */
    static_assert(_Py_IS_TYPE_SIGNED(pid_t), "pid_t is unsigned");
    if (pgid_to_set >= 0) {
        POSIX_CALL(setpgid(0, pgid_to_set));
    }
#endif

#ifdef HAVE_SETGROUPS
    if (extra_group_size >= 0) {
        assert((extra_group_size == 0) == (extra_groups == NULL));
        POSIX_CALL(setgroups(extra_group_size, extra_groups));
    }
#endif /* HAVE_SETGROUPS */

#ifdef HAVE_SETREGID
    if (gid != (gid_t)-1)
        POSIX_CALL(setregid(gid, gid));
#endif /* HAVE_SETREGID */

#ifdef HAVE_SETREUID
    if (uid != (uid_t)-1)
        POSIX_CALL(setreuid(uid, uid));
#endif /* HAVE_SETREUID */


    /* From this point on a failure is no longer tagged "noexec". */
    err_msg = "";
    if (preexec_fn != Py_None && preexec_fn_args_tuple) {
        /* This is where the user has asked us to deadlock their program. */
        result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
        if (result == NULL) {
            /* Stringifying the exception or traceback would involve
             * memory allocation and thus potential for deadlock.
             * We've already faced potential deadlock by calling back
             * into Python in the first place, so it probably doesn't
             * matter but we avoid it to minimize the possibility. */
            err_msg = "Exception occurred in preexec_fn.";
            errno = 0;  /* We don't want to report an OSError. */
            goto error;
        }
        /* Py_DECREF(result); - We're about to exec so why bother? */
    }

    /* close FDs after executing preexec_fn, which might open FDs */
    if (close_fds) {
        /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
        _close_open_fds(3, fds_to_keep, fds_to_keep_len);
    }

    /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
    /* given the executable_list generated by Lib/subprocess.py.     */
    saved_errno = 0;
    for (i = 0; exec_array[i] != NULL; ++i) {
        const char *executable = exec_array[i];
        if (envp) {
            execve(executable, argv, envp);
        } else {
            execv(executable, argv);
        }
        /* Reaching this line means the exec call failed.  Remember the
         * first error that is neither ENOENT nor ENOTDIR. */
        if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
            saved_errno = errno;
        }
    }
    /* Report the first exec error, not the last. */
    if (saved_errno)
        errno = saved_errno;

error:
    saved_errno = errno;
    /* Report the posix error to our parent process. */
    /* We ignore all write() return values as the total size of our writes is
       less than PIPEBUF and we cannot do anything about an error anyways.
       Use _Py_write_noraise() to retry write() if it is interrupted by a
       signal (fails with EINTR). */
    if (saved_errno) {
        char *cur;
        _Py_write_noraise(errpipe_write, "OSError:", 8);
        /* Build the hex digits backwards from the end of the buffer;
         * cur ends up pointing at the most significant digit. */
        cur = hex_errno + sizeof(hex_errno);
        while (saved_errno != 0 && cur != hex_errno) {
            *--cur = Py_hexdigits[saved_errno % 16];
            saved_errno /= 16;
        }
        _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
        _Py_write_noraise(errpipe_write, ":", 1);
        /* We can't call strerror(saved_errno).  It is not async signal safe.
         * The parent process will look the error message up. */
    } else {
        /* errno == 0: a failure that should not surface as an OSError. */
        _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
    }
    _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
}
857 
858 
859 /* The main purpose of this wrapper function is to isolate vfork() from both
860  * subprocess_fork_exec() and child_exec(). A child process created via
861  * vfork() executes on the same stack as the parent process while the latter is
862  * suspended, so this function should not be inlined to avoid compiler bugs
863  * that might clobber data needed by the parent later. Additionally,
864  * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
865  * GCC (see bpo-35823).
866  */
867 Py_NO_INLINE static pid_t
do_fork_exec(char * const exec_array[],char * const argv[],char * const envp[],const char * cwd,int p2cread,int p2cwrite,int c2pread,int c2pwrite,int errread,int errwrite,int errpipe_read,int errpipe_write,int close_fds,int restore_signals,int call_setsid,pid_t pgid_to_set,gid_t gid,Py_ssize_t extra_group_size,const gid_t * extra_groups,uid_t uid,int child_umask,const void * child_sigmask,int * fds_to_keep,Py_ssize_t fds_to_keep_len,PyObject * preexec_fn,PyObject * preexec_fn_args_tuple)868 do_fork_exec(char *const exec_array[],
869              char *const argv[],
870              char *const envp[],
871              const char *cwd,
872              int p2cread, int p2cwrite,
873              int c2pread, int c2pwrite,
874              int errread, int errwrite,
875              int errpipe_read, int errpipe_write,
876              int close_fds, int restore_signals,
877              int call_setsid, pid_t pgid_to_set,
878              gid_t gid,
879              Py_ssize_t extra_group_size, const gid_t *extra_groups,
880              uid_t uid, int child_umask,
881              const void *child_sigmask,
882              int *fds_to_keep, Py_ssize_t fds_to_keep_len,
883              PyObject *preexec_fn,
884              PyObject *preexec_fn_args_tuple)
885 {
886 
887     pid_t pid;
888 
889 #ifdef VFORK_USABLE
890     PyThreadState *vfork_tstate_save;
891     if (child_sigmask) {
892         /* These are checked by our caller; verify them in debug builds. */
893         assert(uid == (uid_t)-1);
894         assert(gid == (gid_t)-1);
895         assert(extra_group_size < 0);
896         assert(preexec_fn == Py_None);
897 
898         /* Drop the GIL so that other threads can continue execution while this
899          * thread in the parent remains blocked per vfork-semantics on the
900          * child's exec syscall outcome. Exec does filesystem access which
901          * can take an arbitrarily long time. This addresses GH-104372.
902          *
903          * The vfork'ed child still runs in our address space. Per POSIX it
904          * must be limited to nothing but exec, but the Linux implementation
905          * is a little more usable. See the child_exec() comment - The child
906          * MUST NOT re-acquire the GIL.
907          */
908         vfork_tstate_save = PyEval_SaveThread();
909         pid = vfork();
910         if (pid != 0) {
911             // Not in the child process, reacquire the GIL.
912             PyEval_RestoreThread(vfork_tstate_save);
913         }
914         if (pid == (pid_t)-1) {
915             /* If vfork() fails, fall back to using fork(). When it isn't
916              * allowed in a process by the kernel, vfork can return -1
917              * with errno EINVAL. https://bugs.python.org/issue47151. */
918             pid = fork();
919         }
920     } else
921 #endif
922     {
923         pid = fork();
924     }
925 
926     if (pid != 0) {
927         // Parent process.
928         return pid;
929     }
930 
931     /* Child process.
932      * See the comment above child_exec() for restrictions imposed on
933      * the code below.
934      */
935 
936     if (preexec_fn != Py_None) {
937         /* We'll be calling back into Python later so we need to do this.
938          * This call may not be async-signal-safe but neither is calling
939          * back into Python.  The user asked us to use hope as a strategy
940          * to avoid deadlock... */
941         PyOS_AfterFork_Child();
942     }
943 
944     child_exec(exec_array, argv, envp, cwd,
945                p2cread, p2cwrite, c2pread, c2pwrite,
946                errread, errwrite, errpipe_read, errpipe_write,
947                close_fds, restore_signals, call_setsid, pgid_to_set,
948                gid, extra_group_size, extra_groups,
949                uid, child_umask, child_sigmask,
950                fds_to_keep, fds_to_keep_len,
951                preexec_fn, preexec_fn_args_tuple);
952     _exit(255);
953     return 0;  /* Dead code to avoid a potential compiler warning. */
954 }
955 
956 /*[clinic input]
957 _posixsubprocess.fork_exec as subprocess_fork_exec
958     args as process_args: object
959     executable_list: object
960     close_fds: bool
961     pass_fds as py_fds_to_keep: object(subclass_of='&PyTuple_Type')
962     cwd as cwd_obj: object
963     env as env_list: object
964     p2cread: int
965     p2cwrite: int
966     c2pread: int
967     c2pwrite: int
968     errread: int
969     errwrite: int
970     errpipe_read: int
971     errpipe_write: int
972     restore_signals: bool
973     call_setsid: bool
974     pgid_to_set: pid_t
975     gid as gid_object: object
976     extra_groups as extra_groups_packed: object
977     uid as uid_object: object
978     child_umask: int
979     preexec_fn: object
980     allow_vfork: bool
981     /
982 
983 Spawn a fresh new child process.
984 
985 Fork a child process, close parent file descriptors as appropriate in the
986 child and duplicate the few that are needed before calling exec() in the
987 child process.
988 
989 If close_fds is True, close file descriptors 3 and higher, except those listed
990 in the sorted tuple pass_fds.
991 
992 The preexec_fn, if supplied, will be called immediately before closing file
993 descriptors and exec.
994 
995 WARNING: preexec_fn is NOT SAFE if your application uses threads.
996          It may trigger infrequent, difficult to debug deadlocks.
997 
998 If an error occurs in the child process before the exec, it is
999 serialized and written to the errpipe_write fd per subprocess.py.
1000 
1001 Returns: the child process's PID.
1002 
1003 Raises: Only on an error in the parent process.
1004 [clinic start generated code]*/
1005 
1006 static PyObject *
subprocess_fork_exec_impl(PyObject * module,PyObject * process_args,PyObject * executable_list,int close_fds,PyObject * py_fds_to_keep,PyObject * cwd_obj,PyObject * env_list,int p2cread,int p2cwrite,int c2pread,int c2pwrite,int errread,int errwrite,int errpipe_read,int errpipe_write,int restore_signals,int call_setsid,pid_t pgid_to_set,PyObject * gid_object,PyObject * extra_groups_packed,PyObject * uid_object,int child_umask,PyObject * preexec_fn,int allow_vfork)1007 subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
1008                           PyObject *executable_list, int close_fds,
1009                           PyObject *py_fds_to_keep, PyObject *cwd_obj,
1010                           PyObject *env_list, int p2cread, int p2cwrite,
1011                           int c2pread, int c2pwrite, int errread,
1012                           int errwrite, int errpipe_read, int errpipe_write,
1013                           int restore_signals, int call_setsid,
1014                           pid_t pgid_to_set, PyObject *gid_object,
1015                           PyObject *extra_groups_packed,
1016                           PyObject *uid_object, int child_umask,
1017                           PyObject *preexec_fn, int allow_vfork)
1018 /*[clinic end generated code: output=7ee4f6ee5cf22b5b input=51757287ef266ffa]*/
1019 {
1020     PyObject *converted_args = NULL, *fast_args = NULL;
1021     PyObject *preexec_fn_args_tuple = NULL;
1022     gid_t *extra_groups = NULL;
1023     PyObject *cwd_obj2 = NULL;
1024     const char *cwd = NULL;
1025     pid_t pid = -1;
1026     int need_to_reenable_gc = 0;
1027     char *const *argv = NULL, *const *envp = NULL;
1028     int need_after_fork = 0;
1029     int saved_errno = 0;
1030     int *c_fds_to_keep = NULL;
1031     Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);
1032 
1033     PyInterpreterState *interp = _PyInterpreterState_GET();
1034     if ((preexec_fn != Py_None) &&
1035         _PyInterpreterState_GetFinalizing(interp) != NULL)
1036     {
1037         PyErr_SetString(PyExc_PythonFinalizationError,
1038                         "preexec_fn not supported at interpreter shutdown");
1039         return NULL;
1040     }
1041     if ((preexec_fn != Py_None) && (interp != PyInterpreterState_Main())) {
1042         PyErr_SetString(PyExc_RuntimeError,
1043                         "preexec_fn not supported within subinterpreters");
1044         return NULL;
1045     }
1046 
1047     if (close_fds && errpipe_write < 3) {  /* precondition */
1048         PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
1049         return NULL;
1050     }
1051     if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
1052         PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
1053         return NULL;
1054     }
1055 
1056     /* We need to call gc.disable() when we'll be calling preexec_fn */
1057     if (preexec_fn != Py_None) {
1058         need_to_reenable_gc = PyGC_Disable();
1059     }
1060 
1061     char *const *exec_array = _PySequence_BytesToCharpArray(executable_list);
1062     if (!exec_array)
1063         goto cleanup;
1064 
1065     /* Convert args and env into appropriate arguments for exec() */
1066     /* These conversions are done in the parent process to avoid allocating
1067        or freeing memory in the child process. */
1068     if (process_args != Py_None) {
1069         Py_ssize_t num_args;
1070         /* Equivalent to:  */
1071         /*  tuple(PyUnicode_FSConverter(arg) for arg in process_args)  */
1072         fast_args = PySequence_Fast(process_args, "argv must be a tuple");
1073         if (fast_args == NULL)
1074             goto cleanup;
1075         num_args = PySequence_Fast_GET_SIZE(fast_args);
1076         converted_args = PyTuple_New(num_args);
1077         if (converted_args == NULL)
1078             goto cleanup;
1079         for (Py_ssize_t arg_num = 0; arg_num < num_args; ++arg_num) {
1080             PyObject *borrowed_arg, *converted_arg;
1081             if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {
1082                 PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");
1083                 goto cleanup;
1084             }
1085             borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);
1086             if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)
1087                 goto cleanup;
1088             PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);
1089         }
1090 
1091         argv = _PySequence_BytesToCharpArray(converted_args);
1092         Py_CLEAR(converted_args);
1093         Py_CLEAR(fast_args);
1094         if (!argv)
1095             goto cleanup;
1096     }
1097 
1098     if (env_list != Py_None) {
1099         envp = _PySequence_BytesToCharpArray(env_list);
1100         if (!envp)
1101             goto cleanup;
1102     }
1103 
1104     if (cwd_obj != Py_None) {
1105         if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)
1106             goto cleanup;
1107         cwd = PyBytes_AsString(cwd_obj2);
1108     }
1109 
1110     // Special initial value meaning that subprocess API was called with
1111     // extra_groups=None leading to _posixsubprocess.fork_exec(gids=None).
1112     // We use this to differentiate between code desiring a setgroups(0, NULL)
1113     // call vs no call at all.  The fast vfork() code path could be used when
1114     // there is no setgroups call.
1115     Py_ssize_t extra_group_size = -2;
1116 
1117     if (extra_groups_packed != Py_None) {
1118 #ifdef HAVE_SETGROUPS
1119         if (!PyList_Check(extra_groups_packed)) {
1120             PyErr_SetString(PyExc_TypeError,
1121                     "setgroups argument must be a list");
1122             goto cleanup;
1123         }
1124         extra_group_size = PySequence_Size(extra_groups_packed);
1125 
1126         if (extra_group_size < 0)
1127             goto cleanup;
1128 
1129         if (extra_group_size > MAX_GROUPS) {
1130             PyErr_SetString(PyExc_ValueError, "too many extra_groups");
1131             goto cleanup;
1132         }
1133 
1134         /* Deliberately keep extra_groups == NULL for extra_group_size == 0 */
1135         if (extra_group_size > 0) {
1136             extra_groups = PyMem_RawMalloc(extra_group_size * sizeof(gid_t));
1137             if (extra_groups == NULL) {
1138                 PyErr_SetString(PyExc_MemoryError,
1139                         "failed to allocate memory for group list");
1140                 goto cleanup;
1141             }
1142         }
1143 
1144         for (Py_ssize_t i = 0; i < extra_group_size; i++) {
1145             PyObject *elem;
1146             elem = PySequence_GetItem(extra_groups_packed, i);
1147             if (!elem)
1148                 goto cleanup;
1149             if (!PyLong_Check(elem)) {
1150                 PyErr_SetString(PyExc_TypeError,
1151                                 "extra_groups must be integers");
1152                 Py_DECREF(elem);
1153                 goto cleanup;
1154             } else {
1155                 gid_t gid;
1156                 if (!_Py_Gid_Converter(elem, &gid)) {
1157                     Py_DECREF(elem);
1158                     PyErr_SetString(PyExc_ValueError, "invalid group id");
1159                     goto cleanup;
1160                 }
1161                 extra_groups[i] = gid;
1162             }
1163             Py_DECREF(elem);
1164         }
1165 
1166 #else /* HAVE_SETGROUPS */
1167         PyErr_BadInternalCall();
1168         goto cleanup;
1169 #endif /* HAVE_SETGROUPS */
1170     }
1171 
1172     gid_t gid = (gid_t)-1;
1173     if (gid_object != Py_None) {
1174 #ifdef HAVE_SETREGID
1175         if (!_Py_Gid_Converter(gid_object, &gid))
1176             goto cleanup;
1177 
1178 #else /* HAVE_SETREGID */
1179         PyErr_BadInternalCall();
1180         goto cleanup;
1181 #endif /* HAVE_SETREUID */
1182     }
1183 
1184     uid_t uid = (uid_t)-1;
1185     if (uid_object != Py_None) {
1186 #ifdef HAVE_SETREUID
1187         if (!_Py_Uid_Converter(uid_object, &uid))
1188             goto cleanup;
1189 
1190 #else /* HAVE_SETREUID */
1191         PyErr_BadInternalCall();
1192         goto cleanup;
1193 #endif /* HAVE_SETREUID */
1194     }
1195 
1196     c_fds_to_keep = PyMem_Malloc(fds_to_keep_len * sizeof(int));
1197     if (c_fds_to_keep == NULL) {
1198         PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");
1199         goto cleanup;
1200     }
1201     if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {
1202         goto cleanup;
1203     }
1204 
1205     /* This must be the last thing done before fork() because we do not
1206      * want to call PyOS_BeforeFork() if there is any chance of another
1207      * error leading to the cleanup: code without calling fork(). */
1208     if (preexec_fn != Py_None) {
1209         preexec_fn_args_tuple = PyTuple_New(0);
1210         if (!preexec_fn_args_tuple)
1211             goto cleanup;
1212         PyOS_BeforeFork();
1213         need_after_fork = 1;
1214     }
1215 
1216     /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
1217     const void *old_sigmask = NULL;
1218 #ifdef VFORK_USABLE
1219     /* Use vfork() only if it's safe. See the comment above child_exec(). */
1220     sigset_t old_sigs;
1221     if (preexec_fn == Py_None && allow_vfork &&
1222         uid == (uid_t)-1 && gid == (gid_t)-1 && extra_group_size < 0) {
1223         /* Block all signals to ensure that no signal handlers are run in the
1224          * child process while it shares memory with us. Note that signals
1225          * used internally by C libraries won't be blocked by
1226          * pthread_sigmask(), but signal handlers installed by C libraries
1227          * normally service only signals originating from *within the process*,
1228          * so it should be sufficient to consider any library function that
1229          * might send such a signal to be vfork-unsafe and do not call it in
1230          * the child.
1231          */
1232         sigset_t all_sigs;
1233         sigfillset(&all_sigs);
1234         if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {
1235             goto cleanup;
1236         }
1237         old_sigmask = &old_sigs;
1238     }
1239 #endif
1240 
1241     pid = do_fork_exec(exec_array, argv, envp, cwd,
1242                        p2cread, p2cwrite, c2pread, c2pwrite,
1243                        errread, errwrite, errpipe_read, errpipe_write,
1244                        close_fds, restore_signals, call_setsid, pgid_to_set,
1245                        gid, extra_group_size, extra_groups,
1246                        uid, child_umask, old_sigmask,
1247                        c_fds_to_keep, fds_to_keep_len,
1248                        preexec_fn, preexec_fn_args_tuple);
1249 
1250     /* Parent (original) process */
1251     if (pid == (pid_t)-1) {
1252         /* Capture errno for the exception. */
1253         saved_errno = errno;
1254     }
1255 
1256 #ifdef VFORK_USABLE
1257     if (old_sigmask) {
1258         /* vfork() semantics guarantees that the parent is blocked
1259          * until the child performs _exit() or execve(), so it is safe
1260          * to unblock signals once we're here.
1261          * Note that in environments where vfork() is implemented as fork(),
1262          * such as QEMU user-mode emulation, the parent won't be blocked,
1263          * but it won't share the address space with the child,
1264          * so it's still safe to unblock the signals.
1265          *
1266          * We don't handle errors here because this call can't fail
1267          * if valid arguments are given, and because there is no good
1268          * way for the caller to deal with a failure to restore
1269          * the thread signal mask. */
1270         (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
1271     }
1272 #endif
1273 
1274     if (need_after_fork)
1275         PyOS_AfterFork_Parent();
1276 
1277 cleanup:
1278     if (c_fds_to_keep != NULL) {
1279         PyMem_Free(c_fds_to_keep);
1280     }
1281 
1282     if (saved_errno != 0) {
1283         errno = saved_errno;
1284         /* We can't call this above as PyOS_AfterFork_Parent() calls back
1285          * into Python code which would see the unreturned error. */
1286         PyErr_SetFromErrno(PyExc_OSError);
1287     }
1288 
1289     Py_XDECREF(preexec_fn_args_tuple);
1290     PyMem_RawFree(extra_groups);
1291     Py_XDECREF(cwd_obj2);
1292     if (envp)
1293         _Py_FreeCharPArray(envp);
1294     Py_XDECREF(converted_args);
1295     Py_XDECREF(fast_args);
1296     if (argv)
1297         _Py_FreeCharPArray(argv);
1298     if (exec_array)
1299         _Py_FreeCharPArray(exec_array);
1300 
1301     if (need_to_reenable_gc) {
1302         PyGC_Enable();
1303     }
1304 
1305     return pid == -1 ? NULL : PyLong_FromPid(pid);
1306 }
1307 
1308 /* module level code ********************************************************/
1309 
1310 PyDoc_STRVAR(module_doc,
1311 "A POSIX helper for the subprocess module.");
1312 
1313 static PyMethodDef module_methods[] = {
1314     SUBPROCESS_FORK_EXEC_METHODDEF
1315     {NULL, NULL}  /* sentinel */
1316 };
1317 
1318 static PyModuleDef_Slot _posixsubprocess_slots[] = {
1319     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1320     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1321     {0, NULL}
1322 };
1323 
1324 static struct PyModuleDef _posixsubprocessmodule = {
1325         PyModuleDef_HEAD_INIT,
1326         .m_name = "_posixsubprocess",
1327         .m_doc = module_doc,
1328         .m_size = 0,
1329         .m_methods = module_methods,
1330         .m_slots = _posixsubprocess_slots,
1331 };
1332 
1333 PyMODINIT_FUNC
PyInit__posixsubprocess(void)1334 PyInit__posixsubprocess(void)
1335 {
1336     return PyModuleDef_Init(&_posixsubprocessmodule);
1337 }
1338