1 /* Authors: Gregory P. Smith & Jeffrey Yasskin */
2 #ifndef Py_BUILD_CORE_BUILTIN
3 # define Py_BUILD_CORE_MODULE 1
4 #endif
5
6 #include "Python.h"
7 #include "pycore_fileutils.h"
8 #include "pycore_pystate.h"
9 #include "pycore_signal.h" // _Py_RestoreSignals()
10 #if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE)
11 # define _GNU_SOURCE
12 #endif
13 #include <unistd.h> // close()
14 #include <fcntl.h> // fcntl()
15 #ifdef HAVE_SYS_TYPES_H
16 # include <sys/types.h>
17 #endif
18 #if defined(HAVE_SYS_STAT_H)
19 # include <sys/stat.h> // stat()
20 #endif
21 #ifdef HAVE_SYS_SYSCALL_H
22 # include <sys/syscall.h>
23 #endif
24 #if defined(HAVE_SYS_RESOURCE_H)
25 # include <sys/resource.h>
26 #endif
27 #ifdef HAVE_DIRENT_H
28 # include <dirent.h> // opendir()
29 #endif
30 #if defined(HAVE_SETGROUPS)
31 # include <grp.h> // setgroups()
32 #endif
33
34 #include "posixmodule.h"
35
36 #ifdef _Py_MEMORY_SANITIZER
37 # include <sanitizer/msan_interface.h>
38 #endif
39
40 #if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64)
41 # include <sys/linux-syscalls.h>
42 # define SYS_getdents64 __NR_getdents64
43 #endif
44
45 #if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \
46 defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK)
47 /* If this is ever expanded to non-Linux platforms, verify what calls are
48 * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */
49 # include <signal.h>
50 # define VFORK_USABLE 1
51 #endif
52
53 #if defined(__sun) && defined(__SVR4)
54 /* readdir64 is used to work around Solaris 9 bug 6395699. */
55 # define readdir readdir64
56 # define dirent dirent64
57 # if !defined(HAVE_DIRFD)
58 /* Some versions of Solaris lack dirfd(). */
59 # define dirfd(dirp) ((dirp)->dd_fd)
60 # define HAVE_DIRFD
61 # endif
62 #endif
63
64 #if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
65 # define FD_DIR "/dev/fd"
66 #else
67 # define FD_DIR "/proc/self/fd"
68 #endif
69
70 #ifdef NGROUPS_MAX
71 #define MAX_GROUPS NGROUPS_MAX
72 #else
73 #define MAX_GROUPS 64
74 #endif
75
76 #define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0)
77
78 static struct PyModuleDef _posixsubprocessmodule;
79
80 /*[clinic input]
81 module _posixsubprocess
82 [clinic start generated code]*/
83 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c62211df27cf7334]*/
84
85 /*[python input]
86 class pid_t_converter(CConverter):
87 type = 'pid_t'
88 format_unit = '" _Py_PARSE_PID "'
89
90 def parse_arg(self, argname, displayname, *, limited_capi):
91 return self.format_code("""
92 {paramname} = PyLong_AsPid({argname});
93 if ({paramname} == -1 && PyErr_Occurred()) {{{{
94 goto exit;
95 }}}}
96 """,
97 argname=argname)
98 [python start generated code]*/
99 /*[python end generated code: output=da39a3ee5e6b4b0d input=c94349aa1aad151d]*/
100
101 #include "clinic/_posixsubprocess.c.h"
102
/* Convert a string consisting solely of ASCII decimal digits to a
 * non-negative int.  No libc call is made, so this is usable from the
 * fd-closing code that runs between fork() and exec().
 *
 * Returns the parsed value, or -1 if a non-digit character is present or
 * the value does not fit in an int.  An empty string parses as 0.
 */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;
    for (; *name >= '0' && *name <= '9'; ++name) {
        int digit = *name - '0';
        /* Check before multiplying: signed overflow is undefined behavior. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;  /* Too large to be an int. */
        }
        num = num * 10 + digit;
    }
    if (*name) {
        return -1;  /* Non digit found, not a number. */
    }
    return num;
}
116
117
118 #if defined(__FreeBSD__) || defined(__DragonFly__)
119 /* When /dev/fd isn't mounted it is often a static directory populated
120 * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD.
121 * NetBSD and OpenBSD have a /proc fs available (though not necessarily
122 * mounted) and do not have fdescfs for /dev/fd. MacOS X has a devfs
123 * that properly supports /dev/fd.
124 */
125 static int
_is_fdescfs_mounted_on_dev_fd(void)126 _is_fdescfs_mounted_on_dev_fd(void)
127 {
128 struct stat dev_stat;
129 struct stat dev_fd_stat;
130 if (stat("/dev", &dev_stat) != 0)
131 return 0;
132 if (stat(FD_DIR, &dev_fd_stat) != 0)
133 return 0;
134 if (dev_stat.st_dev == dev_fd_stat.st_dev)
135 return 0; /* / == /dev == /dev/fd means it is static. #fail */
136 return 1;
137 }
138 #endif
139
140
141 /* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
142 static int
_sanity_check_python_fd_sequence(PyObject * fd_sequence)143 _sanity_check_python_fd_sequence(PyObject *fd_sequence)
144 {
145 Py_ssize_t seq_idx;
146 long prev_fd = -1;
147 for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) {
148 PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx);
149 long iter_fd;
150 if (!PyLong_Check(py_fd)) {
151 return 1;
152 }
153 iter_fd = PyLong_AsLong(py_fd);
154 if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) {
155 /* Negative, overflow, unsorted, too big for a fd. */
156 return 1;
157 }
158 prev_fd = iter_fd;
159 }
160 return 0;
161 }
162
163
164 /* Is fd found in the sorted Python Sequence? */
165 static int
_is_fd_in_sorted_fd_sequence(int fd,int * fd_sequence,Py_ssize_t fd_sequence_len)166 _is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence,
167 Py_ssize_t fd_sequence_len)
168 {
169 /* Binary search. */
170 Py_ssize_t search_min = 0;
171 Py_ssize_t search_max = fd_sequence_len - 1;
172 if (search_max < 0)
173 return 0;
174 do {
175 long middle = (search_min + search_max) / 2;
176 long middle_fd = fd_sequence[middle];
177 if (fd == middle_fd)
178 return 1;
179 if (fd > middle_fd)
180 search_min = middle + 1;
181 else
182 search_max = middle - 1;
183 } while (search_min <= search_max);
184 return 0;
185 }
186
187
188 // Forward declaration
189 static void _Py_FreeCharPArray(char *const array[]);
190
191 /*
192 * Flatten a sequence of bytes() objects into a C array of
193 * NULL terminated string pointers with a NULL char* terminating the array.
194 * (ie: an argv or env list)
195 *
196 * Memory allocated for the returned list is allocated using PyMem_Malloc()
197 * and MUST be freed by _Py_FreeCharPArray().
198 */
199 static char *const *
_PySequence_BytesToCharpArray(PyObject * self)200 _PySequence_BytesToCharpArray(PyObject* self)
201 {
202 char **array;
203 Py_ssize_t i, argc;
204 PyObject *item = NULL;
205 Py_ssize_t size;
206
207 argc = PySequence_Size(self);
208 if (argc == -1)
209 return NULL;
210
211 assert(argc >= 0);
212
213 if ((size_t)argc > (PY_SSIZE_T_MAX-sizeof(char *)) / sizeof(char *)) {
214 PyErr_NoMemory();
215 return NULL;
216 }
217
218 array = PyMem_Malloc((argc + 1) * sizeof(char *));
219 if (array == NULL) {
220 PyErr_NoMemory();
221 return NULL;
222 }
223 for (i = 0; i < argc; ++i) {
224 char *data;
225 item = PySequence_GetItem(self, i);
226 if (item == NULL) {
227 /* NULL terminate before freeing. */
228 array[i] = NULL;
229 goto fail;
230 }
231 /* check for embedded null bytes */
232 if (PyBytes_AsStringAndSize(item, &data, NULL) < 0) {
233 /* NULL terminate before freeing. */
234 array[i] = NULL;
235 goto fail;
236 }
237 size = PyBytes_GET_SIZE(item) + 1;
238 array[i] = PyMem_Malloc(size);
239 if (!array[i]) {
240 PyErr_NoMemory();
241 goto fail;
242 }
243 memcpy(array[i], data, size);
244 Py_DECREF(item);
245 }
246 array[argc] = NULL;
247
248 return array;
249
250 fail:
251 Py_XDECREF(item);
252 _Py_FreeCharPArray(array);
253 return NULL;
254 }
255
256
/* Release a NULL terminated char** array of C strings: each string and then
 * the array itself is passed to PyMem_Free(). */
static void
_Py_FreeCharPArray(char *const array[])
{
    char *const *cursor = array;
    while (*cursor != NULL) {
        PyMem_Free(*cursor);
        ++cursor;
    }
    PyMem_Free((void*)array);
}
267
268
269 /*
270 * Do all the Python C API calls in the parent process to turn the pass_fds
271 * "py_fds_to_keep" tuple into a C array. The caller owns allocation and
272 * freeing of the array.
273 *
274 * On error an unknown number of array elements may have been filled in.
275 * A Python exception has been set when an error is returned.
276 *
277 * Returns: -1 on error, 0 on success.
278 */
279 static int
convert_fds_to_keep_to_c(PyObject * py_fds_to_keep,int * c_fds_to_keep)280 convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep)
281 {
282 Py_ssize_t i, len;
283
284 len = PyTuple_GET_SIZE(py_fds_to_keep);
285 for (i = 0; i < len; ++i) {
286 PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i);
287 long fd = PyLong_AsLong(fdobj);
288 if (fd == -1 && PyErr_Occurred()) {
289 return -1;
290 }
291 if (fd < 0 || fd > INT_MAX) {
292 PyErr_SetString(PyExc_ValueError,
293 "fd out of range in fds_to_keep.");
294 return -1;
295 }
296 c_fds_to_keep[i] = (int)fd;
297 }
298 return 0;
299 }
300
301
302 /* This function must be async-signal-safe as it is called from child_exec()
303 * after fork() or vfork().
304 */
305 static int
make_inheritable(int * c_fds_to_keep,Py_ssize_t len,int errpipe_write)306 make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write)
307 {
308 Py_ssize_t i;
309
310 for (i = 0; i < len; ++i) {
311 int fd = c_fds_to_keep[i];
312 if (fd == errpipe_write) {
313 /* errpipe_write is part of fds_to_keep. It must be closed at
314 exec(), but kept open in the child process until exec() is
315 called. */
316 continue;
317 }
318 if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0)
319 return -1;
320 }
321 return 0;
322 }
323
324
/* Get the maximum file descriptor that could be opened by this process.
 * This function is async signal safe for use between fork() and exec().
 *
 * Sources are tried in order, each only where the platform provides it:
 * fcntl(F_MAXFD) on NetBSD, getrlimit(RLIMIT_NOFILE) on OpenBSD, then
 * sysconf(_SC_OPEN_MAX).  If none yields a value, 256 is returned.
 */
static long
safe_get_max_fd(void)
{
#if defined(__NetBSD__)
    {
        long netbsd_max_fd = fcntl(0, F_MAXFD);
        if (netbsd_max_fd >= 0) {
            return netbsd_max_fd;
        }
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    {
        struct rlimit limits;
        /* Not on the POSIX async signal safe functions list but likely
         * safe.  TODO - Someone should audit OpenBSD to make sure. */
        if (getrlimit(RLIMIT_NOFILE, &limits) >= 0) {
            return (long) limits.rlim_max;
        }
    }
#endif
#ifdef _SC_OPEN_MAX
    {
        long open_max = sysconf(_SC_OPEN_MAX);
        if (open_max != -1) {
            return open_max;
        }
    }
#endif
    return 256;  /* Matches legacy Lib/subprocess.py behavior. */
}
351
352
353 /* Close all file descriptors in the given range except for those in
354 * fds_to_keep by invoking closer on each subrange.
355 *
356 * If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't
357 * possible to know for sure what the max fd to go up to is for
358 * processes with the capability of raising their maximum, or in case
359 * a process opened a high fd and then lowered its maximum.
360 */
361 static int
_close_range_except(int start_fd,int end_fd,int * fds_to_keep,Py_ssize_t fds_to_keep_len,int (* closer)(int,int))362 _close_range_except(int start_fd,
363 int end_fd,
364 int *fds_to_keep,
365 Py_ssize_t fds_to_keep_len,
366 int (*closer)(int, int))
367 {
368 if (end_fd == -1) {
369 end_fd = Py_MIN(safe_get_max_fd(), INT_MAX);
370 }
371 Py_ssize_t keep_seq_idx;
372 /* As fds_to_keep is sorted we can loop through the list closing
373 * fds in between any in the keep list falling within our range. */
374 for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) {
375 int keep_fd = fds_to_keep[keep_seq_idx];
376 if (keep_fd < start_fd)
377 continue;
378 if (closer(start_fd, keep_fd - 1) != 0)
379 return -1;
380 start_fd = keep_fd + 1;
381 }
382 if (start_fd <= end_fd) {
383 if (closer(start_fd, end_fd) != 0)
384 return -1;
385 }
386 return 0;
387 }
388
389 #if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
/* Record layout used to interpret the buffer filled by the getdents64
 * system call in _close_open_fds_safe().
 *
 * It doesn't matter if d_name has room for NAME_MAX chars; we're using this
 * only to read a directory of short file descriptor number names.  The kernel
 * will return an error if we didn't give it enough space.  Highly Unlikely.
 * This structure is very old and stable: It will not change unless the kernel
 * chooses to break compatibility with all existing binaries.  Highly Unlikely.
 */
struct linux_dirent64 {
    unsigned long long d_ino;
    long long d_off;
    unsigned short d_reclen;     /* Length of this linux_dirent */
    unsigned char  d_type;
    char           d_name[256];  /* Filename (null-terminated) */
};
403
/* Closer callback for _close_range_except(): call close() on every fd in
 * the inclusive range [first, last], ignoring individual failures.
 * Always returns 0. */
static int
_brute_force_closer(int first, int last)
{
    int fd = first;
    while (fd <= last) {
        /* Ignore errors */
        (void)close(fd);
        fd++;
    }
    return 0;
}
413
414 /* Close all open file descriptors in the range from start_fd and higher
415 * Do not close any in the sorted fds_to_keep list.
416 *
417 * This version is async signal safe as it does not make any unsafe C library
418 * calls, malloc calls or handle any locks. It is _unfortunate_ to be forced
419 * to resort to making a kernel system call directly but this is the ONLY api
420 * available that does no harm. opendir/readdir/closedir perform memory
421 * allocation and locking so while they usually work they are not guaranteed
422 * to (especially if you have replaced your malloc implementation). A version
423 * of this function that uses those can be found in the _maybe_unsafe variant.
424 *
425 * This is Linux specific because that is all I am ready to test it on. It
426 * should be easy to add OS specific dirent or dirent64 structures and modify
427 * it with some cpp #define magic to work on other OSes as well if you want.
428 */
429 static void
_close_open_fds_safe(int start_fd,int * fds_to_keep,Py_ssize_t fds_to_keep_len)430 _close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
431 {
432 int fd_dir_fd;
433
434 fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY);
435 if (fd_dir_fd == -1) {
436 /* No way to get a list of open fds. */
437 _close_range_except(start_fd, -1,
438 fds_to_keep, fds_to_keep_len,
439 _brute_force_closer);
440 return;
441 } else {
442 char buffer[sizeof(struct linux_dirent64)];
443 int bytes;
444 while ((bytes = syscall(SYS_getdents64, fd_dir_fd,
445 (struct linux_dirent64 *)buffer,
446 sizeof(buffer))) > 0) {
447 struct linux_dirent64 *entry;
448 int offset;
449 #ifdef _Py_MEMORY_SANITIZER
450 __msan_unpoison(buffer, bytes);
451 #endif
452 for (offset = 0; offset < bytes; offset += entry->d_reclen) {
453 int fd;
454 entry = (struct linux_dirent64 *)(buffer + offset);
455 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
456 continue; /* Not a number. */
457 if (fd != fd_dir_fd && fd >= start_fd &&
458 !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
459 fds_to_keep_len)) {
460 close(fd);
461 }
462 }
463 }
464 close(fd_dir_fd);
465 }
466 }
467
468 #define _close_open_fds_fallback _close_open_fds_safe
469
470 #else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
471
/* Closer callback for _close_range_except() that delegates the inclusive
 * range [first, last] to _Py_closerange().  No result is inspected; 0 is
 * always returned. */
static int
_unsafe_closer(int first, int last)
{
    const int status = 0;
    _Py_closerange(first, last);
    return status;
}
478
/* Close all open file descriptors from start_fd and higher.
 * Do not close any in the sorted fds_to_keep array.
 *
 * The open fds are discovered by listing FD_DIR; when the directory cannot
 * be opened, or readdir() reports an error, every fd up to the guessed
 * maximum is closed through _close_range_except() instead.
 *
 * This function violates the strict use of async signal safe functions. :(
 * It calls opendir(), readdir() and closedir().  Of these, the one most
 * likely to ever cause a problem is opendir() as it performs an internal
 * malloc().  Practically this should not be a problem.  The Java VM makes the
 * same calls between fork and exec in its own UNIXProcess_md.c implementation.
 *
 * readdir_r() is not used because it provides no benefit.  It is typically
 * implemented as readdir() followed by memcpy().  See also:
 *   http://womble.decadent.org.uk/readdir_r-advisory.html
 */
static void
_close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep,
                             Py_ssize_t fds_to_keep_len)
{
    DIR *proc_fd_dir;
#ifndef HAVE_DIRFD
    /* Without dirfd() we cannot ask which fd opendir() uses, so find the
     * lowest fd at or above start_fd that we are allowed to close. */
    while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep,
                                        fds_to_keep_len)) {
        ++start_fd;
    }
    /* Close our lowest fd before we call opendir so that it is likely to
     * reuse that fd otherwise we might close opendir's file descriptor in
     * our loop.  This trick assumes that fd's are allocated on a lowest
     * available basis. */
    close(start_fd);
    ++start_fd;
#endif

#if defined(__FreeBSD__) || defined(__DragonFly__)
    /* A static /dev/fd does not list the real open fds; treat it as
     * unavailable.  Note the dangling else binds to the opendir() below. */
    if (!_is_fdescfs_mounted_on_dev_fd())
        proc_fd_dir = NULL;
    else
#endif
        proc_fd_dir = opendir(FD_DIR);
    if (!proc_fd_dir) {
        /* No way to get a list of open fds. */
        _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
                            _unsafe_closer);
    } else {
        struct dirent *dir_entry;
#ifdef HAVE_DIRFD
        int fd_used_by_opendir = dirfd(proc_fd_dir);
#else
        /* Assumed to be the fd closed just before opendir() above. */
        int fd_used_by_opendir = start_fd - 1;
#endif
        /* readdir() returns NULL both at end of directory and on error;
         * errno is cleared before each call so the two can be told apart. */
        errno = 0;
        while ((dir_entry = readdir(proc_fd_dir))) {
            int fd;
            if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
                continue;  /* Not a number. */
            if (fd != fd_used_by_opendir && fd >= start_fd &&
                !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep,
                                              fds_to_keep_len)) {
                close(fd);
            }
            errno = 0;
        }
        if (errno) {
            /* readdir error, revert behavior. Highly Unlikely. */
            _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len,
                                _unsafe_closer);
        }
        closedir(proc_fd_dir);
    }
}
547
548 #define _close_open_fds_fallback _close_open_fds_maybe_unsafe
549
550 #endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
551
552 /* We can use close_range() library function only if it's known to be
553 * async-signal-safe.
554 *
555 * On Linux, glibc explicitly documents it to be a thin wrapper over
556 * the system call, and other C libraries are likely to follow glibc.
557 */
558 #if defined(HAVE_CLOSE_RANGE) && \
559 (defined(__linux__) || defined(__FreeBSD__))
560 #define HAVE_ASYNC_SAFE_CLOSE_RANGE
561
/* Closer callback for _close_range_except(): forward the inclusive range
 * [first, last] to close_range() with no flags and return its result. */
static int
_close_range_closer(int first, int last)
{
    int rc = close_range(first, last, 0);
    return rc;
}
567 #endif
568
569 static void
_close_open_fds(int start_fd,int * fds_to_keep,Py_ssize_t fds_to_keep_len)570 _close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len)
571 {
572 #ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE
573 if (_close_range_except(
574 start_fd, INT_MAX, fds_to_keep, fds_to_keep_len,
575 _close_range_closer) == 0) {
576 return;
577 }
578 #endif
579 _close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len);
580 }
581
582 #ifdef VFORK_USABLE
583 /* Reset dispositions for all signals to SIG_DFL except for ignored
584 * signals. This way we ensure that no signal handlers can run
585 * after we unblock signals in a child created by vfork().
586 */
587 static void
reset_signal_handlers(const sigset_t * child_sigmask)588 reset_signal_handlers(const sigset_t *child_sigmask)
589 {
590 struct sigaction sa_dfl = {.sa_handler = SIG_DFL};
591 for (int sig = 1; sig < _NSIG; sig++) {
592 /* Dispositions for SIGKILL and SIGSTOP can't be changed. */
593 if (sig == SIGKILL || sig == SIGSTOP) {
594 continue;
595 }
596
597 /* There is no need to reset the disposition of signals that will
598 * remain blocked across execve() since the kernel will do it. */
599 if (sigismember(child_sigmask, sig) == 1) {
600 continue;
601 }
602
603 struct sigaction sa;
604 /* C libraries usually return EINVAL for signals used
605 * internally (e.g. for thread cancellation), so simply
606 * skip errors here. */
607 if (sigaction(sig, NULL, &sa) == -1) {
608 continue;
609 }
610
611 /* void *h works as these fields are both pointer types already. */
612 void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction :
613 (void *)sa.sa_handler);
614 if (h == SIG_IGN || h == SIG_DFL) {
615 continue;
616 }
617
618 /* This call can't reasonably fail, but if it does, terminating
619 * the child seems to be too harsh, so ignore errors. */
620 (void) sigaction(sig, &sa_dfl, NULL);
621 }
622 }
623 #endif /* VFORK_USABLE */
624
625
/*
 * This function is code executed in the child process immediately after
 * (v)fork to set things up and call exec().
 *
 * All of the code in this function must only use async-signal-safe functions,
 * listed at `man 7 signal` or
 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
 *
 * This restriction is documented at
 * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
 *
 * If this function is called after vfork(), even more care must be taken.
 * The lack of preparations that C libraries normally take on fork(),
 * as well as sharing the address space with the parent, might make even
 * async-signal-safe functions vfork-unsafe. In particular, on Linux,
 * set*id() and setgroups() library functions must not be called, since
 * they have to interact with the library-level thread list and send
 * library-internal signals to implement per-process credentials semantics
 * required by POSIX but not supported natively on Linux. Another reason to
 * avoid this family of functions is that sharing an address space between
 * processes running with different privileges is inherently insecure.
 * See https://bugs.python.org/issue35823 for discussion and references.
 *
 * In some C libraries, setrlimit() has the same thread list/signalling
 * behavior since resource limits were per-thread attributes before
 * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue
 * (https://www.openwall.com/lists/musl/2020/10/15/6).
 *
 * If vfork-unsafe functionality is desired after vfork(), consider using
 * syscall() to obtain it.
 *
 * Parameter notes:
 *   exec_array       NULL terminated list of executable paths, tried in
 *                    order until one exec succeeds.
 *   p2cread/c2pwrite/errwrite
 *                    fds that become the child's stdin/stdout/stderr
 *                    (-1 leaves the corresponding stream untouched).
 *   errpipe_write    fd on which a failure is reported to the parent as
 *                    "OSError:<hex errno>:<msg>" or
 *                    "SubprocessError:0:<msg>".
 *   child_umask, pgid_to_set, extra_group_size
 *                    negative values mean "do not change"; gid and uid use
 *                    (gid_t)-1 / (uid_t)-1 for the same purpose.
 *
 * This function only returns if a setup step or every exec attempt failed;
 * the error has been written to errpipe_write by then.
 */
Py_NO_INLINE static void
child_exec(char *const exec_array[],
           char *const argv[],
           char *const envp[],
           const char *cwd,
           int p2cread, int p2cwrite,
           int c2pread, int c2pwrite,
           int errread, int errwrite,
           int errpipe_read, int errpipe_write,
           int close_fds, int restore_signals,
           int call_setsid, pid_t pgid_to_set,
           gid_t gid,
           Py_ssize_t extra_group_size, const gid_t *extra_groups,
           uid_t uid, int child_umask,
           const void *child_sigmask,
           int *fds_to_keep, Py_ssize_t fds_to_keep_len,
           PyObject *preexec_fn,
           PyObject *preexec_fn_args_tuple)
{
    int i, saved_errno;
    PyObject *result;
    /* Indicate to the parent that the error happened before exec(). */
    const char *err_msg = "noexec";
    /* Buffer large enough to hold a hex integer.  We can't malloc. */
    char hex_errno[sizeof(saved_errno)*2+1];

    if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
        goto error;

    /* Close parent's pipe ends. */
    if (p2cwrite != -1)
        POSIX_CALL(close(p2cwrite));
    if (c2pread != -1)
        POSIX_CALL(close(c2pread));
    if (errread != -1)
        POSIX_CALL(close(errread));
    POSIX_CALL(close(errpipe_read));

    /* When duping fds, if there arises a situation where one of the fds is
       either 0, 1 or 2, it is possible that it is overwritten (#12607). */
    if (c2pwrite == 0) {
        POSIX_CALL(c2pwrite = dup(c2pwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
            goto error;
        }
    }
    /* Loop because the first dup() of fd 0 may itself land on fd 1. */
    while (errwrite == 0 || errwrite == 1) {
        POSIX_CALL(errwrite = dup(errwrite));
        /* issue32270 */
        if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
            goto error;
        }
    }

    /* Dup fds for child.
       dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
       would be a no-op (issue #10806). */
    if (p2cread == 0) {
        if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
            goto error;
    }
    else if (p2cread != -1)
        POSIX_CALL(dup2(p2cread, 0));  /* stdin */

    if (c2pwrite == 1) {
        if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
            goto error;
    }
    else if (c2pwrite != -1)
        POSIX_CALL(dup2(c2pwrite, 1));  /* stdout */

    if (errwrite == 2) {
        if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
            goto error;
    }
    else if (errwrite != -1)
        POSIX_CALL(dup2(errwrite, 2));  /* stderr */

    /* We no longer manually close p2cread, c2pwrite, and errwrite here as
     * _close_open_fds takes care when it is not already non-inheritable. */

    if (cwd) {
        if (chdir(cwd) == -1) {
            /* Distinct message so the parent can tell chdir failed. */
            err_msg = "noexec:chdir";
            goto error;
        }
    }

    if (child_umask >= 0)
        umask(child_umask);  /* umask() always succeeds. */

    if (restore_signals) {
        _Py_RestoreSignals();
    }

#ifdef VFORK_USABLE
    if (child_sigmask) {
        reset_signal_handlers(child_sigmask);
        /* pthread_sigmask() returns the error number instead of setting
         * errno; store it so the error path below reports it. */
        if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) {
            goto error;
        }
    }
#endif

#ifdef HAVE_SETSID
    if (call_setsid)
        POSIX_CALL(setsid());
#endif

#ifdef HAVE_SETPGID
    static_assert(_Py_IS_TYPE_SIGNED(pid_t), "pid_t is unsigned");
    if (pgid_to_set >= 0) {
        POSIX_CALL(setpgid(0, pgid_to_set));
    }
#endif

#ifdef HAVE_SETGROUPS
    if (extra_group_size >= 0) {
        assert((extra_group_size == 0) == (extra_groups == NULL));
        POSIX_CALL(setgroups(extra_group_size, extra_groups));
    }
#endif /* HAVE_SETGROUPS */

#ifdef HAVE_SETREGID
    if (gid != (gid_t)-1)
        POSIX_CALL(setregid(gid, gid));
#endif /* HAVE_SETREGID */

#ifdef HAVE_SETREUID
    if (uid != (uid_t)-1)
        POSIX_CALL(setreuid(uid, uid));
#endif /* HAVE_SETREUID */


    /* From here on a failure is no longer tagged "noexec". */
    err_msg = "";
    if (preexec_fn != Py_None && preexec_fn_args_tuple) {
        /* This is where the user has asked us to deadlock their program. */
        result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL);
        if (result == NULL) {
            /* Stringifying the exception or traceback would involve
             * memory allocation and thus potential for deadlock.
             * We've already faced potential deadlock by calling back
             * into Python in the first place, so it probably doesn't
             * matter but we avoid it to minimize the possibility. */
            err_msg = "Exception occurred in preexec_fn.";
            errno = 0;  /* We don't want to report an OSError. */
            goto error;
        }
        /* Py_DECREF(result); - We're about to exec so why bother? */
    }

    /* close FDs after executing preexec_fn, which might open FDs */
    if (close_fds) {
        /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
        _close_open_fds(3, fds_to_keep, fds_to_keep_len);
    }

    /* This loop matches the Lib/os.py _execvpe()'s PATH search when */
    /* given the executable_list generated by Lib/subprocess.py.     */
    saved_errno = 0;
    for (i = 0; exec_array[i] != NULL; ++i) {
        const char *executable = exec_array[i];
        /* exec only returns on failure, so reaching the code after these
         * calls means this candidate could not be executed. */
        if (envp) {
            execve(executable, argv, envp);
        } else {
            execv(executable, argv);
        }
        /* Remember the first error that is not "no such file/directory". */
        if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
            saved_errno = errno;
        }
    }
    /* Report the first exec error, not the last. */
    if (saved_errno)
        errno = saved_errno;

error:
    saved_errno = errno;
    /* Report the posix error to our parent process. */
    /* We ignore all write() return values as the total size of our writes is
       less than PIPEBUF and we cannot do anything about an error anyways.
       Use _Py_write_noraise() to retry write() if it is interrupted by a
       signal (fails with EINTR). */
    if (saved_errno) {
        char *cur;
        _Py_write_noraise(errpipe_write, "OSError:", 8);
        /* Format errno as hex by hand: digits are produced least
         * significant first, filling the buffer backwards from its end. */
        cur = hex_errno + sizeof(hex_errno);
        while (saved_errno != 0 && cur != hex_errno) {
            *--cur = Py_hexdigits[saved_errno % 16];
            saved_errno /= 16;
        }
        _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
        _Py_write_noraise(errpipe_write, ":", 1);
        /* We can't call strerror(saved_errno).  It is not async signal safe.
         * The parent process will look the error message up. */
    } else {
        _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
    }
    _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
}
857
858
/* The main purpose of this wrapper function is to isolate vfork() from both
 * subprocess_fork_exec() and child_exec(). A child process created via
 * vfork() executes on the same stack as the parent process while the latter is
 * suspended, so this function should not be inlined to avoid compiler bugs
 * that might clobber data needed by the parent later. Additionally,
 * child_exec() should not be inlined to avoid spurious -Wclobber warnings from
 * GCC (see bpo-35823).
 *
 * vfork() is used only when VFORK_USABLE is defined and child_sigmask is
 * non-NULL; otherwise, or if vfork() fails, fork() is used.
 *
 * In the parent, returns the value produced by (v)fork(): the child's pid,
 * or -1 on failure.  The child never returns from this function: it runs
 * child_exec() and calls _exit(255) if that returns.
 */
Py_NO_INLINE static pid_t
do_fork_exec(char *const exec_array[],
             char *const argv[],
             char *const envp[],
             const char *cwd,
             int p2cread, int p2cwrite,
             int c2pread, int c2pwrite,
             int errread, int errwrite,
             int errpipe_read, int errpipe_write,
             int close_fds, int restore_signals,
             int call_setsid, pid_t pgid_to_set,
             gid_t gid,
             Py_ssize_t extra_group_size, const gid_t *extra_groups,
             uid_t uid, int child_umask,
             const void *child_sigmask,
             int *fds_to_keep, Py_ssize_t fds_to_keep_len,
             PyObject *preexec_fn,
             PyObject *preexec_fn_args_tuple)
{

    pid_t pid;

#ifdef VFORK_USABLE
    PyThreadState *vfork_tstate_save;
    if (child_sigmask) {
        /* These are checked by our caller; verify them in debug builds. */
        assert(uid == (uid_t)-1);
        assert(gid == (gid_t)-1);
        assert(extra_group_size < 0);
        assert(preexec_fn == Py_None);

        /* Drop the GIL so that other threads can continue execution while this
         * thread in the parent remains blocked per vfork-semantics on the
         * child's exec syscall outcome. Exec does filesystem access which
         * can take an arbitrarily long time. This addresses GH-104372.
         *
         * The vfork'ed child still runs in our address space. Per POSIX it
         * must be limited to nothing but exec, but the Linux implementation
         * is a little more usable. See the child_exec() comment - The child
         * MUST NOT re-acquire the GIL.
         */
        vfork_tstate_save = PyEval_SaveThread();
        pid = vfork();
        if (pid != 0) {
            // Not in the child process, reacquire the GIL.
            PyEval_RestoreThread(vfork_tstate_save);
        }
        if (pid == (pid_t)-1) {
            /* If vfork() fails, fall back to using fork(). When it isn't
             * allowed in a process by the kernel, vfork can return -1
             * with errno EINVAL. https://bugs.python.org/issue47151. */
            pid = fork();
        }
    } else
#endif
    {
        pid = fork();
    }

    if (pid != 0) {
        // Parent process (this also covers a failed fork, where pid is -1).
        return pid;
    }

    /* Child process.
     * See the comment above child_exec() for restrictions imposed on
     * the code below.
     */

    if (preexec_fn != Py_None) {
        /* We'll be calling back into Python later so we need to do this.
         * This call may not be async-signal-safe but neither is calling
         * back into Python.  The user asked us to use hope as a strategy
         * to avoid deadlock... */
        PyOS_AfterFork_Child();
    }

    child_exec(exec_array, argv, envp, cwd,
               p2cread, p2cwrite, c2pread, c2pwrite,
               errread, errwrite, errpipe_read, errpipe_write,
               close_fds, restore_signals, call_setsid, pgid_to_set,
               gid, extra_group_size, extra_groups,
               uid, child_umask, child_sigmask,
               fds_to_keep, fds_to_keep_len,
               preexec_fn, preexec_fn_args_tuple);
    /* child_exec() only returns when exec failed; the error has already
     * been written to errpipe_write. */
    _exit(255);
    return 0;  /* Dead code to avoid a potential compiler warning. */
}
955
956 /*[clinic input]
957 _posixsubprocess.fork_exec as subprocess_fork_exec
958 args as process_args: object
959 executable_list: object
960 close_fds: bool
961 pass_fds as py_fds_to_keep: object(subclass_of='&PyTuple_Type')
962 cwd as cwd_obj: object
963 env as env_list: object
964 p2cread: int
965 p2cwrite: int
966 c2pread: int
967 c2pwrite: int
968 errread: int
969 errwrite: int
970 errpipe_read: int
971 errpipe_write: int
972 restore_signals: bool
973 call_setsid: bool
974 pgid_to_set: pid_t
975 gid as gid_object: object
976 extra_groups as extra_groups_packed: object
977 uid as uid_object: object
978 child_umask: int
979 preexec_fn: object
980 allow_vfork: bool
981 /
982
983 Spawn a fresh new child process.
984
985 Fork a child process, close parent file descriptors as appropriate in the
986 child and duplicate the few that are needed before calling exec() in the
987 child process.
988
989 If close_fds is True, close file descriptors 3 and higher, except those listed
990 in the sorted tuple pass_fds.
991
992 The preexec_fn, if supplied, will be called immediately before closing file
993 descriptors and exec.
994
995 WARNING: preexec_fn is NOT SAFE if your application uses threads.
996 It may trigger infrequent, difficult to debug deadlocks.
997
998 If an error occurs in the child process before the exec, it is
999 serialized and written to the errpipe_write fd per subprocess.py.
1000
1001 Returns: the child process's PID.
1002
1003 Raises: Only on an error in the parent process.
1004 [clinic start generated code]*/
1005
1006 static PyObject *
subprocess_fork_exec_impl(PyObject * module,PyObject * process_args,PyObject * executable_list,int close_fds,PyObject * py_fds_to_keep,PyObject * cwd_obj,PyObject * env_list,int p2cread,int p2cwrite,int c2pread,int c2pwrite,int errread,int errwrite,int errpipe_read,int errpipe_write,int restore_signals,int call_setsid,pid_t pgid_to_set,PyObject * gid_object,PyObject * extra_groups_packed,PyObject * uid_object,int child_umask,PyObject * preexec_fn,int allow_vfork)1007 subprocess_fork_exec_impl(PyObject *module, PyObject *process_args,
1008 PyObject *executable_list, int close_fds,
1009 PyObject *py_fds_to_keep, PyObject *cwd_obj,
1010 PyObject *env_list, int p2cread, int p2cwrite,
1011 int c2pread, int c2pwrite, int errread,
1012 int errwrite, int errpipe_read, int errpipe_write,
1013 int restore_signals, int call_setsid,
1014 pid_t pgid_to_set, PyObject *gid_object,
1015 PyObject *extra_groups_packed,
1016 PyObject *uid_object, int child_umask,
1017 PyObject *preexec_fn, int allow_vfork)
1018 /*[clinic end generated code: output=7ee4f6ee5cf22b5b input=51757287ef266ffa]*/
1019 {
1020 PyObject *converted_args = NULL, *fast_args = NULL;
1021 PyObject *preexec_fn_args_tuple = NULL;
1022 gid_t *extra_groups = NULL;
1023 PyObject *cwd_obj2 = NULL;
1024 const char *cwd = NULL;
1025 pid_t pid = -1;
1026 int need_to_reenable_gc = 0;
1027 char *const *argv = NULL, *const *envp = NULL;
1028 int need_after_fork = 0;
1029 int saved_errno = 0;
1030 int *c_fds_to_keep = NULL;
1031 Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep);
1032
1033 PyInterpreterState *interp = _PyInterpreterState_GET();
1034 if ((preexec_fn != Py_None) &&
1035 _PyInterpreterState_GetFinalizing(interp) != NULL)
1036 {
1037 PyErr_SetString(PyExc_PythonFinalizationError,
1038 "preexec_fn not supported at interpreter shutdown");
1039 return NULL;
1040 }
1041 if ((preexec_fn != Py_None) && (interp != PyInterpreterState_Main())) {
1042 PyErr_SetString(PyExc_RuntimeError,
1043 "preexec_fn not supported within subinterpreters");
1044 return NULL;
1045 }
1046
1047 if (close_fds && errpipe_write < 3) { /* precondition */
1048 PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
1049 return NULL;
1050 }
1051 if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
1052 PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
1053 return NULL;
1054 }
1055
1056 /* We need to call gc.disable() when we'll be calling preexec_fn */
1057 if (preexec_fn != Py_None) {
1058 need_to_reenable_gc = PyGC_Disable();
1059 }
1060
1061 char *const *exec_array = _PySequence_BytesToCharpArray(executable_list);
1062 if (!exec_array)
1063 goto cleanup;
1064
1065 /* Convert args and env into appropriate arguments for exec() */
1066 /* These conversions are done in the parent process to avoid allocating
1067 or freeing memory in the child process. */
1068 if (process_args != Py_None) {
1069 Py_ssize_t num_args;
1070 /* Equivalent to: */
1071 /* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */
1072 fast_args = PySequence_Fast(process_args, "argv must be a tuple");
1073 if (fast_args == NULL)
1074 goto cleanup;
1075 num_args = PySequence_Fast_GET_SIZE(fast_args);
1076 converted_args = PyTuple_New(num_args);
1077 if (converted_args == NULL)
1078 goto cleanup;
1079 for (Py_ssize_t arg_num = 0; arg_num < num_args; ++arg_num) {
1080 PyObject *borrowed_arg, *converted_arg;
1081 if (PySequence_Fast_GET_SIZE(fast_args) != num_args) {
1082 PyErr_SetString(PyExc_RuntimeError, "args changed during iteration");
1083 goto cleanup;
1084 }
1085 borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num);
1086 if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0)
1087 goto cleanup;
1088 PyTuple_SET_ITEM(converted_args, arg_num, converted_arg);
1089 }
1090
1091 argv = _PySequence_BytesToCharpArray(converted_args);
1092 Py_CLEAR(converted_args);
1093 Py_CLEAR(fast_args);
1094 if (!argv)
1095 goto cleanup;
1096 }
1097
1098 if (env_list != Py_None) {
1099 envp = _PySequence_BytesToCharpArray(env_list);
1100 if (!envp)
1101 goto cleanup;
1102 }
1103
1104 if (cwd_obj != Py_None) {
1105 if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0)
1106 goto cleanup;
1107 cwd = PyBytes_AsString(cwd_obj2);
1108 }
1109
1110 // Special initial value meaning that subprocess API was called with
1111 // extra_groups=None leading to _posixsubprocess.fork_exec(gids=None).
1112 // We use this to differentiate between code desiring a setgroups(0, NULL)
1113 // call vs no call at all. The fast vfork() code path could be used when
1114 // there is no setgroups call.
1115 Py_ssize_t extra_group_size = -2;
1116
1117 if (extra_groups_packed != Py_None) {
1118 #ifdef HAVE_SETGROUPS
1119 if (!PyList_Check(extra_groups_packed)) {
1120 PyErr_SetString(PyExc_TypeError,
1121 "setgroups argument must be a list");
1122 goto cleanup;
1123 }
1124 extra_group_size = PySequence_Size(extra_groups_packed);
1125
1126 if (extra_group_size < 0)
1127 goto cleanup;
1128
1129 if (extra_group_size > MAX_GROUPS) {
1130 PyErr_SetString(PyExc_ValueError, "too many extra_groups");
1131 goto cleanup;
1132 }
1133
1134 /* Deliberately keep extra_groups == NULL for extra_group_size == 0 */
1135 if (extra_group_size > 0) {
1136 extra_groups = PyMem_RawMalloc(extra_group_size * sizeof(gid_t));
1137 if (extra_groups == NULL) {
1138 PyErr_SetString(PyExc_MemoryError,
1139 "failed to allocate memory for group list");
1140 goto cleanup;
1141 }
1142 }
1143
1144 for (Py_ssize_t i = 0; i < extra_group_size; i++) {
1145 PyObject *elem;
1146 elem = PySequence_GetItem(extra_groups_packed, i);
1147 if (!elem)
1148 goto cleanup;
1149 if (!PyLong_Check(elem)) {
1150 PyErr_SetString(PyExc_TypeError,
1151 "extra_groups must be integers");
1152 Py_DECREF(elem);
1153 goto cleanup;
1154 } else {
1155 gid_t gid;
1156 if (!_Py_Gid_Converter(elem, &gid)) {
1157 Py_DECREF(elem);
1158 PyErr_SetString(PyExc_ValueError, "invalid group id");
1159 goto cleanup;
1160 }
1161 extra_groups[i] = gid;
1162 }
1163 Py_DECREF(elem);
1164 }
1165
1166 #else /* HAVE_SETGROUPS */
1167 PyErr_BadInternalCall();
1168 goto cleanup;
1169 #endif /* HAVE_SETGROUPS */
1170 }
1171
1172 gid_t gid = (gid_t)-1;
1173 if (gid_object != Py_None) {
1174 #ifdef HAVE_SETREGID
1175 if (!_Py_Gid_Converter(gid_object, &gid))
1176 goto cleanup;
1177
1178 #else /* HAVE_SETREGID */
1179 PyErr_BadInternalCall();
1180 goto cleanup;
1181 #endif /* HAVE_SETREUID */
1182 }
1183
1184 uid_t uid = (uid_t)-1;
1185 if (uid_object != Py_None) {
1186 #ifdef HAVE_SETREUID
1187 if (!_Py_Uid_Converter(uid_object, &uid))
1188 goto cleanup;
1189
1190 #else /* HAVE_SETREUID */
1191 PyErr_BadInternalCall();
1192 goto cleanup;
1193 #endif /* HAVE_SETREUID */
1194 }
1195
1196 c_fds_to_keep = PyMem_Malloc(fds_to_keep_len * sizeof(int));
1197 if (c_fds_to_keep == NULL) {
1198 PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep");
1199 goto cleanup;
1200 }
1201 if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) {
1202 goto cleanup;
1203 }
1204
1205 /* This must be the last thing done before fork() because we do not
1206 * want to call PyOS_BeforeFork() if there is any chance of another
1207 * error leading to the cleanup: code without calling fork(). */
1208 if (preexec_fn != Py_None) {
1209 preexec_fn_args_tuple = PyTuple_New(0);
1210 if (!preexec_fn_args_tuple)
1211 goto cleanup;
1212 PyOS_BeforeFork();
1213 need_after_fork = 1;
1214 }
1215
1216 /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */
1217 const void *old_sigmask = NULL;
1218 #ifdef VFORK_USABLE
1219 /* Use vfork() only if it's safe. See the comment above child_exec(). */
1220 sigset_t old_sigs;
1221 if (preexec_fn == Py_None && allow_vfork &&
1222 uid == (uid_t)-1 && gid == (gid_t)-1 && extra_group_size < 0) {
1223 /* Block all signals to ensure that no signal handlers are run in the
1224 * child process while it shares memory with us. Note that signals
1225 * used internally by C libraries won't be blocked by
1226 * pthread_sigmask(), but signal handlers installed by C libraries
1227 * normally service only signals originating from *within the process*,
1228 * so it should be sufficient to consider any library function that
1229 * might send such a signal to be vfork-unsafe and do not call it in
1230 * the child.
1231 */
1232 sigset_t all_sigs;
1233 sigfillset(&all_sigs);
1234 if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) {
1235 goto cleanup;
1236 }
1237 old_sigmask = &old_sigs;
1238 }
1239 #endif
1240
1241 pid = do_fork_exec(exec_array, argv, envp, cwd,
1242 p2cread, p2cwrite, c2pread, c2pwrite,
1243 errread, errwrite, errpipe_read, errpipe_write,
1244 close_fds, restore_signals, call_setsid, pgid_to_set,
1245 gid, extra_group_size, extra_groups,
1246 uid, child_umask, old_sigmask,
1247 c_fds_to_keep, fds_to_keep_len,
1248 preexec_fn, preexec_fn_args_tuple);
1249
1250 /* Parent (original) process */
1251 if (pid == (pid_t)-1) {
1252 /* Capture errno for the exception. */
1253 saved_errno = errno;
1254 }
1255
1256 #ifdef VFORK_USABLE
1257 if (old_sigmask) {
1258 /* vfork() semantics guarantees that the parent is blocked
1259 * until the child performs _exit() or execve(), so it is safe
1260 * to unblock signals once we're here.
1261 * Note that in environments where vfork() is implemented as fork(),
1262 * such as QEMU user-mode emulation, the parent won't be blocked,
1263 * but it won't share the address space with the child,
1264 * so it's still safe to unblock the signals.
1265 *
1266 * We don't handle errors here because this call can't fail
1267 * if valid arguments are given, and because there is no good
1268 * way for the caller to deal with a failure to restore
1269 * the thread signal mask. */
1270 (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL);
1271 }
1272 #endif
1273
1274 if (need_after_fork)
1275 PyOS_AfterFork_Parent();
1276
1277 cleanup:
1278 if (c_fds_to_keep != NULL) {
1279 PyMem_Free(c_fds_to_keep);
1280 }
1281
1282 if (saved_errno != 0) {
1283 errno = saved_errno;
1284 /* We can't call this above as PyOS_AfterFork_Parent() calls back
1285 * into Python code which would see the unreturned error. */
1286 PyErr_SetFromErrno(PyExc_OSError);
1287 }
1288
1289 Py_XDECREF(preexec_fn_args_tuple);
1290 PyMem_RawFree(extra_groups);
1291 Py_XDECREF(cwd_obj2);
1292 if (envp)
1293 _Py_FreeCharPArray(envp);
1294 Py_XDECREF(converted_args);
1295 Py_XDECREF(fast_args);
1296 if (argv)
1297 _Py_FreeCharPArray(argv);
1298 if (exec_array)
1299 _Py_FreeCharPArray(exec_array);
1300
1301 if (need_to_reenable_gc) {
1302 PyGC_Enable();
1303 }
1304
1305 return pid == -1 ? NULL : PyLong_FromPid(pid);
1306 }
1307
1308 /* module level code ********************************************************/
1309
1310 PyDoc_STRVAR(module_doc,
1311 "A POSIX helper for the subprocess module.");
1312
1313 static PyMethodDef module_methods[] = {
1314 SUBPROCESS_FORK_EXEC_METHODDEF
1315 {NULL, NULL} /* sentinel */
1316 };
1317
1318 static PyModuleDef_Slot _posixsubprocess_slots[] = {
1319 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1320 {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1321 {0, NULL}
1322 };
1323
1324 static struct PyModuleDef _posixsubprocessmodule = {
1325 PyModuleDef_HEAD_INIT,
1326 .m_name = "_posixsubprocess",
1327 .m_doc = module_doc,
1328 .m_size = 0,
1329 .m_methods = module_methods,
1330 .m_slots = _posixsubprocess_slots,
1331 };
1332
1333 PyMODINIT_FUNC
PyInit__posixsubprocess(void)1334 PyInit__posixsubprocess(void)
1335 {
1336 return PyModuleDef_Init(&_posixsubprocessmodule);
1337 }
1338