1 #include "Python.h"
2 #include "pycore_fileutils.h" // fileutils definitions
3 #include "pycore_runtime.h" // _PyRuntime
4 #include "osdefs.h" // SEP
5 #include <locale.h>
6
7 #ifdef MS_WINDOWS
8 # include <malloc.h>
9 # include <windows.h>
10 extern int winerror_to_errno(int);
11 #endif
12
13 #ifdef HAVE_LANGINFO_H
14 #include <langinfo.h>
15 #endif
16
17 #ifdef HAVE_SYS_IOCTL_H
18 #include <sys/ioctl.h>
19 #endif
20
21 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
22 #include <iconv.h>
23 #endif
24
25 #ifdef HAVE_FCNTL_H
26 #include <fcntl.h>
27 #endif /* HAVE_FCNTL_H */
28
29 #ifdef O_CLOEXEC
30 /* Does open() support the O_CLOEXEC flag? Possible values:
31
32 -1: unknown
33 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
34 1: open() supports O_CLOEXEC flag, close-on-exec is set
35
36 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
37 and os.open(). */
38 int _Py_open_cloexec_works = -1;
39 #endif
40
41 // The value must be the same in unicodeobject.c.
42 #define MAX_UNICODE 0x10ffff
43
44 // mbstowcs() and mbrtowc() errors
45 static const size_t DECODE_ERROR = ((size_t)-1);
46 static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
47
48
49 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)50 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
51 {
52 switch (errors)
53 {
54 case _Py_ERROR_STRICT:
55 *surrogateescape = 0;
56 return 0;
57 case _Py_ERROR_SURROGATEESCAPE:
58 *surrogateescape = 1;
59 return 0;
60 default:
61 return -1;
62 }
63 }
64
65
66 PyObject *
_Py_device_encoding(int fd)67 _Py_device_encoding(int fd)
68 {
69 int valid;
70 Py_BEGIN_ALLOW_THREADS
71 _Py_BEGIN_SUPPRESS_IPH
72 valid = isatty(fd);
73 _Py_END_SUPPRESS_IPH
74 Py_END_ALLOW_THREADS
75 if (!valid)
76 Py_RETURN_NONE;
77
78 #if defined(MS_WINDOWS)
79 UINT cp;
80 if (fd == 0)
81 cp = GetConsoleCP();
82 else if (fd == 1 || fd == 2)
83 cp = GetConsoleOutputCP();
84 else
85 cp = 0;
86 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
87 has no console */
88 if (cp == 0) {
89 Py_RETURN_NONE;
90 }
91
92 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
93 #else
94 return _Py_GetLocaleEncodingObject();
95 #endif
96 }
97
98
99 static size_t
is_valid_wide_char(wchar_t ch)100 is_valid_wide_char(wchar_t ch)
101 {
102 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
103 /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
104 for non-Unicode locales, which makes values higher than MAX_UNICODE
105 possibly valid. */
106 return 1;
107 #endif
108 if (Py_UNICODE_IS_SURROGATE(ch)) {
109 // Reject lone surrogate characters
110 return 0;
111 }
112 if (ch > MAX_UNICODE) {
113 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
114 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
115 // it creates characters outside the [U+0000; U+10ffff] range:
116 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
117 return 0;
118 }
119 return 1;
120 }
121
122
123 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)124 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
125 {
126 size_t count = mbstowcs(dest, src, n);
127 if (dest != NULL && count != DECODE_ERROR) {
128 for (size_t i=0; i < count; i++) {
129 wchar_t ch = dest[i];
130 if (!is_valid_wide_char(ch)) {
131 return DECODE_ERROR;
132 }
133 }
134 }
135 return count;
136 }
137
138
139 #ifdef HAVE_MBRTOWC
140 static size_t
_Py_mbrtowc(wchar_t * pwc,const char * str,size_t len,mbstate_t * pmbs)141 _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
142 {
143 assert(pwc != NULL);
144 size_t count = mbrtowc(pwc, str, len, pmbs);
145 if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
146 if (!is_valid_wide_char(*pwc)) {
147 return DECODE_ERROR;
148 }
149 }
150 return count;
151 }
152 #endif
153
154
155 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
156
157 #define USE_FORCE_ASCII
158
159 extern int _Py_normalize_encoding(const char *, char *, size_t);
160
161 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
162 and POSIX locale. nl_langinfo(CODESET) announces an alias of the
163 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
164 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
165 locale.getpreferredencoding() codec. For example, if command line arguments
166 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
167 UnicodeEncodeError instead of retrieving the original byte string.
168
169 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
170 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
171 one byte in range 0x80-0xff can be decoded from the locale encoding. The
172 workaround is also enabled on error, for example if getting the locale
173 failed.
174
175 On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
176 announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
177 ASCII encoding in this case.
178
179 Values of force_ascii:
180
181 1: the workaround is used: Py_EncodeLocale() uses
182 encode_ascii_surrogateescape() and Py_DecodeLocale() uses
183 decode_ascii()
184 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
185 Py_DecodeLocale() uses mbstowcs()
186 -1: unknown, need to call check_force_ascii() to get the value
187 */
188 static int force_ascii = -1;
189
/* Decide whether the "force ASCII" workaround must be used.

   Return 0 if the LC_CTYPE locale is neither "C" nor "POSIX", or if
   nl_langinfo(CODESET) does not name ASCII (or one of its aliases), or if
   none of the bytes 0x80..0xff can be decoded by mbstowcs().

   Return 1 if the locale announces ASCII but mbstowcs() is able to decode
   at least one byte in range 0x80..0xff, on HP-UX if "roman8" is announced
   but byte 0xA7 decodes like Latin1, if nl_langinfo(CODESET) is not
   available, or if an error occurred while querying the locale.

   The probe strings given to mbstowcs() are NUL-terminated: the function
   is specified for NUL-terminated multibyte strings, so a lone byte
   without terminator could let the C library read past the buffer. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* single byte 0xA7 followed by the NUL terminator */
        const unsigned char probe[2] = {0xA7, '\0'};
        wchar_t wch;
        size_t res = _Py_mbstowcs(&wch, (const char *)probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char **alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int byte = 0x80; byte <= 0xff; byte++) {
        /* one non-ASCII byte followed by the NUL terminator */
        const unsigned char probe[2] = {(unsigned char)byte, '\0'};
        wchar_t wch[1];
        size_t res = _Py_mbstowcs(wch, (const char *)probe, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
289
290
291 int
_Py_GetForceASCII(void)292 _Py_GetForceASCII(void)
293 {
294 if (force_ascii == -1) {
295 force_ascii = check_force_ascii();
296 }
297 return force_ascii;
298 }
299
300
301 void
_Py_ResetForceASCII(void)302 _Py_ResetForceASCII(void)
303 {
304 force_ascii = -1;
305 }
306
307
308 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)309 encode_ascii(const wchar_t *text, char **str,
310 size_t *error_pos, const char **reason,
311 int raw_malloc, _Py_error_handler errors)
312 {
313 char *result = NULL, *out;
314 size_t len, i;
315 wchar_t ch;
316
317 int surrogateescape;
318 if (get_surrogateescape(errors, &surrogateescape) < 0) {
319 return -3;
320 }
321
322 len = wcslen(text);
323
324 /* +1 for NULL byte */
325 if (raw_malloc) {
326 result = PyMem_RawMalloc(len + 1);
327 }
328 else {
329 result = PyMem_Malloc(len + 1);
330 }
331 if (result == NULL) {
332 return -1;
333 }
334
335 out = result;
336 for (i=0; i<len; i++) {
337 ch = text[i];
338
339 if (ch <= 0x7f) {
340 /* ASCII character */
341 *out++ = (char)ch;
342 }
343 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
344 /* UTF-8b surrogate */
345 *out++ = (char)(ch - 0xdc00);
346 }
347 else {
348 if (raw_malloc) {
349 PyMem_RawFree(result);
350 }
351 else {
352 PyMem_Free(result);
353 }
354 if (error_pos != NULL) {
355 *error_pos = i;
356 }
357 if (reason) {
358 *reason = "encoding error";
359 }
360 return -2;
361 }
362 }
363 *out = '\0';
364 *str = result;
365 return 0;
366 }
367 #else
/* Variant used when the force-ASCII workaround is not compiled in:
   the workaround is never active, so always return 0. */
int
_Py_GetForceASCII(void)
{
    const int workaround_enabled = 0;
    return workaround_enabled;
}
373
/* Variant used when the force-ASCII workaround is not compiled in:
   there is no cached value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* intentionally empty */
}
379 #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
380
381
382 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
383 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)384 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
385 const char **reason, _Py_error_handler errors)
386 {
387 wchar_t *res;
388 unsigned char *in;
389 wchar_t *out;
390 size_t argsize = strlen(arg) + 1;
391
392 int surrogateescape;
393 if (get_surrogateescape(errors, &surrogateescape) < 0) {
394 return -3;
395 }
396
397 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
398 return -1;
399 }
400 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
401 if (!res) {
402 return -1;
403 }
404
405 out = res;
406 for (in = (unsigned char*)arg; *in; in++) {
407 unsigned char ch = *in;
408 if (ch < 128) {
409 *out++ = ch;
410 }
411 else {
412 if (!surrogateescape) {
413 PyMem_RawFree(res);
414 if (wlen) {
415 *wlen = in - (unsigned char*)arg;
416 }
417 if (reason) {
418 *reason = "decoding error";
419 }
420 return -2;
421 }
422 *out++ = 0xdc00 + ch;
423 }
424 }
425 *out = 0;
426
427 if (wlen != NULL) {
428 *wlen = out - res;
429 }
430 *wstr = res;
431 return 0;
432 }
433 #endif /* !HAVE_MBRTOWC */
434
435 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)436 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
437 const char **reason, _Py_error_handler errors)
438 {
439 wchar_t *res;
440 size_t argsize;
441 size_t count;
442 #ifdef HAVE_MBRTOWC
443 unsigned char *in;
444 wchar_t *out;
445 mbstate_t mbs;
446 #endif
447
448 int surrogateescape;
449 if (get_surrogateescape(errors, &surrogateescape) < 0) {
450 return -3;
451 }
452
453 #ifdef HAVE_BROKEN_MBSTOWCS
454 /* Some platforms have a broken implementation of
455 * mbstowcs which does not count the characters that
456 * would result from conversion. Use an upper bound.
457 */
458 argsize = strlen(arg);
459 #else
460 argsize = _Py_mbstowcs(NULL, arg, 0);
461 #endif
462 if (argsize != DECODE_ERROR) {
463 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
464 return -1;
465 }
466 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
467 if (!res) {
468 return -1;
469 }
470
471 count = _Py_mbstowcs(res, arg, argsize + 1);
472 if (count != DECODE_ERROR) {
473 *wstr = res;
474 if (wlen != NULL) {
475 *wlen = count;
476 }
477 return 0;
478 }
479 PyMem_RawFree(res);
480 }
481
482 /* Conversion failed. Fall back to escaping with surrogateescape. */
483 #ifdef HAVE_MBRTOWC
484 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
485
486 /* Overallocate; as multi-byte characters are in the argument, the
487 actual output could use less memory. */
488 argsize = strlen(arg) + 1;
489 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
490 return -1;
491 }
492 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
493 if (!res) {
494 return -1;
495 }
496
497 in = (unsigned char*)arg;
498 out = res;
499 memset(&mbs, 0, sizeof mbs);
500 while (argsize) {
501 size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
502 if (converted == 0) {
503 /* Reached end of string; null char stored. */
504 break;
505 }
506
507 if (converted == INCOMPLETE_CHARACTER) {
508 /* Incomplete character. This should never happen,
509 since we provide everything that we have -
510 unless there is a bug in the C library, or I
511 misunderstood how mbrtowc works. */
512 goto decode_error;
513 }
514
515 if (converted == DECODE_ERROR) {
516 if (!surrogateescape) {
517 goto decode_error;
518 }
519
520 /* Decoding error. Escape as UTF-8b, and start over in the initial
521 shift state. */
522 *out++ = 0xdc00 + *in++;
523 argsize--;
524 memset(&mbs, 0, sizeof mbs);
525 continue;
526 }
527
528 // _Py_mbrtowc() reject lone surrogate characters
529 assert(!Py_UNICODE_IS_SURROGATE(*out));
530
531 /* successfully converted some bytes */
532 in += converted;
533 argsize -= converted;
534 out++;
535 }
536 if (wlen != NULL) {
537 *wlen = out - res;
538 }
539 *wstr = res;
540 return 0;
541
542 decode_error:
543 PyMem_RawFree(res);
544 if (wlen) {
545 *wlen = in - (unsigned char*)arg;
546 }
547 if (reason) {
548 *reason = "decoding error";
549 }
550 return -2;
551 #else /* HAVE_MBRTOWC */
552 /* Cannot use C locale for escaping; manually escape as if charset
553 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
554 correctly in the locale's charset, which must be an ASCII superset. */
555 return decode_ascii(arg, wstr, wlen, reason, errors);
556 #endif /* HAVE_MBRTOWC */
557 }
558
559
560 /* Decode a byte string from the locale encoding.
561
   Use the strict error handler if 'errors' is _Py_ERROR_STRICT. Use the
   surrogateescape error handler if 'errors' is _Py_ERROR_SURROGATEESCAPE:
   undecodable bytes are decoded as characters in range U+DC80..U+DCFF. If a
   byte sequence can be decoded as a surrogate character, escape the bytes
   using the surrogateescape error handler instead of decoding them.
567
568 On success, return 0 and write the newly allocated wide character string into
569 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
570 the number of wide characters excluding the null character into *wlen.
571
572 On memory allocation failure, return -1.
573
574 On decoding error, return -2. If wlen is not NULL, write the start of
575 invalid byte sequence in the input string into *wlen. If reason is not NULL,
576 write the decoding error message into *reason.
577
578 Return -3 if the error handler 'errors' is not supported.
579
580 Use the Py_EncodeLocaleEx() function to encode the character string back to
581 a byte string. */
582 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)583 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
584 const char **reason,
585 int current_locale, _Py_error_handler errors)
586 {
587 if (current_locale) {
588 #ifdef _Py_FORCE_UTF8_LOCALE
589 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
590 errors);
591 #else
592 return decode_current_locale(arg, wstr, wlen, reason, errors);
593 #endif
594 }
595
596 #ifdef _Py_FORCE_UTF8_FS_ENCODING
597 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
598 errors);
599 #else
600 int use_utf8 = (Py_UTF8Mode == 1);
601 #ifdef MS_WINDOWS
602 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
603 #endif
604 if (use_utf8) {
605 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
606 errors);
607 }
608
609 #ifdef USE_FORCE_ASCII
610 if (force_ascii == -1) {
611 force_ascii = check_force_ascii();
612 }
613
614 if (force_ascii) {
615 /* force ASCII encoding to workaround mbstowcs() issue */
616 return decode_ascii(arg, wstr, wlen, reason, errors);
617 }
618 #endif
619
620 return decode_current_locale(arg, wstr, wlen, reason, errors);
621 #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
622 }
623
624
625 /* Decode a byte string from the locale encoding with the
626 surrogateescape error handler: undecodable bytes are decoded as characters
627 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
628 character, escape the bytes using the surrogateescape error handler instead
629 of decoding them.
630
631 Return a pointer to a newly allocated wide character string, use
632 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
633 wide characters excluding the null character into *size
634
635 Return NULL on decoding error or memory allocation error. If *size* is not
636 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
637 decoding error.
638
639 Decoding errors should never happen, unless there is a bug in the C
640 library.
641
642 Use the Py_EncodeLocale() function to encode the character string back to a
643 byte string. */
644 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)645 Py_DecodeLocale(const char* arg, size_t *wlen)
646 {
647 wchar_t *wstr;
648 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
649 NULL, 0,
650 _Py_ERROR_SURROGATEESCAPE);
651 if (res != 0) {
652 assert(res != -3);
653 if (wlen != NULL) {
654 *wlen = (size_t)res;
655 }
656 return NULL;
657 }
658 return wstr;
659 }
660
661
662 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)663 encode_current_locale(const wchar_t *text, char **str,
664 size_t *error_pos, const char **reason,
665 int raw_malloc, _Py_error_handler errors)
666 {
667 const size_t len = wcslen(text);
668 char *result = NULL, *bytes = NULL;
669 size_t i, size, converted;
670 wchar_t c, buf[2];
671
672 int surrogateescape;
673 if (get_surrogateescape(errors, &surrogateescape) < 0) {
674 return -3;
675 }
676
677 /* The function works in two steps:
678 1. compute the length of the output buffer in bytes (size)
679 2. outputs the bytes */
680 size = 0;
681 buf[1] = 0;
682 while (1) {
683 for (i=0; i < len; i++) {
684 c = text[i];
685 if (c >= 0xdc80 && c <= 0xdcff) {
686 if (!surrogateescape) {
687 goto encode_error;
688 }
689 /* UTF-8b surrogate */
690 if (bytes != NULL) {
691 *bytes++ = c - 0xdc00;
692 size--;
693 }
694 else {
695 size++;
696 }
697 continue;
698 }
699 else {
700 buf[0] = c;
701 if (bytes != NULL) {
702 converted = wcstombs(bytes, buf, size);
703 }
704 else {
705 converted = wcstombs(NULL, buf, 0);
706 }
707 if (converted == DECODE_ERROR) {
708 goto encode_error;
709 }
710 if (bytes != NULL) {
711 bytes += converted;
712 size -= converted;
713 }
714 else {
715 size += converted;
716 }
717 }
718 }
719 if (result != NULL) {
720 *bytes = '\0';
721 break;
722 }
723
724 size += 1; /* nul byte at the end */
725 if (raw_malloc) {
726 result = PyMem_RawMalloc(size);
727 }
728 else {
729 result = PyMem_Malloc(size);
730 }
731 if (result == NULL) {
732 return -1;
733 }
734 bytes = result;
735 }
736 *str = result;
737 return 0;
738
739 encode_error:
740 if (raw_malloc) {
741 PyMem_RawFree(result);
742 }
743 else {
744 PyMem_Free(result);
745 }
746 if (error_pos != NULL) {
747 *error_pos = i;
748 }
749 if (reason) {
750 *reason = "encoding error";
751 }
752 return -2;
753 }
754
755
756 /* Encode a string to the locale encoding.
757
758 Parameters:
759
760 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
761 of PyMem_Malloc().
762 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
763 Python filesystem encoding.
764 * errors: error handler like "strict" or "surrogateescape".
765
766 Return value:
767
    0: success, *str is set to a newly allocated encoded string.
769 -1: memory allocation failure
770 -2: encoding error, set *error_pos and *reason (if set).
771 -3: the error handler 'errors' is not supported.
772 */
773 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)774 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
775 const char **reason,
776 int raw_malloc, int current_locale, _Py_error_handler errors)
777 {
778 if (current_locale) {
779 #ifdef _Py_FORCE_UTF8_LOCALE
780 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
781 raw_malloc, errors);
782 #else
783 return encode_current_locale(text, str, error_pos, reason,
784 raw_malloc, errors);
785 #endif
786 }
787
788 #ifdef _Py_FORCE_UTF8_FS_ENCODING
789 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
790 raw_malloc, errors);
791 #else
792 int use_utf8 = (Py_UTF8Mode == 1);
793 #ifdef MS_WINDOWS
794 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
795 #endif
796 if (use_utf8) {
797 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
798 raw_malloc, errors);
799 }
800
801 #ifdef USE_FORCE_ASCII
802 if (force_ascii == -1) {
803 force_ascii = check_force_ascii();
804 }
805
806 if (force_ascii) {
807 return encode_ascii(text, str, error_pos, reason,
808 raw_malloc, errors);
809 }
810 #endif
811
812 return encode_current_locale(text, str, error_pos, reason,
813 raw_malloc, errors);
814 #endif /* _Py_FORCE_UTF8_FS_ENCODING */
815 }
816
817 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)818 encode_locale(const wchar_t *text, size_t *error_pos,
819 int raw_malloc, int current_locale)
820 {
821 char *str;
822 int res = encode_locale_ex(text, &str, error_pos, NULL,
823 raw_malloc, current_locale,
824 _Py_ERROR_SURROGATEESCAPE);
825 if (res != -2 && error_pos) {
826 *error_pos = (size_t)-1;
827 }
828 if (res != 0) {
829 return NULL;
830 }
831 return str;
832 }
833
834 /* Encode a wide character string to the locale encoding with the
835 surrogateescape error handler: surrogate characters in the range
836 U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
837
838 Return a pointer to a newly allocated byte string, use PyMem_Free() to free
839 the memory. Return NULL on encoding or memory allocation error.
840
841 If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
842 to the index of the invalid character on encoding error.
843
844 Use the Py_DecodeLocale() function to decode the bytes string back to a wide
845 character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
851
852
853 /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
854 instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
860
861
862 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)863 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
864 size_t *error_pos, const char **reason,
865 int current_locale, _Py_error_handler errors)
866 {
867 return encode_locale_ex(text, str, error_pos, reason, 1,
868 current_locale, errors);
869 }
870
871
872 // Get the current locale encoding name:
873 //
874 // - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
875 // - Return "UTF-8" if the UTF-8 Mode is enabled
876 // - On Windows, return the ANSI code page (ex: "cp1250")
877 // - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
878 // - Otherwise, return nl_langinfo(CODESET).
879 //
880 // Return NULL on memory allocation failure.
881 //
882 // See also config_get_locale_encoding()
883 wchar_t*
_Py_GetLocaleEncoding(void)884 _Py_GetLocaleEncoding(void)
885 {
886 #ifdef _Py_FORCE_UTF8_LOCALE
887 // On Android langinfo.h and CODESET are missing,
888 // and UTF-8 is always used in mbstowcs() and wcstombs().
889 return _PyMem_RawWcsdup(L"UTF-8");
890 #else
891 const PyPreConfig *preconfig = &_PyRuntime.preconfig;
892 if (preconfig->utf8_mode) {
893 return _PyMem_RawWcsdup(L"UTF-8");
894 }
895
896 #ifdef MS_WINDOWS
897 wchar_t encoding[23];
898 unsigned int ansi_codepage = GetACP();
899 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
900 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
901 return _PyMem_RawWcsdup(encoding);
902 #else
903 const char *encoding = nl_langinfo(CODESET);
904 if (!encoding || encoding[0] == '\0') {
905 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
906 // macOS if the LC_CTYPE locale is not supported.
907 return _PyMem_RawWcsdup(L"UTF-8");
908 }
909
910 wchar_t *wstr;
911 int res = decode_current_locale(encoding, &wstr, NULL,
912 NULL, _Py_ERROR_SURROGATEESCAPE);
913 if (res < 0) {
914 return NULL;
915 }
916 return wstr;
917 #endif // !MS_WINDOWS
918
919 #endif // !_Py_FORCE_UTF8_LOCALE
920 }
921
922
923 PyObject *
_Py_GetLocaleEncodingObject(void)924 _Py_GetLocaleEncodingObject(void)
925 {
926 wchar_t *encoding = _Py_GetLocaleEncoding();
927 if (encoding == NULL) {
928 PyErr_NoMemory();
929 return NULL;
930 }
931
932 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
933 PyMem_RawFree(encoding);
934 return str;
935 }
936
937 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
938
939 /* Check whether current locale uses Unicode as internal wchar_t form. */
940 int
_Py_LocaleUsesNonUnicodeWchar(void)941 _Py_LocaleUsesNonUnicodeWchar(void)
942 {
943 /* Oracle Solaris uses non-Unicode internal wchar_t form for
944 non-Unicode locales and hence needs conversion to UTF first. */
945 char* codeset = nl_langinfo(CODESET);
946 if (!codeset) {
947 return 0;
948 }
949 /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
950 return (strcmp(codeset, "UTF-8") != 0 && strcmp(codeset, "646") != 0);
951 }
952
953 static wchar_t *
_Py_ConvertWCharForm(const wchar_t * source,Py_ssize_t size,const char * tocode,const char * fromcode)954 _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
955 const char *tocode, const char *fromcode)
956 {
957 Py_BUILD_ASSERT(sizeof(wchar_t) == 4);
958
959 /* Ensure we won't overflow the size. */
960 if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
961 PyErr_NoMemory();
962 return NULL;
963 }
964
965 /* the string doesn't have to be NULL terminated */
966 wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
967 if (target == NULL) {
968 PyErr_NoMemory();
969 return NULL;
970 }
971
972 iconv_t cd = iconv_open(tocode, fromcode);
973 if (cd == (iconv_t)-1) {
974 PyErr_Format(PyExc_ValueError, "iconv_open() failed");
975 PyMem_Free(target);
976 return NULL;
977 }
978
979 char *inbuf = (char *) source;
980 char *outbuf = (char *) target;
981 size_t inbytesleft = sizeof(wchar_t) * size;
982 size_t outbytesleft = inbytesleft;
983
984 size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
985 if (ret == DECODE_ERROR) {
986 PyErr_Format(PyExc_ValueError, "iconv() failed");
987 PyMem_Free(target);
988 iconv_close(cd);
989 return NULL;
990 }
991
992 iconv_close(cd);
993 return target;
994 }
995
996 /* Convert a wide character string to the UCS-4 encoded string. This
997 is necessary on systems where internal form of wchar_t are not Unicode
998 code points (e.g. Oracle Solaris).
999
1000 Return a pointer to a newly allocated string, use PyMem_Free() to free
1001 the memory. Return NULL and raise exception on conversion or memory
1002 allocation error. */
1003 wchar_t *
_Py_DecodeNonUnicodeWchar(const wchar_t * native,Py_ssize_t size)1004 _Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1005 {
1006 return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1007 }
1008
1009 /* Convert a UCS-4 encoded string to native wide character string. This
1010 is necessary on systems where internal form of wchar_t are not Unicode
1011 code points (e.g. Oracle Solaris).
1012
1013 The conversion is done in place. This can be done because both wchar_t
1014 and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1015 to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1016 which is currently the only system using these functions; it doesn't have
1017 to be for other systems).
1018
1019 Return 0 on success. Return -1 and raise exception on conversion
1020 or memory allocation error. */
1021 int
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t * unicode,Py_ssize_t size)1022 _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1023 {
1024 wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1025 if (!result) {
1026 return -1;
1027 }
1028 memcpy(unicode, result, size * sizeof(wchar_t));
1029 PyMem_Free(result);
1030 return 0;
1031 }
1032 #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1033
1034 #ifdef MS_WINDOWS
1035 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1036
1037 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)1038 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1039 {
1040 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1041 /* Cannot simply cast and dereference in_ptr,
1042 since it might not be aligned properly */
1043 __int64 in;
1044 memcpy(&in, in_ptr, sizeof(in));
1045 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1046 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1047 }
1048
1049 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)1050 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1051 {
1052 /* XXX endianness */
1053 __int64 out;
1054 out = time_in + secs_between_epochs;
1055 out = out * 10000000 + nsec_in / 100;
1056 memcpy(out_ptr, &out, sizeof(out));
1057 }
1058
1059 /* Below, we *know* that ugo+r is 0444 */
1060 #if _S_IREAD != 0400
1061 #error Unsupported C library
1062 #endif
1063 static int
attributes_to_mode(DWORD attr)1064 attributes_to_mode(DWORD attr)
1065 {
1066 int m = 0;
1067 if (attr & FILE_ATTRIBUTE_DIRECTORY)
1068 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1069 else
1070 m |= _S_IFREG;
1071 if (attr & FILE_ATTRIBUTE_READONLY)
1072 m |= 0444;
1073 else
1074 m |= 0666;
1075 return m;
1076 }
1077
1078 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)1079 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1080 struct _Py_stat_struct *result)
1081 {
1082 memset(result, 0, sizeof(*result));
1083 result->st_mode = attributes_to_mode(info->dwFileAttributes);
1084 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1085 result->st_dev = info->dwVolumeSerialNumber;
1086 result->st_rdev = result->st_dev;
1087 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1088 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1089 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1090 result->st_nlink = info->nNumberOfLinks;
1091 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1092 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1093 open other name surrogate reparse points without traversing them. To
1094 detect/handle these, check st_file_attributes and st_reparse_tag. */
1095 result->st_reparse_tag = reparse_tag;
1096 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1097 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1098 /* first clear the S_IFMT bits */
1099 result->st_mode ^= (result->st_mode & S_IFMT);
1100 /* now set the bits that make this a symlink */
1101 result->st_mode |= S_IFLNK;
1102 }
1103 result->st_file_attributes = info->dwFileAttributes;
1104 }
1105 #endif
1106
/* Get information about an open file descriptor, without raising.

   POSIX: thin wrapper around fstat().

   Windows: use GetFileType() and GetFileInformationByHandle(), which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on such files
   because its file size type is a signed 32-bit integer: see issue #23152.
   Character devices and pipes only get st_mode filled; other non-disk types
   leave *status zeroed.

   Return 0 and fill *status on success.  Return nonzero on error: on
   Windows the last Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE h = _Py_get_osfhandle_noraise(fd);
    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (type) {
    case FILE_TYPE_DISK:
        /* regular on-disk file: query the full information below */
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        /* any other type: report success with *status left zeroed */
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
1170
1171 /* Return information about a file.
1172
1173 On POSIX, use fstat().
1174
1175 On Windows, use GetFileType() and GetFileInformationByHandle() which support
1176 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1177 than 2 GiB because the file size type is a signed 32-bit integer: see issue
1178 #23152.
1179
1180 Raise an exception and return -1 on error. On Windows, set the last Windows
1181 error on error. On POSIX, set errno on error. Fill status and return 0 on
1182 success.
1183
1184 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1185 to call fstat(). The caller must hold the GIL. */
1186 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1187 _Py_fstat(int fd, struct _Py_stat_struct *status)
1188 {
1189 int res;
1190
1191 assert(PyGILState_Check());
1192
1193 Py_BEGIN_ALLOW_THREADS
1194 res = _Py_fstat_noraise(fd, status);
1195 Py_END_ALLOW_THREADS
1196
1197 if (res != 0) {
1198 #ifdef MS_WINDOWS
1199 PyErr_SetFromWindowsErr(0);
1200 #else
1201 PyErr_SetFromErrno(PyExc_OSError);
1202 #endif
1203 return -1;
1204 }
1205 return 0;
1206 }
1207
1208 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1209 call stat() otherwise. Only fill st_mode attribute on Windows.
1210
1211 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1212 raised. */
1213
1214 int
_Py_stat(PyObject * path,struct stat * statbuf)1215 _Py_stat(PyObject *path, struct stat *statbuf)
1216 {
1217 #ifdef MS_WINDOWS
1218 int err;
1219 struct _stat wstatbuf;
1220
1221 #if USE_UNICODE_WCHAR_CACHE
1222 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1223 #else /* USE_UNICODE_WCHAR_CACHE */
1224 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1225 #endif /* USE_UNICODE_WCHAR_CACHE */
1226 if (wpath == NULL)
1227 return -2;
1228
1229 err = _wstat(wpath, &wstatbuf);
1230 if (!err)
1231 statbuf->st_mode = wstatbuf.st_mode;
1232 #if !USE_UNICODE_WCHAR_CACHE
1233 PyMem_Free(wpath);
1234 #endif /* USE_UNICODE_WCHAR_CACHE */
1235 return err;
1236 #else
1237 int ret;
1238 PyObject *bytes;
1239 char *cpath;
1240
1241 bytes = PyUnicode_EncodeFSDefault(path);
1242 if (bytes == NULL)
1243 return -2;
1244
1245 /* check for embedded null bytes */
1246 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1247 Py_DECREF(bytes);
1248 return -2;
1249 }
1250
1251 ret = stat(cpath, statbuf);
1252 Py_DECREF(bytes);
1253 return ret;
1254 #endif
1255 }
1256
1257
1258 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1259 static int
get_inheritable(int fd,int raise)1260 get_inheritable(int fd, int raise)
1261 {
1262 #ifdef MS_WINDOWS
1263 HANDLE handle;
1264 DWORD flags;
1265
1266 handle = _Py_get_osfhandle_noraise(fd);
1267 if (handle == INVALID_HANDLE_VALUE) {
1268 if (raise)
1269 PyErr_SetFromErrno(PyExc_OSError);
1270 return -1;
1271 }
1272
1273 if (!GetHandleInformation(handle, &flags)) {
1274 if (raise)
1275 PyErr_SetFromWindowsErr(0);
1276 return -1;
1277 }
1278
1279 return (flags & HANDLE_FLAG_INHERIT);
1280 #else
1281 int flags;
1282
1283 flags = fcntl(fd, F_GETFD, 0);
1284 if (flags == -1) {
1285 if (raise)
1286 PyErr_SetFromErrno(PyExc_OSError);
1287 return -1;
1288 }
1289 return !(flags & FD_CLOEXEC);
1290 #endif
1291 }
1292
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_exception = 1;
    return get_inheritable(fd, raise_exception);
}
1301
1302
/* Set or clear the inheritable flag of the file descriptor fd.

   inheritable: nonzero to make fd inheritable, zero to make it
       non-inheritable.
   raise: nonzero to raise a Python exception on error; zero to only
       return -1.
   atomic_flag_works: NULL, or a pointer to a tri-state cache
       (-1: unknown, 0: no, 1: yes).  It is only consulted when making fd
       non-inheritable.  While it is -1, the current state of fd is queried
       with get_inheritable() and the cache is set to 1 if fd is already
       non-inheritable, 0 otherwise.  When the cache is nonzero the function
       returns 0 without touching fd.

   Return 0 on success, -1 on error.

   This function MUST be kept async-signal-safe on POSIX when raise=0. */
static int
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
{
#ifdef MS_WINDOWS
    HANDLE handle;
    DWORD flags;
#else
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    /* Remembers across calls whether the ioctl() fast-path is usable:
       -1: not known yet, 0: ioctl() failed with ENOTTY or EACCES,
       1: ioctl() succeeded at least once. */
    static int ioctl_works = -1;
    int request;
    int err;
#endif
    int flags, new_flags;
    int res;
#endif

    /* atomic_flag_works can only be used to make the file descriptor
       non-inheritable */
    assert(!(atomic_flag_works != NULL && inheritable));

    if (atomic_flag_works != NULL && !inheritable) {
        if (*atomic_flag_works == -1) {
            /* First use: probe fd to learn whether it was already created
               non-inheritable. */
            int isInheritable = get_inheritable(fd, raise);
            if (isInheritable == -1)
                return -1;
            *atomic_flag_works = !isInheritable;
        }

        /* fd is already non-inheritable: nothing to do */
        if (*atomic_flag_works)
            return 0;
    }

#ifdef MS_WINDOWS
    handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable)
        flags = HANDLE_FLAG_INHERIT;
    else
        flags = 0;

    /* This check can be removed once support for Windows 7 ends. */
#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
        GetFileType(handle) == FILE_TYPE_CHAR)

    /* SetHandleInformation() is skipped for handles matching
       CONSOLE_PSEUDOHANDLE(); the function then reports success. */
    if (!CONSOLE_PSEUDOHANDLE(handle) &&
        !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
        if (raise)
            PyErr_SetFromWindowsErr(0);
        return -1;
    }
#undef CONSOLE_PSEUDOHANDLE
    return 0;

#else

#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    if (ioctl_works != 0 && raise != 0) {
        /* fast-path: ioctl() only requires one syscall */
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
         * thus avoid using ioctl() so we skip the fast-path. */
        if (inheritable)
            request = FIONCLEX;
        else
            request = FIOCLEX;
        err = ioctl(fd, request, NULL);
        if (!err) {
            ioctl_works = 1;
            return 0;
        }

        /* ioctl() failed: decide between reporting the error and falling
           back to fcntl().  When O_PATH is defined the EBADF test below is
           chained in front of the ENOTTY/EACCES test. */
#ifdef O_PATH
        if (errno == EBADF) {
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
            // on O_PATH file descriptors. Fall through to the fcntl()
            // implementation.
        }
        else
#endif
        if (errno != ENOTTY && errno != EACCES) {
            if (raise)
                PyErr_SetFromErrno(PyExc_OSError);
            return -1;
        }
        else {
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
               device". The ioctl is declared but not supported by the kernel.
               Remember that ioctl() doesn't work. It is the case on
               Illumos-based OS for example.

               Issue #27057: When SELinux policy disallows ioctl it will fail
               with EACCES. While FIOCLEX is safe operation it may be
               unavailable because ioctl was denied altogether.
               This can be the case on Android. */
            ioctl_works = 0;
        }
        /* fallback to fcntl() if ioctl() does not work */
    }
#endif

    /* slow-path: fcntl() requires two syscalls */
    flags = fcntl(fd, F_GETFD);
    if (flags < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable) {
        new_flags = flags & ~FD_CLOEXEC;
    }
    else {
        new_flags = flags | FD_CLOEXEC;
    }

    if (new_flags == flags) {
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
        return 0;
    }

    res = fcntl(fd, F_SETFD, new_flags);
    if (res < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }
    return 0;
#endif
}
1437
/* Mark fd as non-inheritable without raising a Python exception.

   Return 0 on success.  On error, set errno and return -1. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_exception = 0;
    return set_inheritable(fd, inheritable, raise_exception, NULL);
}
1445
/* Set the inheritable flag of the specified file descriptor.

   Return 0 on success.  On error, raise an exception and return -1.

   atomic_flag_works is either NULL (no atomic flag was used to create the
   file descriptor) or a pointer to a cached tri-state value:

   * -1: query the descriptor.  If it is already non-inheritable, store 1
     and do nothing else; otherwise store 0 and make it non-inheritable.
   * 1: do nothing.
   * 0: make the descriptor non-inheritable.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_exception = 1;
    return set_inheritable(fd, inheritable, raise_exception,
                           atomic_flag_works);
}
1467
/* Variant of _Py_set_inheritable() that never raises an exception: on
   error, errno is set and -1 is returned.

   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_exception = 0;
    return set_inheritable(fd, inheritable, raise_exception,
                           atomic_flag_works);
}
1476
1477 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1478 _Py_open_impl(const char *pathname, int flags, int gil_held)
1479 {
1480 int fd;
1481 int async_err = 0;
1482 #ifndef MS_WINDOWS
1483 int *atomic_flag_works;
1484 #endif
1485
1486 #ifdef MS_WINDOWS
1487 flags |= O_NOINHERIT;
1488 #elif defined(O_CLOEXEC)
1489 atomic_flag_works = &_Py_open_cloexec_works;
1490 flags |= O_CLOEXEC;
1491 #else
1492 atomic_flag_works = NULL;
1493 #endif
1494
1495 if (gil_held) {
1496 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1497 if (pathname_obj == NULL) {
1498 return -1;
1499 }
1500 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1501 Py_DECREF(pathname_obj);
1502 return -1;
1503 }
1504
1505 do {
1506 Py_BEGIN_ALLOW_THREADS
1507 fd = open(pathname, flags);
1508 Py_END_ALLOW_THREADS
1509 } while (fd < 0
1510 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1511 if (async_err) {
1512 Py_DECREF(pathname_obj);
1513 return -1;
1514 }
1515 if (fd < 0) {
1516 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1517 Py_DECREF(pathname_obj);
1518 return -1;
1519 }
1520 Py_DECREF(pathname_obj);
1521 }
1522 else {
1523 fd = open(pathname, flags);
1524 if (fd < 0)
1525 return -1;
1526 }
1527
1528 #ifndef MS_WINDOWS
1529 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1530 close(fd);
1531 return -1;
1532 }
1533 #endif
1534
1535 return fd;
1536 }
1537
/* Wrapper around open(): open 'pathname' with the given flags.

   Return a file descriptor on success.  On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the call is
   retried unless the Python signal handler raises an exception.

   The GIL is released while open() runs.  The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1555
/* Wrapper around open() that never raises a Python exception.

   Return a file descriptor on success.  On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1567
1568 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1569 encoding and use fopen() otherwise.
1570
1571 The file descriptor is created non-inheritable.
1572
1573 If interrupted by a signal, fail with EINTR. */
1574 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1575 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1576 {
1577 FILE *f;
1578 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1579 return NULL;
1580 }
1581 #ifndef MS_WINDOWS
1582 char *cpath;
1583 char cmode[10];
1584 size_t r;
1585 r = wcstombs(cmode, mode, 10);
1586 if (r == DECODE_ERROR || r >= 10) {
1587 errno = EINVAL;
1588 return NULL;
1589 }
1590 cpath = _Py_EncodeLocaleRaw(path, NULL);
1591 if (cpath == NULL) {
1592 return NULL;
1593 }
1594 f = fopen(cpath, cmode);
1595 PyMem_RawFree(cpath);
1596 #else
1597 f = _wfopen(path, mode);
1598 #endif
1599 if (f == NULL)
1600 return NULL;
1601 if (make_non_inheritable(fileno(f)) < 0) {
1602 fclose(f);
1603 return NULL;
1604 }
1605 return f;
1606 }
1607
1608
1609 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1610 encoding and call fopen() otherwise.
1611
1612 Return the new file object on success. Raise an exception and return NULL
1613 on error.
1614
1615 The file descriptor is created non-inheritable.
1616
1617 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1618 except if the Python signal handler raises an exception.
1619
1620 Release the GIL to call _wfopen() or fopen(). The caller must hold
1621 the GIL. */
1622 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1623 _Py_fopen_obj(PyObject *path, const char *mode)
1624 {
1625 FILE *f;
1626 int async_err = 0;
1627 #ifdef MS_WINDOWS
1628 wchar_t wmode[10];
1629 int usize;
1630
1631 assert(PyGILState_Check());
1632
1633 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1634 return NULL;
1635 }
1636 if (!PyUnicode_Check(path)) {
1637 PyErr_Format(PyExc_TypeError,
1638 "str file path expected under Windows, got %R",
1639 Py_TYPE(path));
1640 return NULL;
1641 }
1642 #if USE_UNICODE_WCHAR_CACHE
1643 const wchar_t *wpath = _PyUnicode_AsUnicode(path);
1644 #else /* USE_UNICODE_WCHAR_CACHE */
1645 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1646 #endif /* USE_UNICODE_WCHAR_CACHE */
1647 if (wpath == NULL)
1648 return NULL;
1649
1650 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1651 wmode, Py_ARRAY_LENGTH(wmode));
1652 if (usize == 0) {
1653 PyErr_SetFromWindowsErr(0);
1654 #if !USE_UNICODE_WCHAR_CACHE
1655 PyMem_Free(wpath);
1656 #endif /* USE_UNICODE_WCHAR_CACHE */
1657 return NULL;
1658 }
1659
1660 do {
1661 Py_BEGIN_ALLOW_THREADS
1662 f = _wfopen(wpath, wmode);
1663 Py_END_ALLOW_THREADS
1664 } while (f == NULL
1665 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1666 #if !USE_UNICODE_WCHAR_CACHE
1667 PyMem_Free(wpath);
1668 #endif /* USE_UNICODE_WCHAR_CACHE */
1669 #else
1670 PyObject *bytes;
1671 const char *path_bytes;
1672
1673 assert(PyGILState_Check());
1674
1675 if (!PyUnicode_FSConverter(path, &bytes))
1676 return NULL;
1677 path_bytes = PyBytes_AS_STRING(bytes);
1678
1679 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1680 Py_DECREF(bytes);
1681 return NULL;
1682 }
1683
1684 do {
1685 Py_BEGIN_ALLOW_THREADS
1686 f = fopen(path_bytes, mode);
1687 Py_END_ALLOW_THREADS
1688 } while (f == NULL
1689 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1690
1691 Py_DECREF(bytes);
1692 #endif
1693 if (async_err)
1694 return NULL;
1695
1696 if (f == NULL) {
1697 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1698 return NULL;
1699 }
1700
1701 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1702 fclose(f);
1703 return NULL;
1704 }
1705 return f;
1706 }
1707
1708 /* Read count bytes from fd into buf.
1709
1710 On success, return the number of read bytes, it can be lower than count.
1711 If the current file offset is at or past the end of file, no bytes are read,
1712 and read() returns zero.
1713
1714 On error, raise an exception, set errno and return -1.
1715
1716 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1717 If the Python signal handler raises an exception, the function returns -1
1718 (the syscall is not retried).
1719
1720 Release the GIL to call read(). The caller must hold the GIL. */
1721 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1722 _Py_read(int fd, void *buf, size_t count)
1723 {
1724 Py_ssize_t n;
1725 int err;
1726 int async_err = 0;
1727
1728 assert(PyGILState_Check());
1729
1730 /* _Py_read() must not be called with an exception set, otherwise the
1731 * caller may think that read() was interrupted by a signal and the signal
1732 * handler raised an exception. */
1733 assert(!PyErr_Occurred());
1734
1735 if (count > _PY_READ_MAX) {
1736 count = _PY_READ_MAX;
1737 }
1738
1739 _Py_BEGIN_SUPPRESS_IPH
1740 do {
1741 Py_BEGIN_ALLOW_THREADS
1742 errno = 0;
1743 #ifdef MS_WINDOWS
1744 n = read(fd, buf, (int)count);
1745 #else
1746 n = read(fd, buf, count);
1747 #endif
1748 /* save/restore errno because PyErr_CheckSignals()
1749 * and PyErr_SetFromErrno() can modify it */
1750 err = errno;
1751 Py_END_ALLOW_THREADS
1752 } while (n < 0 && err == EINTR &&
1753 !(async_err = PyErr_CheckSignals()));
1754 _Py_END_SUPPRESS_IPH
1755
1756 if (async_err) {
1757 /* read() was interrupted by a signal (failed with EINTR)
1758 * and the Python signal handler raised an exception */
1759 errno = err;
1760 assert(errno == EINTR && PyErr_Occurred());
1761 return -1;
1762 }
1763 if (n < 0) {
1764 PyErr_SetFromErrno(PyExc_OSError);
1765 errno = err;
1766 return -1;
1767 }
1768
1769 return n;
1770 }
1771
1772 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1773 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1774 {
1775 Py_ssize_t n;
1776 int err;
1777 int async_err = 0;
1778
1779 _Py_BEGIN_SUPPRESS_IPH
1780 #ifdef MS_WINDOWS
1781 if (count > 32767) {
1782 /* Issue #11395: the Windows console returns an error (12: not
1783 enough space error) on writing into stdout if stdout mode is
1784 binary and the length is greater than 66,000 bytes (or less,
1785 depending on heap usage). */
1786 if (gil_held) {
1787 Py_BEGIN_ALLOW_THREADS
1788 if (isatty(fd)) {
1789 count = 32767;
1790 }
1791 Py_END_ALLOW_THREADS
1792 } else {
1793 if (isatty(fd)) {
1794 count = 32767;
1795 }
1796 }
1797 }
1798 #endif
1799 if (count > _PY_WRITE_MAX) {
1800 count = _PY_WRITE_MAX;
1801 }
1802
1803 if (gil_held) {
1804 do {
1805 Py_BEGIN_ALLOW_THREADS
1806 errno = 0;
1807 #ifdef MS_WINDOWS
1808 n = write(fd, buf, (int)count);
1809 #else
1810 n = write(fd, buf, count);
1811 #endif
1812 /* save/restore errno because PyErr_CheckSignals()
1813 * and PyErr_SetFromErrno() can modify it */
1814 err = errno;
1815 Py_END_ALLOW_THREADS
1816 } while (n < 0 && err == EINTR &&
1817 !(async_err = PyErr_CheckSignals()));
1818 }
1819 else {
1820 do {
1821 errno = 0;
1822 #ifdef MS_WINDOWS
1823 n = write(fd, buf, (int)count);
1824 #else
1825 n = write(fd, buf, count);
1826 #endif
1827 err = errno;
1828 } while (n < 0 && err == EINTR);
1829 }
1830 _Py_END_SUPPRESS_IPH
1831
1832 if (async_err) {
1833 /* write() was interrupted by a signal (failed with EINTR)
1834 and the Python signal handler raised an exception (if gil_held is
1835 nonzero). */
1836 errno = err;
1837 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1838 return -1;
1839 }
1840 if (n < 0) {
1841 if (gil_held)
1842 PyErr_SetFromErrno(PyExc_OSError);
1843 errno = err;
1844 return -1;
1845 }
1846
1847 return n;
1848 }
1849
1850 /* Write count bytes of buf into fd.
1851
1852 On success, return the number of written bytes, it can be lower than count
1853 including 0. On error, raise an exception, set errno and return -1.
1854
1855 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1856 If the Python signal handler raises an exception, the function returns -1
1857 (the syscall is not retried).
1858
1859 Release the GIL to call write(). The caller must hold the GIL. */
1860 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1861 _Py_write(int fd, const void *buf, size_t count)
1862 {
1863 assert(PyGILState_Check());
1864
1865 /* _Py_write() must not be called with an exception set, otherwise the
1866 * caller may think that write() was interrupted by a signal and the signal
1867 * handler raised an exception. */
1868 assert(!PyErr_Occurred());
1869
1870 return _Py_write_impl(fd, buf, count, 1);
1871 }
1872
1873 /* Write count bytes of buf into fd.
1874 *
1875 * On success, return the number of written bytes, it can be lower than count
1876 * including 0. On error, set errno and return -1.
1877 *
1878 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1879 * without calling the Python signal handler. */
1880 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1881 _Py_write_noraise(int fd, const void *buf, size_t count)
1882 {
1883 return _Py_write_impl(fd, buf, count, 0);
1884 }
1885
1886 #ifdef HAVE_READLINK
1887
1888 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1889 the result from the locale encoding.
1890
1891 Return -1 on encoding error, on readlink() error, if the internal buffer is
1892 too short, on decoding error, or if 'buf' is too short. */
1893 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)1894 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1895 {
1896 char *cpath;
1897 char cbuf[MAXPATHLEN];
1898 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
1899 wchar_t *wbuf;
1900 Py_ssize_t res;
1901 size_t r1;
1902
1903 cpath = _Py_EncodeLocaleRaw(path, NULL);
1904 if (cpath == NULL) {
1905 errno = EINVAL;
1906 return -1;
1907 }
1908 res = readlink(cpath, cbuf, cbuf_len);
1909 PyMem_RawFree(cpath);
1910 if (res == -1) {
1911 return -1;
1912 }
1913 if ((size_t)res == cbuf_len) {
1914 errno = EINVAL;
1915 return -1;
1916 }
1917 cbuf[res] = '\0'; /* buf will be null terminated */
1918 wbuf = Py_DecodeLocale(cbuf, &r1);
1919 if (wbuf == NULL) {
1920 errno = EINVAL;
1921 return -1;
1922 }
1923 /* wbuf must have space to store the trailing NUL character */
1924 if (buflen <= r1) {
1925 PyMem_RawFree(wbuf);
1926 errno = EINVAL;
1927 return -1;
1928 }
1929 wcsncpy(buf, wbuf, buflen);
1930 PyMem_RawFree(wbuf);
1931 return (int)r1;
1932 }
1933 #endif
1934
1935 #ifdef HAVE_REALPATH
1936
1937 /* Return the canonicalized absolute pathname. Encode path to the locale
1938 encoding, decode the result from the locale encoding.
1939
1940 Return NULL on encoding error, realpath() error, decoding error
1941 or if 'resolved_path' is too short. */
1942 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)1943 _Py_wrealpath(const wchar_t *path,
1944 wchar_t *resolved_path, size_t resolved_path_len)
1945 {
1946 char *cpath;
1947 char cresolved_path[MAXPATHLEN];
1948 wchar_t *wresolved_path;
1949 char *res;
1950 size_t r;
1951 cpath = _Py_EncodeLocaleRaw(path, NULL);
1952 if (cpath == NULL) {
1953 errno = EINVAL;
1954 return NULL;
1955 }
1956 res = realpath(cpath, cresolved_path);
1957 PyMem_RawFree(cpath);
1958 if (res == NULL)
1959 return NULL;
1960
1961 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1962 if (wresolved_path == NULL) {
1963 errno = EINVAL;
1964 return NULL;
1965 }
1966 /* wresolved_path must have space to store the trailing NUL character */
1967 if (resolved_path_len <= r) {
1968 PyMem_RawFree(wresolved_path);
1969 errno = EINVAL;
1970 return NULL;
1971 }
1972 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1973 PyMem_RawFree(wresolved_path);
1974 return resolved_path;
1975 }
1976 #endif
1977
1978
1979 #ifndef MS_WINDOWS
1980 int
_Py_isabs(const wchar_t * path)1981 _Py_isabs(const wchar_t *path)
1982 {
1983 return (path[0] == SEP);
1984 }
1985 #endif
1986
1987
1988 /* Get an absolute path.
1989 On error (ex: fail to get the current directory), return -1.
1990 On memory allocation failure, set *abspath_p to NULL and return 0.
1991 On success, return a newly allocated to *abspath_p to and return 0.
1992 The string must be freed by PyMem_RawFree(). */
1993 int
_Py_abspath(const wchar_t * path,wchar_t ** abspath_p)1994 _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1995 {
1996 #ifdef MS_WINDOWS
1997 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1998 DWORD result;
1999
2000 result = GetFullPathNameW(path,
2001 Py_ARRAY_LENGTH(woutbuf), woutbuf,
2002 NULL);
2003 if (!result) {
2004 return -1;
2005 }
2006
2007 if (result > Py_ARRAY_LENGTH(woutbuf)) {
2008 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2009 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
2010 }
2011 else {
2012 woutbufp = NULL;
2013 }
2014 if (!woutbufp) {
2015 *abspath_p = NULL;
2016 return 0;
2017 }
2018
2019 result = GetFullPathNameW(path, result, woutbufp, NULL);
2020 if (!result) {
2021 PyMem_RawFree(woutbufp);
2022 return -1;
2023 }
2024 }
2025
2026 if (woutbufp != woutbuf) {
2027 *abspath_p = woutbufp;
2028 return 0;
2029 }
2030
2031 *abspath_p = _PyMem_RawWcsdup(woutbufp);
2032 return 0;
2033 #else
2034 if (_Py_isabs(path)) {
2035 *abspath_p = _PyMem_RawWcsdup(path);
2036 return 0;
2037 }
2038
2039 wchar_t cwd[MAXPATHLEN + 1];
2040 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2041 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2042 /* unable to get the current directory */
2043 return -1;
2044 }
2045
2046 size_t cwd_len = wcslen(cwd);
2047 size_t path_len = wcslen(path);
2048 size_t len = cwd_len + 1 + path_len + 1;
2049 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2050 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2051 }
2052 else {
2053 *abspath_p = NULL;
2054 }
2055 if (*abspath_p == NULL) {
2056 return 0;
2057 }
2058
2059 wchar_t *abspath = *abspath_p;
2060 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2061 abspath += cwd_len;
2062
2063 *abspath = (wchar_t)SEP;
2064 abspath++;
2065
2066 memcpy(abspath, path, path_len * sizeof(wchar_t));
2067 abspath += path_len;
2068
2069 *abspath = 0;
2070 return 0;
2071 #endif
2072 }
2073
2074
2075 /* Get the current directory. buflen is the buffer size in wide characters
2076 including the null character. Decode the path from the locale encoding.
2077
2078 Return NULL on getcwd() error, on decoding error, or if 'buf' is
2079 too short. */
2080 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)2081 _Py_wgetcwd(wchar_t *buf, size_t buflen)
2082 {
2083 #ifdef MS_WINDOWS
2084 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2085 return _wgetcwd(buf, ibuflen);
2086 #else
2087 char fname[MAXPATHLEN];
2088 wchar_t *wname;
2089 size_t len;
2090
2091 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2092 return NULL;
2093 wname = Py_DecodeLocale(fname, &len);
2094 if (wname == NULL)
2095 return NULL;
2096 /* wname must have space to store the trailing NUL character */
2097 if (buflen <= len) {
2098 PyMem_RawFree(wname);
2099 return NULL;
2100 }
2101 wcsncpy(buf, wname, buflen);
2102 PyMem_RawFree(wname);
2103 return buf;
2104 #endif
2105 }
2106
2107 /* Duplicate a file descriptor. The new file descriptor is created as
2108 non-inheritable. Return a new file descriptor on success, raise an OSError
2109 exception and return -1 on error.
2110
2111 The GIL is released to call dup(). The caller must hold the GIL. */
2112 int
_Py_dup(int fd)2113 _Py_dup(int fd)
2114 {
2115 #ifdef MS_WINDOWS
2116 HANDLE handle;
2117 #endif
2118
2119 assert(PyGILState_Check());
2120
2121 #ifdef MS_WINDOWS
2122 handle = _Py_get_osfhandle(fd);
2123 if (handle == INVALID_HANDLE_VALUE)
2124 return -1;
2125
2126 Py_BEGIN_ALLOW_THREADS
2127 _Py_BEGIN_SUPPRESS_IPH
2128 fd = dup(fd);
2129 _Py_END_SUPPRESS_IPH
2130 Py_END_ALLOW_THREADS
2131 if (fd < 0) {
2132 PyErr_SetFromErrno(PyExc_OSError);
2133 return -1;
2134 }
2135
2136 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2137 _Py_BEGIN_SUPPRESS_IPH
2138 close(fd);
2139 _Py_END_SUPPRESS_IPH
2140 return -1;
2141 }
2142 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2143 Py_BEGIN_ALLOW_THREADS
2144 _Py_BEGIN_SUPPRESS_IPH
2145 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2146 _Py_END_SUPPRESS_IPH
2147 Py_END_ALLOW_THREADS
2148 if (fd < 0) {
2149 PyErr_SetFromErrno(PyExc_OSError);
2150 return -1;
2151 }
2152
2153 #else
2154 Py_BEGIN_ALLOW_THREADS
2155 _Py_BEGIN_SUPPRESS_IPH
2156 fd = dup(fd);
2157 _Py_END_SUPPRESS_IPH
2158 Py_END_ALLOW_THREADS
2159 if (fd < 0) {
2160 PyErr_SetFromErrno(PyExc_OSError);
2161 return -1;
2162 }
2163
2164 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2165 _Py_BEGIN_SUPPRESS_IPH
2166 close(fd);
2167 _Py_END_SUPPRESS_IPH
2168 return -1;
2169 }
2170 #endif
2171 return fd;
2172 }
2173
2174 #ifndef MS_WINDOWS
2175 /* Get the blocking mode of the file descriptor.
2176 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2177 raise an exception and return -1 on error. */
2178 int
_Py_get_blocking(int fd)2179 _Py_get_blocking(int fd)
2180 {
2181 int flags;
2182 _Py_BEGIN_SUPPRESS_IPH
2183 flags = fcntl(fd, F_GETFL, 0);
2184 _Py_END_SUPPRESS_IPH
2185 if (flags < 0) {
2186 PyErr_SetFromErrno(PyExc_OSError);
2187 return -1;
2188 }
2189
2190 return !(flags & O_NONBLOCK);
2191 }
2192
2193 /* Set the blocking mode of the specified file descriptor.
2194
2195 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2196 otherwise.
2197
2198 Return 0 on success, raise an exception and return -1 on error. */
2199 int
_Py_set_blocking(int fd,int blocking)2200 _Py_set_blocking(int fd, int blocking)
2201 {
2202 /* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2203 Use fcntl() instead. */
2204 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2205 int arg = !blocking;
2206 if (ioctl(fd, FIONBIO, &arg) < 0)
2207 goto error;
2208 #else
2209 int flags, res;
2210
2211 _Py_BEGIN_SUPPRESS_IPH
2212 flags = fcntl(fd, F_GETFL, 0);
2213 if (flags >= 0) {
2214 if (blocking)
2215 flags = flags & (~O_NONBLOCK);
2216 else
2217 flags = flags | O_NONBLOCK;
2218
2219 res = fcntl(fd, F_SETFL, flags);
2220 } else {
2221 res = -1;
2222 }
2223 _Py_END_SUPPRESS_IPH
2224
2225 if (res < 0)
2226 goto error;
2227 #endif
2228 return 0;
2229
2230 error:
2231 PyErr_SetFromErrno(PyExc_OSError);
2232 return -1;
2233 }
2234 #else /* MS_WINDOWS */
2235 void*
_Py_get_osfhandle_noraise(int fd)2236 _Py_get_osfhandle_noraise(int fd)
2237 {
2238 void *handle;
2239 _Py_BEGIN_SUPPRESS_IPH
2240 handle = (void*)_get_osfhandle(fd);
2241 _Py_END_SUPPRESS_IPH
2242 return handle;
2243 }
2244
2245 void*
_Py_get_osfhandle(int fd)2246 _Py_get_osfhandle(int fd)
2247 {
2248 void *handle = _Py_get_osfhandle_noraise(fd);
2249 if (handle == INVALID_HANDLE_VALUE)
2250 PyErr_SetFromErrno(PyExc_OSError);
2251
2252 return handle;
2253 }
2254
2255 int
_Py_open_osfhandle_noraise(void * handle,int flags)2256 _Py_open_osfhandle_noraise(void *handle, int flags)
2257 {
2258 int fd;
2259 _Py_BEGIN_SUPPRESS_IPH
2260 fd = _open_osfhandle((intptr_t)handle, flags);
2261 _Py_END_SUPPRESS_IPH
2262 return fd;
2263 }
2264
2265 int
_Py_open_osfhandle(void * handle,int flags)2266 _Py_open_osfhandle(void *handle, int flags)
2267 {
2268 int fd = _Py_open_osfhandle_noraise(handle, flags);
2269 if (fd == -1)
2270 PyErr_SetFromErrno(PyExc_OSError);
2271
2272 return fd;
2273 }
2274 #endif /* MS_WINDOWS */
2275
2276 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)2277 _Py_GetLocaleconvNumeric(struct lconv *lc,
2278 PyObject **decimal_point, PyObject **thousands_sep)
2279 {
2280 assert(decimal_point != NULL);
2281 assert(thousands_sep != NULL);
2282
2283 #ifndef MS_WINDOWS
2284 int change_locale = 0;
2285 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2286 change_locale = 1;
2287 }
2288 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2289 change_locale = 1;
2290 }
2291
2292 /* Keep a copy of the LC_CTYPE locale */
2293 char *oldloc = NULL, *loc = NULL;
2294 if (change_locale) {
2295 oldloc = setlocale(LC_CTYPE, NULL);
2296 if (!oldloc) {
2297 PyErr_SetString(PyExc_RuntimeWarning,
2298 "failed to get LC_CTYPE locale");
2299 return -1;
2300 }
2301
2302 oldloc = _PyMem_Strdup(oldloc);
2303 if (!oldloc) {
2304 PyErr_NoMemory();
2305 return -1;
2306 }
2307
2308 loc = setlocale(LC_NUMERIC, NULL);
2309 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2310 loc = NULL;
2311 }
2312
2313 if (loc != NULL) {
2314 /* Only set the locale temporarily the LC_CTYPE locale
2315 if LC_NUMERIC locale is different than LC_CTYPE locale and
2316 decimal_point and/or thousands_sep are non-ASCII or longer than
2317 1 byte */
2318 setlocale(LC_CTYPE, loc);
2319 }
2320 }
2321
2322 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2323 #else /* MS_WINDOWS */
2324 /* Use _W_* fields of Windows strcut lconv */
2325 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2326 #endif /* MS_WINDOWS */
2327
2328 int res = -1;
2329
2330 *decimal_point = GET_LOCALE_STRING(decimal_point);
2331 if (*decimal_point == NULL) {
2332 goto done;
2333 }
2334
2335 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2336 if (*thousands_sep == NULL) {
2337 goto done;
2338 }
2339
2340 res = 0;
2341
2342 done:
2343 #ifndef MS_WINDOWS
2344 if (loc != NULL) {
2345 setlocale(LC_CTYPE, oldloc);
2346 }
2347 PyMem_Free(oldloc);
2348 #endif
2349 return res;
2350
2351 #undef GET_LOCALE_STRING
2352 }
2353
2354 /* Our selection logic for which function to use is as follows:
2355 * 1. If close_range(2) is available, always prefer that; it's better for
2356 * contiguous ranges like this than fdwalk(3) which entails iterating over
2357 * the entire fd space and simply doing nothing for those outside the range.
2358 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2359 * closing up to sysconf(_SC_OPEN_MAX).
 * 2a. Fall back to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
 *    as that will be more performant if the range happens to have any chunk of
 *    non-open fds in the middle.
2363 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2364 */
/* closefrom(2) is only selected when building for FreeBSD. */
#if defined(__FreeBSD__)
#  define USE_CLOSEFROM
#endif /* __FreeBSD__ */

/* fdwalk(3) is selected whenever HAVE_FDWALK is defined. */
#if defined(HAVE_FDWALK)
#  define USE_FDWALK
#endif /* HAVE_FDWALK */
2372
2373 #ifdef USE_FDWALK
/* fdwalk(3) callback that closes 'fd' when it lies in a half-open range.

   'lohi' points to two ints: lohi[0] is the inclusive lower bound and
   lohi[1] the exclusive upper bound.

   Return 1 once 'fd' has reached the upper bound, 0 otherwise. Errors from
   close() are ignored. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int low = bounds[0];
    const int high = bounds[1];

    if (fd >= high) {
        return 1;
    }
    if (fd >= low) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2389 #endif /* USE_FDWALK */
2390
2391 /* Closes all file descriptors in [first, last], ignoring errors. */
2392 void
_Py_closerange(int first,int last)2393 _Py_closerange(int first, int last)
2394 {
2395 first = Py_MAX(first, 0);
2396 _Py_BEGIN_SUPPRESS_IPH
2397 #ifdef HAVE_CLOSE_RANGE
2398 if (close_range(first, last, 0) == 0 || errno != ENOSYS) {
2399 /* Any errors encountered while closing file descriptors are ignored;
2400 * ENOSYS means no kernel support, though,
2401 * so we'll fallback to the other methods. */
2402 }
2403 else
2404 #endif /* HAVE_CLOSE_RANGE */
2405 #ifdef USE_CLOSEFROM
2406 if (last >= sysconf(_SC_OPEN_MAX)) {
2407 /* Any errors encountered while closing file descriptors are ignored */
2408 closefrom(first);
2409 }
2410 else
2411 #endif /* USE_CLOSEFROM */
2412 #ifdef USE_FDWALK
2413 {
2414 int lohi[2];
2415 lohi[0] = first;
2416 lohi[1] = last + 1;
2417 fdwalk(_fdwalk_close_func, lohi);
2418 }
2419 #else
2420 {
2421 for (int i = first; i <= last; i++) {
2422 /* Ignore errors */
2423 (void)close(i);
2424 }
2425 }
2426 #endif /* USE_FDWALK */
2427 _Py_END_SUPPRESS_IPH
2428 }
2429