• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "pycore_fileutils.h"
3 #include "osdefs.h"
4 #include <locale.h>
5 
6 #ifdef MS_WINDOWS
7 #  include <malloc.h>
8 #  include <windows.h>
9 extern int winerror_to_errno(int);
10 #endif
11 
12 #ifdef HAVE_LANGINFO_H
13 #include <langinfo.h>
14 #endif
15 
16 #ifdef HAVE_SYS_IOCTL_H
17 #include <sys/ioctl.h>
18 #endif
19 
20 #ifdef HAVE_FCNTL_H
21 #include <fcntl.h>
22 #endif /* HAVE_FCNTL_H */
23 
#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
    1: open() supports O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
35 
36 
37 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)38 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
39 {
40     switch (errors)
41     {
42     case _Py_ERROR_STRICT:
43         *surrogateescape = 0;
44         return 0;
45     case _Py_ERROR_SURROGATEESCAPE:
46         *surrogateescape = 1;
47         return 0;
48     default:
49         return -1;
50     }
51 }
52 
53 
54 PyObject *
_Py_device_encoding(int fd)55 _Py_device_encoding(int fd)
56 {
57 #if defined(MS_WINDOWS)
58     UINT cp;
59 #endif
60     int valid;
61     _Py_BEGIN_SUPPRESS_IPH
62     valid = isatty(fd);
63     _Py_END_SUPPRESS_IPH
64     if (!valid)
65         Py_RETURN_NONE;
66 
67 #if defined(MS_WINDOWS)
68     if (fd == 0)
69         cp = GetConsoleCP();
70     else if (fd == 1 || fd == 2)
71         cp = GetConsoleOutputCP();
72     else
73         cp = 0;
74     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
75        has no console */
76     if (cp != 0)
77         return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
78 #elif defined(CODESET)
79     {
80         char *codeset = nl_langinfo(CODESET);
81         if (codeset != NULL && codeset[0] != 0)
82             return PyUnicode_FromString(codeset);
83     }
84 #endif
85     Py_RETURN_NONE;
86 }
87 
88 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
89 
90 #define USE_FORCE_ASCII
91 
92 extern int _Py_normalize_encoding(const char *, char *, size_t);
93 
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and
   at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
   ASCII encoding in this case.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses encode_ascii() and
          Py_DecodeLocale() uses decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
122 
/* Decide whether the ASCII workaround is needed for the current LC_CTYPE
   locale.

   Return 0 if the locale is neither "C" nor "POSIX", or if its codeset is
   consistent with what mbstowcs() really decodes. Return 1 if the ASCII
   encoding must be forced; 1 is also returned if the locale or its codeset
   cannot be determined, or if nl_langinfo(CODESET) is not available. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* mbstowcs() takes a null-terminated multibyte string: terminate the
           probe so the function can never read past the buffer. */
        const unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res;

        res = mbstowcs(&wch, (const char *)probe, 1);
        if (res != (size_t)-1 && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* Null-terminated one-byte string: mbstowcs() reads as many bytes as
           the locale needs for one character, so an unterminated buffer
           could be over-read by a multibyte locale. */
        char ch[2];
        wchar_t wch[1];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
222 
223 
224 int
_Py_GetForceASCII(void)225 _Py_GetForceASCII(void)
226 {
227     if (force_ascii == -1) {
228         force_ascii = check_force_ascii();
229     }
230     return force_ascii;
231 }
232 
233 
234 void
_Py_ResetForceASCII(void)235 _Py_ResetForceASCII(void)
236 {
237     force_ascii = -1;
238 }
239 
240 
241 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)242 encode_ascii(const wchar_t *text, char **str,
243              size_t *error_pos, const char **reason,
244              int raw_malloc, _Py_error_handler errors)
245 {
246     char *result = NULL, *out;
247     size_t len, i;
248     wchar_t ch;
249 
250     int surrogateescape;
251     if (get_surrogateescape(errors, &surrogateescape) < 0) {
252         return -3;
253     }
254 
255     len = wcslen(text);
256 
257     /* +1 for NULL byte */
258     if (raw_malloc) {
259         result = PyMem_RawMalloc(len + 1);
260     }
261     else {
262         result = PyMem_Malloc(len + 1);
263     }
264     if (result == NULL) {
265         return -1;
266     }
267 
268     out = result;
269     for (i=0; i<len; i++) {
270         ch = text[i];
271 
272         if (ch <= 0x7f) {
273             /* ASCII character */
274             *out++ = (char)ch;
275         }
276         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
277             /* UTF-8b surrogate */
278             *out++ = (char)(ch - 0xdc00);
279         }
280         else {
281             if (raw_malloc) {
282                 PyMem_RawFree(result);
283             }
284             else {
285                 PyMem_Free(result);
286             }
287             if (error_pos != NULL) {
288                 *error_pos = i;
289             }
290             if (reason) {
291                 *reason = "encoding error";
292             }
293             return -2;
294         }
295     }
296     *out = '\0';
297     *str = result;
298     return 0;
299 }
300 #else
/* The ASCII workaround is not compiled in this configuration: always report
   that it is not used. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
306 
/* The ASCII workaround is not compiled in this configuration: there is no
   cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
312 #endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
313 
314 
315 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
316 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)317 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
318              const char **reason, _Py_error_handler errors)
319 {
320     wchar_t *res;
321     unsigned char *in;
322     wchar_t *out;
323     size_t argsize = strlen(arg) + 1;
324 
325     int surrogateescape;
326     if (get_surrogateescape(errors, &surrogateescape) < 0) {
327         return -3;
328     }
329 
330     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
331         return -1;
332     }
333     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
334     if (!res) {
335         return -1;
336     }
337 
338     out = res;
339     for (in = (unsigned char*)arg; *in; in++) {
340         unsigned char ch = *in;
341         if (ch < 128) {
342             *out++ = ch;
343         }
344         else {
345             if (!surrogateescape) {
346                 PyMem_RawFree(res);
347                 if (wlen) {
348                     *wlen = in - (unsigned char*)arg;
349                 }
350                 if (reason) {
351                     *reason = "decoding error";
352                 }
353                 return -2;
354             }
355             *out++ = 0xdc00 + ch;
356         }
357     }
358     *out = 0;
359 
360     if (wlen != NULL) {
361         *wlen = out - res;
362     }
363     *wstr = res;
364     return 0;
365 }
366 #endif   /* !HAVE_MBRTOWC */
367 
368 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)369 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
370                       const char **reason, _Py_error_handler errors)
371 {
372     wchar_t *res;
373     size_t argsize;
374     size_t count;
375 #ifdef HAVE_MBRTOWC
376     unsigned char *in;
377     wchar_t *out;
378     mbstate_t mbs;
379 #endif
380 
381     int surrogateescape;
382     if (get_surrogateescape(errors, &surrogateescape) < 0) {
383         return -3;
384     }
385 
386 #ifdef HAVE_BROKEN_MBSTOWCS
387     /* Some platforms have a broken implementation of
388      * mbstowcs which does not count the characters that
389      * would result from conversion.  Use an upper bound.
390      */
391     argsize = strlen(arg);
392 #else
393     argsize = mbstowcs(NULL, arg, 0);
394 #endif
395     if (argsize != (size_t)-1) {
396         if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
397             return -1;
398         }
399         res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
400         if (!res) {
401             return -1;
402         }
403 
404         count = mbstowcs(res, arg, argsize + 1);
405         if (count != (size_t)-1) {
406             wchar_t *tmp;
407             /* Only use the result if it contains no
408                surrogate characters. */
409             for (tmp = res; *tmp != 0 &&
410                          !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
411                 ;
412             if (*tmp == 0) {
413                 if (wlen != NULL) {
414                     *wlen = count;
415                 }
416                 *wstr = res;
417                 return 0;
418             }
419         }
420         PyMem_RawFree(res);
421     }
422 
423     /* Conversion failed. Fall back to escaping with surrogateescape. */
424 #ifdef HAVE_MBRTOWC
425     /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
426 
427     /* Overallocate; as multi-byte characters are in the argument, the
428        actual output could use less memory. */
429     argsize = strlen(arg) + 1;
430     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
431         return -1;
432     }
433     res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
434     if (!res) {
435         return -1;
436     }
437 
438     in = (unsigned char*)arg;
439     out = res;
440     memset(&mbs, 0, sizeof mbs);
441     while (argsize) {
442         size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
443         if (converted == 0) {
444             /* Reached end of string; null char stored. */
445             break;
446         }
447 
448         if (converted == (size_t)-2) {
449             /* Incomplete character. This should never happen,
450                since we provide everything that we have -
451                unless there is a bug in the C library, or I
452                misunderstood how mbrtowc works. */
453             goto decode_error;
454         }
455 
456         if (converted == (size_t)-1) {
457             if (!surrogateescape) {
458                 goto decode_error;
459             }
460 
461             /* Conversion error. Escape as UTF-8b, and start over
462                in the initial shift state. */
463             *out++ = 0xdc00 + *in++;
464             argsize--;
465             memset(&mbs, 0, sizeof mbs);
466             continue;
467         }
468 
469         if (Py_UNICODE_IS_SURROGATE(*out)) {
470             if (!surrogateescape) {
471                 goto decode_error;
472             }
473 
474             /* Surrogate character.  Escape the original
475                byte sequence with surrogateescape. */
476             argsize -= converted;
477             while (converted--) {
478                 *out++ = 0xdc00 + *in++;
479             }
480             continue;
481         }
482         /* successfully converted some bytes */
483         in += converted;
484         argsize -= converted;
485         out++;
486     }
487     if (wlen != NULL) {
488         *wlen = out - res;
489     }
490     *wstr = res;
491     return 0;
492 
493 decode_error:
494     PyMem_RawFree(res);
495     if (wlen) {
496         *wlen = in - (unsigned char*)arg;
497     }
498     if (reason) {
499         *reason = "decoding error";
500     }
501     return -2;
502 #else   /* HAVE_MBRTOWC */
503     /* Cannot use C locale for escaping; manually escape as if charset
504        is ASCII (i.e. escape all bytes > 128. This will still roundtrip
505        correctly in the locale's charset, which must be an ASCII superset. */
506     return decode_ascii(arg, wstr, wlen, reason, errors);
507 #endif   /* HAVE_MBRTOWC */
508 }
509 
510 
511 /* Decode a byte string from the locale encoding.
512 
513    Use the strict error handler if 'surrogateescape' is zero.  Use the
514    surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
515    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
516    can be decoded as a surrogate character, escape the bytes using the
517    surrogateescape error handler instead of decoding them.
518 
519    On success, return 0 and write the newly allocated wide character string into
520    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
521    the number of wide characters excluding the null character into *wlen.
522 
523    On memory allocation failure, return -1.
524 
525    On decoding error, return -2. If wlen is not NULL, write the start of
526    invalid byte sequence in the input string into *wlen. If reason is not NULL,
527    write the decoding error message into *reason.
528 
529    Return -3 if the error handler 'errors' is not supported.
530 
531    Use the Py_EncodeLocaleEx() function to encode the character string back to
532    a byte string. */
533 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)534 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
535                    const char **reason,
536                    int current_locale, _Py_error_handler errors)
537 {
538     if (current_locale) {
539 #ifdef _Py_FORCE_UTF8_LOCALE
540         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
541                                 errors);
542 #else
543         return decode_current_locale(arg, wstr, wlen, reason, errors);
544 #endif
545     }
546 
547 #ifdef _Py_FORCE_UTF8_FS_ENCODING
548     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
549                             errors);
550 #else
551     int use_utf8 = (Py_UTF8Mode == 1);
552 #ifdef MS_WINDOWS
553     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
554 #endif
555     if (use_utf8) {
556         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
557                                 errors);
558     }
559 
560 #ifdef USE_FORCE_ASCII
561     if (force_ascii == -1) {
562         force_ascii = check_force_ascii();
563     }
564 
565     if (force_ascii) {
566         /* force ASCII encoding to workaround mbstowcs() issue */
567         return decode_ascii(arg, wstr, wlen, reason, errors);
568     }
569 #endif
570 
571     return decode_current_locale(arg, wstr, wlen, reason, errors);
572 #endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
573 }
574 
575 
576 /* Decode a byte string from the locale encoding with the
577    surrogateescape error handler: undecodable bytes are decoded as characters
578    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
579    character, escape the bytes using the surrogateescape error handler instead
580    of decoding them.
581 
582    Return a pointer to a newly allocated wide character string, use
583    PyMem_RawFree() to free the memory. If size is not NULL, write the number of
584    wide characters excluding the null character into *size
585 
586    Return NULL on decoding error or memory allocation error. If *size* is not
587    NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
588    decoding error.
589 
590    Decoding errors should never happen, unless there is a bug in the C
591    library.
592 
593    Use the Py_EncodeLocale() function to encode the character string back to a
594    byte string. */
595 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)596 Py_DecodeLocale(const char* arg, size_t *wlen)
597 {
598     wchar_t *wstr;
599     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
600                                  NULL, 0,
601                                  _Py_ERROR_SURROGATEESCAPE);
602     if (res != 0) {
603         assert(res != -3);
604         if (wlen != NULL) {
605             *wlen = (size_t)res;
606         }
607         return NULL;
608     }
609     return wstr;
610 }
611 
612 
613 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)614 encode_current_locale(const wchar_t *text, char **str,
615                       size_t *error_pos, const char **reason,
616                       int raw_malloc, _Py_error_handler errors)
617 {
618     const size_t len = wcslen(text);
619     char *result = NULL, *bytes = NULL;
620     size_t i, size, converted;
621     wchar_t c, buf[2];
622 
623     int surrogateescape;
624     if (get_surrogateescape(errors, &surrogateescape) < 0) {
625         return -3;
626     }
627 
628     /* The function works in two steps:
629        1. compute the length of the output buffer in bytes (size)
630        2. outputs the bytes */
631     size = 0;
632     buf[1] = 0;
633     while (1) {
634         for (i=0; i < len; i++) {
635             c = text[i];
636             if (c >= 0xdc80 && c <= 0xdcff) {
637                 if (!surrogateescape) {
638                     goto encode_error;
639                 }
640                 /* UTF-8b surrogate */
641                 if (bytes != NULL) {
642                     *bytes++ = c - 0xdc00;
643                     size--;
644                 }
645                 else {
646                     size++;
647                 }
648                 continue;
649             }
650             else {
651                 buf[0] = c;
652                 if (bytes != NULL) {
653                     converted = wcstombs(bytes, buf, size);
654                 }
655                 else {
656                     converted = wcstombs(NULL, buf, 0);
657                 }
658                 if (converted == (size_t)-1) {
659                     goto encode_error;
660                 }
661                 if (bytes != NULL) {
662                     bytes += converted;
663                     size -= converted;
664                 }
665                 else {
666                     size += converted;
667                 }
668             }
669         }
670         if (result != NULL) {
671             *bytes = '\0';
672             break;
673         }
674 
675         size += 1; /* nul byte at the end */
676         if (raw_malloc) {
677             result = PyMem_RawMalloc(size);
678         }
679         else {
680             result = PyMem_Malloc(size);
681         }
682         if (result == NULL) {
683             return -1;
684         }
685         bytes = result;
686     }
687     *str = result;
688     return 0;
689 
690 encode_error:
691     if (raw_malloc) {
692         PyMem_RawFree(result);
693     }
694     else {
695         PyMem_Free(result);
696     }
697     if (error_pos != NULL) {
698         *error_pos = i;
699     }
700     if (reason) {
701         *reason = "encoding error";
702     }
703     return -2;
704 }
705 
706 
707 /* Encode a string to the locale encoding.
708 
709    Parameters:
710 
711    * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
712      of PyMem_Malloc().
713    * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
714      Python filesystem encoding.
715    * errors: error handler like "strict" or "surrogateescape".
716 
717    Return value:
718 
719     0: success, *str is set to a newly allocated decoded string.
720    -1: memory allocation failure
721    -2: encoding error, set *error_pos and *reason (if set).
722    -3: the error handler 'errors' is not supported.
723  */
724 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)725 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
726                  const char **reason,
727                  int raw_malloc, int current_locale, _Py_error_handler errors)
728 {
729     if (current_locale) {
730 #ifdef _Py_FORCE_UTF8_LOCALE
731         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
732                                 raw_malloc, errors);
733 #else
734         return encode_current_locale(text, str, error_pos, reason,
735                                      raw_malloc, errors);
736 #endif
737     }
738 
739 #ifdef _Py_FORCE_UTF8_FS_ENCODING
740     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
741                             raw_malloc, errors);
742 #else
743     int use_utf8 = (Py_UTF8Mode == 1);
744 #ifdef MS_WINDOWS
745     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
746 #endif
747     if (use_utf8) {
748         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
749                                 raw_malloc, errors);
750     }
751 
752 #ifdef USE_FORCE_ASCII
753     if (force_ascii == -1) {
754         force_ascii = check_force_ascii();
755     }
756 
757     if (force_ascii) {
758         return encode_ascii(text, str, error_pos, reason,
759                             raw_malloc, errors);
760     }
761 #endif
762 
763     return encode_current_locale(text, str, error_pos, reason,
764                                  raw_malloc, errors);
765 #endif   /* _Py_FORCE_UTF8_FS_ENCODING */
766 }
767 
768 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)769 encode_locale(const wchar_t *text, size_t *error_pos,
770               int raw_malloc, int current_locale)
771 {
772     char *str;
773     int res = encode_locale_ex(text, &str, error_pos, NULL,
774                                raw_malloc, current_locale,
775                                _Py_ERROR_SURROGATEESCAPE);
776     if (res != -2 && error_pos) {
777         *error_pos = (size_t)-1;
778     }
779     if (res != 0) {
780         return NULL;
781     }
782     return str;
783 }
784 
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=0: PyMem_Malloc(); current_locale=0: filesystem encoding */
    return encode_locale(text, error_pos, 0, 0);
}
802 
803 
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
   instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=1: PyMem_RawMalloc(); current_locale=0 */
    return encode_locale(text, error_pos, 1, 0);
}
811 
812 
813 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)814 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
815                    size_t *error_pos, const char **reason,
816                    int current_locale, _Py_error_handler errors)
817 {
818     return encode_locale_ex(text, str, error_pos, reason, 1,
819                             current_locale, errors);
820 }
821 
822 
823 #ifdef MS_WINDOWS
824 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
825 
826 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)827 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
828 {
829     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
830     /* Cannot simply cast and dereference in_ptr,
831        since it might not be aligned properly */
832     __int64 in;
833     memcpy(&in, in_ptr, sizeof(in));
834     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
835     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
836 }
837 
838 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)839 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
840 {
841     /* XXX endianness */
842     __int64 out;
843     out = time_in + secs_between_epochs;
844     out = out * 10000000 + nsec_in / 100;
845     memcpy(out_ptr, &out, sizeof(out));
846 }
847 
848 /* Below, we *know* that ugo+r is 0444 */
849 #if _S_IREAD != 0400
850 #error Unsupported C library
851 #endif
852 static int
attributes_to_mode(DWORD attr)853 attributes_to_mode(DWORD attr)
854 {
855     int m = 0;
856     if (attr & FILE_ATTRIBUTE_DIRECTORY)
857         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
858     else
859         m |= _S_IFREG;
860     if (attr & FILE_ATTRIBUTE_READONLY)
861         m |= 0444;
862     else
863         m |= 0666;
864     return m;
865 }
866 
867 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)868 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
869                            struct _Py_stat_struct *result)
870 {
871     memset(result, 0, sizeof(*result));
872     result->st_mode = attributes_to_mode(info->dwFileAttributes);
873     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
874     result->st_dev = info->dwVolumeSerialNumber;
875     result->st_rdev = result->st_dev;
876     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
877     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
878     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
879     result->st_nlink = info->nNumberOfLinks;
880     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
881     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
882        open other name surrogate reparse points without traversing them. To
883        detect/handle these, check st_file_attributes and st_reparse_tag. */
884     result->st_reparse_tag = reparse_tag;
885     if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
886         reparse_tag == IO_REPARSE_TAG_SYMLINK) {
887         /* first clear the S_IFMT bits */
888         result->st_mode ^= (result->st_mode & S_IFMT);
889         /* now set the bits that make this a symlink */
890         result->st_mode |= S_IFLNK;
891     }
892     result->st_file_attributes = info->dwFileAttributes;
893 }
894 #endif
895 
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152. For a handle which is not a disk file, only st_mode is set
   (_S_IFCHR for a character device, _S_IFIFO for a pipe) and the other
   fields are zero.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success.

   No Python exception is raised by this function. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE h;
    int type;

    _Py_BEGIN_SUPPRESS_IPH
    h = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        /* FILE_TYPE_UNKNOWN is an error only if a Windows error is set */
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    if (type != FILE_TYPE_DISK) {
        if (type == FILE_TYPE_CHAR) {
            status->st_mode = _S_IFCHR;
        }
        else if (type == FILE_TYPE_PIPE) {
            status->st_mode = _S_IFIFO;
        }
        return 0;
    }

    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
961 
962 /* Return information about a file.
963 
964    On POSIX, use fstat().
965 
966    On Windows, use GetFileType() and GetFileInformationByHandle() which support
967    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
968    than 2 GiB because the file size type is a signed 32-bit integer: see issue
969    #23152.
970 
971    Raise an exception and return -1 on error. On Windows, set the last Windows
972    error on error. On POSIX, set errno on error. Fill status and return 0 on
973    success.
974 
975    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
976    to call fstat(). The caller must hold the GIL. */
977 int
_Py_fstat(int fd,struct _Py_stat_struct * status)978 _Py_fstat(int fd, struct _Py_stat_struct *status)
979 {
980     int res;
981 
982     assert(PyGILState_Check());
983 
984     Py_BEGIN_ALLOW_THREADS
985     res = _Py_fstat_noraise(fd, status);
986     Py_END_ALLOW_THREADS
987 
988     if (res != 0) {
989 #ifdef MS_WINDOWS
990         PyErr_SetFromWindowsErr(0);
991 #else
992         PyErr_SetFromErrno(PyExc_OSError);
993 #endif
994         return -1;
995     }
996     return 0;
997 }
998 
999 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1000    call stat() otherwise. Only fill st_mode attribute on Windows.
1001 
1002    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1003    raised. */
1004 
1005 int
_Py_stat(PyObject * path,struct stat * statbuf)1006 _Py_stat(PyObject *path, struct stat *statbuf)
1007 {
1008 #ifdef MS_WINDOWS
1009     int err;
1010     struct _stat wstatbuf;
1011     const wchar_t *wpath;
1012 
1013     wpath = _PyUnicode_AsUnicode(path);
1014     if (wpath == NULL)
1015         return -2;
1016 
1017     err = _wstat(wpath, &wstatbuf);
1018     if (!err)
1019         statbuf->st_mode = wstatbuf.st_mode;
1020     return err;
1021 #else
1022     int ret;
1023     PyObject *bytes;
1024     char *cpath;
1025 
1026     bytes = PyUnicode_EncodeFSDefault(path);
1027     if (bytes == NULL)
1028         return -2;
1029 
1030     /* check for embedded null bytes */
1031     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1032         Py_DECREF(bytes);
1033         return -2;
1034     }
1035 
1036     ret = stat(cpath, statbuf);
1037     Py_DECREF(bytes);
1038     return ret;
1039 #endif
1040 }
1041 
1042 
1043 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1044 static int
get_inheritable(int fd,int raise)1045 get_inheritable(int fd, int raise)
1046 {
1047 #ifdef MS_WINDOWS
1048     HANDLE handle;
1049     DWORD flags;
1050 
1051     _Py_BEGIN_SUPPRESS_IPH
1052     handle = (HANDLE)_get_osfhandle(fd);
1053     _Py_END_SUPPRESS_IPH
1054     if (handle == INVALID_HANDLE_VALUE) {
1055         if (raise)
1056             PyErr_SetFromErrno(PyExc_OSError);
1057         return -1;
1058     }
1059 
1060     if (!GetHandleInformation(handle, &flags)) {
1061         if (raise)
1062             PyErr_SetFromWindowsErr(0);
1063         return -1;
1064     }
1065 
1066     return (flags & HANDLE_FLAG_INHERIT);
1067 #else
1068     int flags;
1069 
1070     flags = fcntl(fd, F_GETFD, 0);
1071     if (flags == -1) {
1072         if (raise)
1073             PyErr_SetFromErrno(PyExc_OSError);
1074         return -1;
1075     }
1076     return !(flags & FD_CLOEXEC);
1077 #endif
1078 }
1079 
/* Report the inheritable flag of a file descriptor.

   Return 1 if the file descriptor can be inherited, 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1088 
1089 
/* Set or clear the inheritable flag of a file descriptor.

   Parameters:
     fd                 file descriptor to modify
     inheritable        nonzero to make fd inheritable, zero to make it
                        non-inheritable
     raise              nonzero: set a Python exception on failure;
                        zero: fail silently (only -1 is returned)
     atomic_flag_works  NULL, or pointer to a tri-state cache (-1: unknown,
                        0: no, 1: yes) telling whether the flag used when fd
                        was created already made it non-inheritable.  Must be
                        NULL when inheritable is nonzero.

   Return 0 on success, -1 on error.

   This function MUST be kept async-signal-safe on POSIX when raise=0. */
static int
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
{
#ifdef MS_WINDOWS
    HANDLE handle;
    DWORD flags;
#else
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    /* Cache shared by all calls: -1 = not tried yet, 1 = ioctl() succeeded
       once, 0 = ioctl() failed with ENOTTY/EACCES so don't try it again */
    static int ioctl_works = -1;
    int request;
    int err;
#endif
    int flags, new_flags;
    int res;
#endif

    /* atomic_flag_works can only be used to make the file descriptor
       non-inheritable */
    assert(!(atomic_flag_works != NULL && inheritable));

    if (atomic_flag_works != NULL && !inheritable) {
        if (*atomic_flag_works == -1) {
            /* First use: probe this descriptor.  If it is already
               non-inheritable, the creation flag did its job. */
            int isInheritable = get_inheritable(fd, raise);
            if (isInheritable == -1)
                return -1;
            *atomic_flag_works = !isInheritable;
        }

        /* The creation flag works: fd is already non-inheritable */
        if (*atomic_flag_works)
            return 0;
    }

#ifdef MS_WINDOWS
    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (handle == INVALID_HANDLE_VALUE) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable)
        flags = HANDLE_FLAG_INHERIT;
    else
        flags = 0;

    /* This check can be removed once support for Windows 7 ends. */
#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
        GetFileType(handle) == FILE_TYPE_CHAR)

    /* SetHandleInformation() is skipped for handles matching
       CONSOLE_PSEUDOHANDLE; the call is then reported as successful */
    if (!CONSOLE_PSEUDOHANDLE(handle) &&
        !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
        if (raise)
            PyErr_SetFromWindowsErr(0);
        return -1;
    }
#undef CONSOLE_PSEUDOHANDLE
    return 0;

#else

#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    if (ioctl_works != 0 && raise != 0) {
        /* fast-path: ioctl() only requires one syscall */
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
         * thus avoid using ioctl() so we skip the fast-path. */
        if (inheritable)
            request = FIONCLEX;
        else
            request = FIOCLEX;
        err = ioctl(fd, request, NULL);
        if (!err) {
            ioctl_works = 1;
            return 0;
        }

        /* Any errno other than ENOTTY/EACCES is a genuine failure */
        if (errno != ENOTTY && errno != EACCES) {
            if (raise)
                PyErr_SetFromErrno(PyExc_OSError);
            return -1;
        }
        else {
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
               device". The ioctl is declared but not supported by the kernel.
               Remember that ioctl() doesn't work. It is the case on
               Illumos-based OS for example.

               Issue #27057: When SELinux policy disallows ioctl it will fail
               with EACCES. While FIOCLEX is safe operation it may be
               unavailable because ioctl was denied altogether.
               This can be the case on Android. */
            ioctl_works = 0;
        }
        /* fallback to fcntl() if ioctl() does not work */
    }
#endif

    /* slow-path: fcntl() requires two syscalls */
    flags = fcntl(fd, F_GETFD);
    if (flags < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    /* Inheritable means FD_CLOEXEC cleared; non-inheritable means set */
    if (inheritable) {
        new_flags = flags & ~FD_CLOEXEC;
    }
    else {
        new_flags = flags | FD_CLOEXEC;
    }

    if (new_flags == flags) {
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
        return 0;
    }

    res = fcntl(fd, F_SETFD, new_flags);
    if (res < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }
    return 0;
#endif
}
1218 
/* Clear the inheritable flag of a file descriptor without raising.

   No atomic-flag cache is consulted.  Return 0 on success; on error,
   return -1 with errno set and no Python exception raised. */
static int
make_non_inheritable(int fd)
{
    return set_inheritable(fd, /* inheritable */ 0, /* raise */ 0,
                           /* atomic_flag_works */ NULL);
}
1226 
/* Set the inheritable flag of the specified file descriptor.
   Return 0 on success; on error, raise an exception and return -1.

   atomic_flag_works is an optional tri-state cache describing whether the
   flag used to create the descriptor already made it non-inheritable:

    * *atomic_flag_works == -1 (unknown): inspect the descriptor.  If it is
      already non-inheritable, store 1 and do nothing else; otherwise store 0
      and make the descriptor non-inheritable
    * *atomic_flag_works == 1: do nothing
    * *atomic_flag_works == 0: make the descriptor non-inheritable

   Pass NULL for atomic_flag_works if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1248 
/* Variant of _Py_set_inheritable() that never raises: on error, errno is
   set and -1 is returned, without a Python exception.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1257 
1258 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1259 _Py_open_impl(const char *pathname, int flags, int gil_held)
1260 {
1261     int fd;
1262     int async_err = 0;
1263 #ifndef MS_WINDOWS
1264     int *atomic_flag_works;
1265 #endif
1266 
1267 #ifdef MS_WINDOWS
1268     flags |= O_NOINHERIT;
1269 #elif defined(O_CLOEXEC)
1270     atomic_flag_works = &_Py_open_cloexec_works;
1271     flags |= O_CLOEXEC;
1272 #else
1273     atomic_flag_works = NULL;
1274 #endif
1275 
1276     if (gil_held) {
1277         if (PySys_Audit("open", "sOi", pathname, Py_None, flags) < 0) {
1278             return -1;
1279         }
1280 
1281         do {
1282             Py_BEGIN_ALLOW_THREADS
1283             fd = open(pathname, flags);
1284             Py_END_ALLOW_THREADS
1285         } while (fd < 0
1286                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1287         if (async_err)
1288             return -1;
1289         if (fd < 0) {
1290             PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
1291             return -1;
1292         }
1293     }
1294     else {
1295         fd = open(pathname, flags);
1296         if (fd < 0)
1297             return -1;
1298     }
1299 
1300 #ifndef MS_WINDOWS
1301     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1302         close(fd);
1303         return -1;
1304     }
1305 #endif
1306 
1307     return fd;
1308 }
1309 
/* Open a file with the specified flags (wrapper to the open() function).
   Return a file descriptor on success.  On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the call is
   retried, unless the Python signal handler raises an exception.

   The GIL is released to call open().  The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1327 
/* Open a file with the specified flags (wrapper to the open() function),
   without raising.  Return a file descriptor on success.  On error, set
   errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR (no retry). */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1339 
/* Open a file from a wide-character path and mode.

   Windows: call _wfopen() directly.
   Other platforms: convert the mode with wcstombs(), encode the path to
   the locale encoding and call fopen().

   The "open" audit event is raised first; NULL is returned if it fails.
   The file descriptor of the returned stream is made non-inheritable.

   Return NULL on error.  The call is not retried when interrupted by a
   signal: it fails with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *stream;

    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
        return NULL;
    }

#ifdef MS_WINDOWS
    stream = _wfopen(path, mode);
#else
    char mode_bytes[10];
    size_t mode_len = wcstombs(mode_bytes, mode, sizeof(mode_bytes));
    /* reject unconvertible modes and modes that fill the whole buffer
       (no room left for the terminating null byte) */
    if (mode_len == (size_t)-1 || mode_len >= sizeof(mode_bytes)) {
        errno = EINVAL;
        return NULL;
    }

    char *path_bytes = _Py_EncodeLocaleRaw(path, NULL);
    if (path_bytes == NULL) {
        return NULL;
    }
    stream = fopen(path_bytes, mode_bytes);
    PyMem_RawFree(path_bytes);
#endif

    if (stream == NULL) {
        return NULL;
    }
    if (make_non_inheritable(fileno(stream)) < 0) {
        fclose(stream);
        return NULL;
    }
    return stream;
}
1379 
/* Wrapper to fopen() taking a byte-string path.

   The "open" audit event is raised first; NULL is returned if it fails.
   The file descriptor of the returned stream is made non-inheritable.

   Return NULL on error.  The call is not retried when interrupted by a
   signal: it fails with EINTR. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *stream;

    if (PySys_Audit("open", "ssi", pathname, mode, 0) < 0) {
        return NULL;
    }

    stream = fopen(pathname, mode);
    if (stream != NULL && make_non_inheritable(fileno(stream)) < 0) {
        /* could not clear the inheritable flag: don't hand out the stream */
        fclose(stream);
        stream = NULL;
    }
    return stream;
}
1401 
1402 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1403    encoding and call fopen() otherwise.
1404 
1405    Return the new file object on success. Raise an exception and return NULL
1406    on error.
1407 
1408    The file descriptor is created non-inheritable.
1409 
1410    When interrupted by a signal (open() fails with EINTR), retry the syscall,
1411    except if the Python signal handler raises an exception.
1412 
1413    Release the GIL to call _wfopen() or fopen(). The caller must hold
1414    the GIL. */
1415 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1416 _Py_fopen_obj(PyObject *path, const char *mode)
1417 {
1418     FILE *f;
1419     int async_err = 0;
1420 #ifdef MS_WINDOWS
1421     const wchar_t *wpath;
1422     wchar_t wmode[10];
1423     int usize;
1424 
1425     assert(PyGILState_Check());
1426 
1427     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1428         return NULL;
1429     }
1430     if (!PyUnicode_Check(path)) {
1431         PyErr_Format(PyExc_TypeError,
1432                      "str file path expected under Windows, got %R",
1433                      Py_TYPE(path));
1434         return NULL;
1435     }
1436     wpath = _PyUnicode_AsUnicode(path);
1437     if (wpath == NULL)
1438         return NULL;
1439 
1440     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1441                                 wmode, Py_ARRAY_LENGTH(wmode));
1442     if (usize == 0) {
1443         PyErr_SetFromWindowsErr(0);
1444         return NULL;
1445     }
1446 
1447     do {
1448         Py_BEGIN_ALLOW_THREADS
1449         f = _wfopen(wpath, wmode);
1450         Py_END_ALLOW_THREADS
1451     } while (f == NULL
1452              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1453 #else
1454     PyObject *bytes;
1455     char *path_bytes;
1456 
1457     assert(PyGILState_Check());
1458 
1459     if (!PyUnicode_FSConverter(path, &bytes))
1460         return NULL;
1461     path_bytes = PyBytes_AS_STRING(bytes);
1462 
1463     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1464         return NULL;
1465     }
1466 
1467     do {
1468         Py_BEGIN_ALLOW_THREADS
1469         f = fopen(path_bytes, mode);
1470         Py_END_ALLOW_THREADS
1471     } while (f == NULL
1472              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1473 
1474     Py_DECREF(bytes);
1475 #endif
1476     if (async_err)
1477         return NULL;
1478 
1479     if (f == NULL) {
1480         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1481         return NULL;
1482     }
1483 
1484     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1485         fclose(f);
1486         return NULL;
1487     }
1488     return f;
1489 }
1490 
1491 /* Read count bytes from fd into buf.
1492 
1493    On success, return the number of read bytes, it can be lower than count.
1494    If the current file offset is at or past the end of file, no bytes are read,
1495    and read() returns zero.
1496 
1497    On error, raise an exception, set errno and return -1.
1498 
1499    When interrupted by a signal (read() fails with EINTR), retry the syscall.
1500    If the Python signal handler raises an exception, the function returns -1
1501    (the syscall is not retried).
1502 
1503    Release the GIL to call read(). The caller must hold the GIL. */
1504 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1505 _Py_read(int fd, void *buf, size_t count)
1506 {
1507     Py_ssize_t n;
1508     int err;
1509     int async_err = 0;
1510 
1511     assert(PyGILState_Check());
1512 
1513     /* _Py_read() must not be called with an exception set, otherwise the
1514      * caller may think that read() was interrupted by a signal and the signal
1515      * handler raised an exception. */
1516     assert(!PyErr_Occurred());
1517 
1518     if (count > _PY_READ_MAX) {
1519         count = _PY_READ_MAX;
1520     }
1521 
1522     _Py_BEGIN_SUPPRESS_IPH
1523     do {
1524         Py_BEGIN_ALLOW_THREADS
1525         errno = 0;
1526 #ifdef MS_WINDOWS
1527         n = read(fd, buf, (int)count);
1528 #else
1529         n = read(fd, buf, count);
1530 #endif
1531         /* save/restore errno because PyErr_CheckSignals()
1532          * and PyErr_SetFromErrno() can modify it */
1533         err = errno;
1534         Py_END_ALLOW_THREADS
1535     } while (n < 0 && err == EINTR &&
1536             !(async_err = PyErr_CheckSignals()));
1537     _Py_END_SUPPRESS_IPH
1538 
1539     if (async_err) {
1540         /* read() was interrupted by a signal (failed with EINTR)
1541          * and the Python signal handler raised an exception */
1542         errno = err;
1543         assert(errno == EINTR && PyErr_Occurred());
1544         return -1;
1545     }
1546     if (n < 0) {
1547         PyErr_SetFromErrno(PyExc_OSError);
1548         errno = err;
1549         return -1;
1550     }
1551 
1552     return n;
1553 }
1554 
/* Shared implementation of _Py_write() and _Py_write_noraise().

   Write up to 'count' bytes of 'buf' into 'fd'.  'count' is clamped to
   _PY_WRITE_MAX (and to 32767 when writing to a tty on Windows).

   gil_held != 0: the GIL is released around write(); on EINTR the Python
   signal handlers are run and the call is retried unless a handler raised;
   other failures set a Python exception.
   gil_held == 0: write() is simply retried on EINTR and no exception is
   set.

   Return the number of bytes written, or -1 on error with errno set. */
static Py_ssize_t
_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
{
    Py_ssize_t n;
    int err;
    int async_err = 0;

    _Py_BEGIN_SUPPRESS_IPH
#ifdef MS_WINDOWS
    if (count > 32767 && isatty(fd)) {
        /* Issue #11395: the Windows console returns an error (12: not
           enough space error) on writing into stdout if stdout mode is
           binary and the length is greater than 66,000 bytes (or less,
           depending on heap usage). */
        count = 32767;
    }
#endif
    if (count > _PY_WRITE_MAX) {
        count = _PY_WRITE_MAX;
    }

    if (gil_held) {
        do {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
#ifdef MS_WINDOWS
            n = write(fd, buf, (int)count);
#else
            n = write(fd, buf, count);
#endif
            /* save/restore errno because PyErr_CheckSignals()
             * and PyErr_SetFromErrno() can modify it */
            err = errno;
            Py_END_ALLOW_THREADS
        } while (n < 0 && err == EINTR &&
                !(async_err = PyErr_CheckSignals()));
    }
    else {
        /* No GIL: Python signal handlers cannot be run here, so just
           retry the write for as long as it is interrupted */
        do {
            errno = 0;
#ifdef MS_WINDOWS
            n = write(fd, buf, (int)count);
#else
            n = write(fd, buf, count);
#endif
            err = errno;
        } while (n < 0 && err == EINTR);
    }
    _Py_END_SUPPRESS_IPH

    if (async_err) {
        /* write() was interrupted by a signal (failed with EINTR)
           and the Python signal handler raised an exception (if gil_held is
           nonzero). */
        errno = err;
        assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
        return -1;
    }
    if (n < 0) {
        if (gil_held)
            PyErr_SetFromErrno(PyExc_OSError);
        /* hand the errno of the failed write() back to the caller */
        errno = err;
        return -1;
    }

    return n;
}
1622 
1623 /* Write count bytes of buf into fd.
1624 
1625    On success, return the number of written bytes, it can be lower than count
1626    including 0. On error, raise an exception, set errno and return -1.
1627 
1628    When interrupted by a signal (write() fails with EINTR), retry the syscall.
1629    If the Python signal handler raises an exception, the function returns -1
1630    (the syscall is not retried).
1631 
1632    Release the GIL to call write(). The caller must hold the GIL. */
1633 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1634 _Py_write(int fd, const void *buf, size_t count)
1635 {
1636     assert(PyGILState_Check());
1637 
1638     /* _Py_write() must not be called with an exception set, otherwise the
1639      * caller may think that write() was interrupted by a signal and the signal
1640      * handler raised an exception. */
1641     assert(!PyErr_Occurred());
1642 
1643     return _Py_write_impl(fd, buf, count, 1);
1644 }
1645 
1646 /* Write count bytes of buf into fd.
1647  *
1648  * On success, return the number of written bytes, it can be lower than count
1649  * including 0. On error, set errno and return -1.
1650  *
1651  * When interrupted by a signal (write() fails with EINTR), retry the syscall
1652  * without calling the Python signal handler. */
1653 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1654 _Py_write_noraise(int fd, const void *buf, size_t count)
1655 {
1656     return _Py_write_impl(fd, buf, count, 0);
1657 }
1658 
1659 #ifdef HAVE_READLINK
1660 
1661 /* Read value of symbolic link. Encode the path to the locale encoding, decode
1662    the result from the locale encoding.
1663 
1664    Return -1 on encoding error, on readlink() error, if the internal buffer is
1665    too short, on decoding error, or if 'buf' is too short. */
1666 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)1667 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1668 {
1669     char *cpath;
1670     char cbuf[MAXPATHLEN];
1671     wchar_t *wbuf;
1672     int res;
1673     size_t r1;
1674 
1675     cpath = _Py_EncodeLocaleRaw(path, NULL);
1676     if (cpath == NULL) {
1677         errno = EINVAL;
1678         return -1;
1679     }
1680     res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
1681     PyMem_RawFree(cpath);
1682     if (res == -1)
1683         return -1;
1684     if (res == Py_ARRAY_LENGTH(cbuf)) {
1685         errno = EINVAL;
1686         return -1;
1687     }
1688     cbuf[res] = '\0'; /* buf will be null terminated */
1689     wbuf = Py_DecodeLocale(cbuf, &r1);
1690     if (wbuf == NULL) {
1691         errno = EINVAL;
1692         return -1;
1693     }
1694     /* wbuf must have space to store the trailing NUL character */
1695     if (buflen <= r1) {
1696         PyMem_RawFree(wbuf);
1697         errno = EINVAL;
1698         return -1;
1699     }
1700     wcsncpy(buf, wbuf, buflen);
1701     PyMem_RawFree(wbuf);
1702     return (int)r1;
1703 }
1704 #endif
1705 
1706 #ifdef HAVE_REALPATH
1707 
1708 /* Return the canonicalized absolute pathname. Encode path to the locale
1709    encoding, decode the result from the locale encoding.
1710 
1711    Return NULL on encoding error, realpath() error, decoding error
1712    or if 'resolved_path' is too short. */
1713 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)1714 _Py_wrealpath(const wchar_t *path,
1715               wchar_t *resolved_path, size_t resolved_path_len)
1716 {
1717     char *cpath;
1718     char cresolved_path[MAXPATHLEN];
1719     wchar_t *wresolved_path;
1720     char *res;
1721     size_t r;
1722     cpath = _Py_EncodeLocaleRaw(path, NULL);
1723     if (cpath == NULL) {
1724         errno = EINVAL;
1725         return NULL;
1726     }
1727     res = realpath(cpath, cresolved_path);
1728     PyMem_RawFree(cpath);
1729     if (res == NULL)
1730         return NULL;
1731 
1732     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1733     if (wresolved_path == NULL) {
1734         errno = EINVAL;
1735         return NULL;
1736     }
1737     /* wresolved_path must have space to store the trailing NUL character */
1738     if (resolved_path_len <= r) {
1739         PyMem_RawFree(wresolved_path);
1740         errno = EINVAL;
1741         return NULL;
1742     }
1743     wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1744     PyMem_RawFree(wresolved_path);
1745     return resolved_path;
1746 }
1747 #endif
1748 
1749 /* Get the current directory. buflen is the buffer size in wide characters
1750    including the null character. Decode the path from the locale encoding.
1751 
1752    Return NULL on getcwd() error, on decoding error, or if 'buf' is
1753    too short. */
1754 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)1755 _Py_wgetcwd(wchar_t *buf, size_t buflen)
1756 {
1757 #ifdef MS_WINDOWS
1758     int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1759     return _wgetcwd(buf, ibuflen);
1760 #else
1761     char fname[MAXPATHLEN];
1762     wchar_t *wname;
1763     size_t len;
1764 
1765     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
1766         return NULL;
1767     wname = Py_DecodeLocale(fname, &len);
1768     if (wname == NULL)
1769         return NULL;
1770     /* wname must have space to store the trailing NUL character */
1771     if (buflen <= len) {
1772         PyMem_RawFree(wname);
1773         return NULL;
1774     }
1775     wcsncpy(buf, wname, buflen);
1776     PyMem_RawFree(wname);
1777     return buf;
1778 #endif
1779 }
1780 
1781 /* Duplicate a file descriptor. The new file descriptor is created as
1782    non-inheritable. Return a new file descriptor on success, raise an OSError
1783    exception and return -1 on error.
1784 
1785    The GIL is released to call dup(). The caller must hold the GIL. */
1786 int
_Py_dup(int fd)1787 _Py_dup(int fd)
1788 {
1789 #ifdef MS_WINDOWS
1790     HANDLE handle;
1791 #endif
1792 
1793     assert(PyGILState_Check());
1794 
1795 #ifdef MS_WINDOWS
1796     _Py_BEGIN_SUPPRESS_IPH
1797     handle = (HANDLE)_get_osfhandle(fd);
1798     _Py_END_SUPPRESS_IPH
1799     if (handle == INVALID_HANDLE_VALUE) {
1800         PyErr_SetFromErrno(PyExc_OSError);
1801         return -1;
1802     }
1803 
1804     Py_BEGIN_ALLOW_THREADS
1805     _Py_BEGIN_SUPPRESS_IPH
1806     fd = dup(fd);
1807     _Py_END_SUPPRESS_IPH
1808     Py_END_ALLOW_THREADS
1809     if (fd < 0) {
1810         PyErr_SetFromErrno(PyExc_OSError);
1811         return -1;
1812     }
1813 
1814     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1815         _Py_BEGIN_SUPPRESS_IPH
1816         close(fd);
1817         _Py_END_SUPPRESS_IPH
1818         return -1;
1819     }
1820 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1821     Py_BEGIN_ALLOW_THREADS
1822     _Py_BEGIN_SUPPRESS_IPH
1823     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1824     _Py_END_SUPPRESS_IPH
1825     Py_END_ALLOW_THREADS
1826     if (fd < 0) {
1827         PyErr_SetFromErrno(PyExc_OSError);
1828         return -1;
1829     }
1830 
1831 #else
1832     Py_BEGIN_ALLOW_THREADS
1833     _Py_BEGIN_SUPPRESS_IPH
1834     fd = dup(fd);
1835     _Py_END_SUPPRESS_IPH
1836     Py_END_ALLOW_THREADS
1837     if (fd < 0) {
1838         PyErr_SetFromErrno(PyExc_OSError);
1839         return -1;
1840     }
1841 
1842     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1843         _Py_BEGIN_SUPPRESS_IPH
1844         close(fd);
1845         _Py_END_SUPPRESS_IPH
1846         return -1;
1847     }
1848 #endif
1849     return fd;
1850 }
1851 
1852 #ifndef MS_WINDOWS
1853 /* Get the blocking mode of the file descriptor.
1854    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1855    raise an exception and return -1 on error. */
1856 int
_Py_get_blocking(int fd)1857 _Py_get_blocking(int fd)
1858 {
1859     int flags;
1860     _Py_BEGIN_SUPPRESS_IPH
1861     flags = fcntl(fd, F_GETFL, 0);
1862     _Py_END_SUPPRESS_IPH
1863     if (flags < 0) {
1864         PyErr_SetFromErrno(PyExc_OSError);
1865         return -1;
1866     }
1867 
1868     return !(flags & O_NONBLOCK);
1869 }
1870 
1871 /* Set the blocking mode of the specified file descriptor.
1872 
1873    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1874    otherwise.
1875 
1876    Return 0 on success, raise an exception and return -1 on error. */
1877 int
_Py_set_blocking(int fd,int blocking)1878 _Py_set_blocking(int fd, int blocking)
1879 {
1880 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1881     int arg = !blocking;
1882     if (ioctl(fd, FIONBIO, &arg) < 0)
1883         goto error;
1884 #else
1885     int flags, res;
1886 
1887     _Py_BEGIN_SUPPRESS_IPH
1888     flags = fcntl(fd, F_GETFL, 0);
1889     if (flags >= 0) {
1890         if (blocking)
1891             flags = flags & (~O_NONBLOCK);
1892         else
1893             flags = flags | O_NONBLOCK;
1894 
1895         res = fcntl(fd, F_SETFL, flags);
1896     } else {
1897         res = -1;
1898     }
1899     _Py_END_SUPPRESS_IPH
1900 
1901     if (res < 0)
1902         goto error;
1903 #endif
1904     return 0;
1905 
1906 error:
1907     PyErr_SetFromErrno(PyExc_OSError);
1908     return -1;
1909 }
1910 #endif
1911 
1912 
1913 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)1914 _Py_GetLocaleconvNumeric(struct lconv *lc,
1915                          PyObject **decimal_point, PyObject **thousands_sep)
1916 {
1917     assert(decimal_point != NULL);
1918     assert(thousands_sep != NULL);
1919 
1920     int change_locale = 0;
1921     if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
1922         change_locale = 1;
1923     }
1924     if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
1925         change_locale = 1;
1926     }
1927 
1928     /* Keep a copy of the LC_CTYPE locale */
1929     char *oldloc = NULL, *loc = NULL;
1930     if (change_locale) {
1931         oldloc = setlocale(LC_CTYPE, NULL);
1932         if (!oldloc) {
1933             PyErr_SetString(PyExc_RuntimeWarning,
1934                             "failed to get LC_CTYPE locale");
1935             return -1;
1936         }
1937 
1938         oldloc = _PyMem_Strdup(oldloc);
1939         if (!oldloc) {
1940             PyErr_NoMemory();
1941             return -1;
1942         }
1943 
1944         loc = setlocale(LC_NUMERIC, NULL);
1945         if (loc != NULL && strcmp(loc, oldloc) == 0) {
1946             loc = NULL;
1947         }
1948 
1949         if (loc != NULL) {
1950             /* Only set the locale temporarily the LC_CTYPE locale
1951                if LC_NUMERIC locale is different than LC_CTYPE locale and
1952                decimal_point and/or thousands_sep are non-ASCII or longer than
1953                1 byte */
1954             setlocale(LC_CTYPE, loc);
1955         }
1956     }
1957 
1958     int res = -1;
1959 
1960     *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
1961     if (*decimal_point == NULL) {
1962         goto done;
1963     }
1964 
1965     *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
1966     if (*thousands_sep == NULL) {
1967         goto done;
1968     }
1969 
1970     res = 0;
1971 
1972 done:
1973     if (loc != NULL) {
1974         setlocale(LC_CTYPE, oldloc);
1975     }
1976     PyMem_Free(oldloc);
1977     return res;
1978 }
1979