• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "pycore_fileutils.h"
3 #include "osdefs.h"
4 #include <locale.h>
5 
6 #ifdef MS_WINDOWS
7 #  include <malloc.h>
8 #  include <windows.h>
9 extern int winerror_to_errno(int);
10 #endif
11 
12 #ifdef HAVE_LANGINFO_H
13 #include <langinfo.h>
14 #endif
15 
16 #ifdef HAVE_SYS_IOCTL_H
17 #include <sys/ioctl.h>
18 #endif
19 
20 #ifdef HAVE_FCNTL_H
21 #include <fcntl.h>
22 #endif /* HAVE_FCNTL_H */
23 
24 #ifdef O_CLOEXEC
25 /* Does open() support the O_CLOEXEC flag? Possible values:
26 
27    -1: unknown
28     0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
29     1: open() supports O_CLOEXEC flag, close-on-exec is set
30 
31    The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
32    and os.open(). */
33 int _Py_open_cloexec_works = -1;
34 #endif
35 
36 
37 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)38 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
39 {
40     switch (errors)
41     {
42     case _Py_ERROR_STRICT:
43         *surrogateescape = 0;
44         return 0;
45     case _Py_ERROR_SURROGATEESCAPE:
46         *surrogateescape = 1;
47         return 0;
48     default:
49         return -1;
50     }
51 }
52 
53 
54 PyObject *
_Py_device_encoding(int fd)55 _Py_device_encoding(int fd)
56 {
57 #if defined(MS_WINDOWS)
58     UINT cp;
59 #endif
60     int valid;
61     _Py_BEGIN_SUPPRESS_IPH
62     valid = isatty(fd);
63     _Py_END_SUPPRESS_IPH
64     if (!valid)
65         Py_RETURN_NONE;
66 
67 #if defined(MS_WINDOWS)
68     if (fd == 0)
69         cp = GetConsoleCP();
70     else if (fd == 1 || fd == 2)
71         cp = GetConsoleOutputCP();
72     else
73         cp = 0;
74     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
75        has no console */
76     if (cp != 0)
77         return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
78 #elif defined(CODESET)
79     {
80         char *codeset = nl_langinfo(CODESET);
81         if (codeset != NULL && codeset[0] != 0)
82             return PyUnicode_FromString(codeset);
83     }
84 #endif
85     Py_RETURN_NONE;
86 }
87 
88 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
89 
90 #define USE_FORCE_ASCII
91 
92 extern int _Py_normalize_encoding(const char *, char *, size_t);
93 
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
   "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and
   at least one byte in range 0x80-0xff can be decoded from the locale
   encoding. The workaround is also enabled on error, for example if getting
   the locale failed.

   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
   ASCII encoding in this case.

   Values of force_ascii:

       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii() and Py_DecodeLocale() uses
          decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
121 static int force_ascii = -1;
122 
/* Decide whether the ASCII workaround must be used for the current
   LC_CTYPE locale.

   Return 1 if ASCII must be forced, 0 otherwise. Return 1 as well when the
   locale or its codeset cannot be retrieved or normalized, and when
   nl_langinfo(CODESET) is not available at build time. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* mbstowcs() expects a null-terminated multibyte string: terminate
           the one-byte probe so the function never reads past the buffer. */
        char ch[2];
        wchar_t wch;
        size_t res;

        ch[0] = (char)(unsigned char)0xA7;
        ch[1] = '\0';
        res = mbstowcs(&wch, ch, 1);
        if (res != (size_t)-1 && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i=0x80; i<=0xff; i++) {
        /* One byte plus a terminating null byte: mbstowcs() expects a
           null-terminated multibyte string. */
        char ch[2];
        wchar_t wch[1];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
222 
223 
224 int
_Py_GetForceASCII(void)225 _Py_GetForceASCII(void)
226 {
227     if (force_ascii == -1) {
228         force_ascii = check_force_ascii();
229     }
230     return force_ascii;
231 }
232 
233 
234 void
_Py_ResetForceASCII(void)235 _Py_ResetForceASCII(void)
236 {
237     force_ascii = -1;
238 }
239 
240 
241 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)242 encode_ascii(const wchar_t *text, char **str,
243              size_t *error_pos, const char **reason,
244              int raw_malloc, _Py_error_handler errors)
245 {
246     char *result = NULL, *out;
247     size_t len, i;
248     wchar_t ch;
249 
250     int surrogateescape;
251     if (get_surrogateescape(errors, &surrogateescape) < 0) {
252         return -3;
253     }
254 
255     len = wcslen(text);
256 
257     /* +1 for NULL byte */
258     if (raw_malloc) {
259         result = PyMem_RawMalloc(len + 1);
260     }
261     else {
262         result = PyMem_Malloc(len + 1);
263     }
264     if (result == NULL) {
265         return -1;
266     }
267 
268     out = result;
269     for (i=0; i<len; i++) {
270         ch = text[i];
271 
272         if (ch <= 0x7f) {
273             /* ASCII character */
274             *out++ = (char)ch;
275         }
276         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
277             /* UTF-8b surrogate */
278             *out++ = (char)(ch - 0xdc00);
279         }
280         else {
281             if (raw_malloc) {
282                 PyMem_RawFree(result);
283             }
284             else {
285                 PyMem_Free(result);
286             }
287             if (error_pos != NULL) {
288                 *error_pos = i;
289             }
290             if (reason) {
291                 *reason = "encoding error";
292             }
293             return -2;
294         }
295     }
296     *out = '\0';
297     *str = result;
298     return 0;
299 }
300 #else
/* The ASCII workaround is not compiled in for this configuration: always
   return 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
306 
/* The ASCII workaround is not compiled in for this configuration: there is
   no cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
312 #endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
313 
314 
315 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
316 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)317 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
318              const char **reason, _Py_error_handler errors)
319 {
320     wchar_t *res;
321     unsigned char *in;
322     wchar_t *out;
323     size_t argsize = strlen(arg) + 1;
324 
325     int surrogateescape;
326     if (get_surrogateescape(errors, &surrogateescape) < 0) {
327         return -3;
328     }
329 
330     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
331         return -1;
332     }
333     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
334     if (!res) {
335         return -1;
336     }
337 
338     out = res;
339     for (in = (unsigned char*)arg; *in; in++) {
340         unsigned char ch = *in;
341         if (ch < 128) {
342             *out++ = ch;
343         }
344         else {
345             if (!surrogateescape) {
346                 PyMem_RawFree(res);
347                 if (wlen) {
348                     *wlen = in - (unsigned char*)arg;
349                 }
350                 if (reason) {
351                     *reason = "decoding error";
352                 }
353                 return -2;
354             }
355             *out++ = 0xdc00 + ch;
356         }
357     }
358     *out = 0;
359 
360     if (wlen != NULL) {
361         *wlen = out - res;
362     }
363     *wstr = res;
364     return 0;
365 }
366 #endif   /* !HAVE_MBRTOWC */
367 
368 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)369 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
370                       const char **reason, _Py_error_handler errors)
371 {
372     wchar_t *res;
373     size_t argsize;
374     size_t count;
375 #ifdef HAVE_MBRTOWC
376     unsigned char *in;
377     wchar_t *out;
378     mbstate_t mbs;
379 #endif
380 
381     int surrogateescape;
382     if (get_surrogateescape(errors, &surrogateescape) < 0) {
383         return -3;
384     }
385 
386 #ifdef HAVE_BROKEN_MBSTOWCS
387     /* Some platforms have a broken implementation of
388      * mbstowcs which does not count the characters that
389      * would result from conversion.  Use an upper bound.
390      */
391     argsize = strlen(arg);
392 #else
393     argsize = mbstowcs(NULL, arg, 0);
394 #endif
395     if (argsize != (size_t)-1) {
396         if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
397             return -1;
398         }
399         res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
400         if (!res) {
401             return -1;
402         }
403 
404         count = mbstowcs(res, arg, argsize + 1);
405         if (count != (size_t)-1) {
406             wchar_t *tmp;
407             /* Only use the result if it contains no
408                surrogate characters. */
409             for (tmp = res; *tmp != 0 &&
410                          !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
411                 ;
412             if (*tmp == 0) {
413                 if (wlen != NULL) {
414                     *wlen = count;
415                 }
416                 *wstr = res;
417                 return 0;
418             }
419         }
420         PyMem_RawFree(res);
421     }
422 
423     /* Conversion failed. Fall back to escaping with surrogateescape. */
424 #ifdef HAVE_MBRTOWC
425     /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
426 
427     /* Overallocate; as multi-byte characters are in the argument, the
428        actual output could use less memory. */
429     argsize = strlen(arg) + 1;
430     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
431         return -1;
432     }
433     res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
434     if (!res) {
435         return -1;
436     }
437 
438     in = (unsigned char*)arg;
439     out = res;
440     memset(&mbs, 0, sizeof mbs);
441     while (argsize) {
442         size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
443         if (converted == 0) {
444             /* Reached end of string; null char stored. */
445             break;
446         }
447 
448         if (converted == (size_t)-2) {
449             /* Incomplete character. This should never happen,
450                since we provide everything that we have -
451                unless there is a bug in the C library, or I
452                misunderstood how mbrtowc works. */
453             goto decode_error;
454         }
455 
456         if (converted == (size_t)-1) {
457             if (!surrogateescape) {
458                 goto decode_error;
459             }
460 
461             /* Conversion error. Escape as UTF-8b, and start over
462                in the initial shift state. */
463             *out++ = 0xdc00 + *in++;
464             argsize--;
465             memset(&mbs, 0, sizeof mbs);
466             continue;
467         }
468 
469         if (Py_UNICODE_IS_SURROGATE(*out)) {
470             if (!surrogateescape) {
471                 goto decode_error;
472             }
473 
474             /* Surrogate character.  Escape the original
475                byte sequence with surrogateescape. */
476             argsize -= converted;
477             while (converted--) {
478                 *out++ = 0xdc00 + *in++;
479             }
480             continue;
481         }
482         /* successfully converted some bytes */
483         in += converted;
484         argsize -= converted;
485         out++;
486     }
487     if (wlen != NULL) {
488         *wlen = out - res;
489     }
490     *wstr = res;
491     return 0;
492 
493 decode_error:
494     PyMem_RawFree(res);
495     if (wlen) {
496         *wlen = in - (unsigned char*)arg;
497     }
498     if (reason) {
499         *reason = "decoding error";
500     }
501     return -2;
502 #else   /* HAVE_MBRTOWC */
503     /* Cannot use C locale for escaping; manually escape as if charset
504        is ASCII (i.e. escape all bytes > 128. This will still roundtrip
505        correctly in the locale's charset, which must be an ASCII superset. */
506     return decode_ascii(arg, wstr, wlen, reason, errors);
507 #endif   /* HAVE_MBRTOWC */
508 }
509 
510 
511 /* Decode a byte string from the locale encoding.
512 
513    Use the strict error handler if 'surrogateescape' is zero.  Use the
514    surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
515    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
516    can be decoded as a surrogate character, escape the bytes using the
517    surrogateescape error handler instead of decoding them.
518 
519    On success, return 0 and write the newly allocated wide character string into
520    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
521    the number of wide characters excluding the null character into *wlen.
522 
523    On memory allocation failure, return -1.
524 
525    On decoding error, return -2. If wlen is not NULL, write the start of
526    invalid byte sequence in the input string into *wlen. If reason is not NULL,
527    write the decoding error message into *reason.
528 
529    Return -3 if the error handler 'errors' is not supported.
530 
531    Use the Py_EncodeLocaleEx() function to encode the character string back to
532    a byte string. */
533 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)534 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
535                    const char **reason,
536                    int current_locale, _Py_error_handler errors)
537 {
538     if (current_locale) {
539 #ifdef _Py_FORCE_UTF8_LOCALE
540         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
541                                 errors);
542 #else
543         return decode_current_locale(arg, wstr, wlen, reason, errors);
544 #endif
545     }
546 
547 #ifdef _Py_FORCE_UTF8_FS_ENCODING
548     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
549                             errors);
550 #else
551     int use_utf8 = (Py_UTF8Mode == 1);
552 #ifdef MS_WINDOWS
553     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
554 #endif
555     if (use_utf8) {
556         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
557                                 errors);
558     }
559 
560 #ifdef USE_FORCE_ASCII
561     if (force_ascii == -1) {
562         force_ascii = check_force_ascii();
563     }
564 
565     if (force_ascii) {
566         /* force ASCII encoding to workaround mbstowcs() issue */
567         return decode_ascii(arg, wstr, wlen, reason, errors);
568     }
569 #endif
570 
571     return decode_current_locale(arg, wstr, wlen, reason, errors);
572 #endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
573 }
574 
575 
576 /* Decode a byte string from the locale encoding with the
577    surrogateescape error handler: undecodable bytes are decoded as characters
578    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
579    character, escape the bytes using the surrogateescape error handler instead
580    of decoding them.
581 
582    Return a pointer to a newly allocated wide character string, use
583    PyMem_RawFree() to free the memory. If size is not NULL, write the number of
584    wide characters excluding the null character into *size
585 
586    Return NULL on decoding error or memory allocation error. If *size* is not
587    NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
588    decoding error.
589 
590    Decoding errors should never happen, unless there is a bug in the C
591    library.
592 
593    Use the Py_EncodeLocale() function to encode the character string back to a
594    byte string. */
595 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)596 Py_DecodeLocale(const char* arg, size_t *wlen)
597 {
598     wchar_t *wstr;
599     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
600                                  NULL, 0,
601                                  _Py_ERROR_SURROGATEESCAPE);
602     if (res != 0) {
603         assert(res != -3);
604         if (wlen != NULL) {
605             *wlen = (size_t)res;
606         }
607         return NULL;
608     }
609     return wstr;
610 }
611 
612 
613 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)614 encode_current_locale(const wchar_t *text, char **str,
615                       size_t *error_pos, const char **reason,
616                       int raw_malloc, _Py_error_handler errors)
617 {
618     const size_t len = wcslen(text);
619     char *result = NULL, *bytes = NULL;
620     size_t i, size, converted;
621     wchar_t c, buf[2];
622 
623     int surrogateescape;
624     if (get_surrogateescape(errors, &surrogateescape) < 0) {
625         return -3;
626     }
627 
628     /* The function works in two steps:
629        1. compute the length of the output buffer in bytes (size)
630        2. outputs the bytes */
631     size = 0;
632     buf[1] = 0;
633     while (1) {
634         for (i=0; i < len; i++) {
635             c = text[i];
636             if (c >= 0xdc80 && c <= 0xdcff) {
637                 if (!surrogateescape) {
638                     goto encode_error;
639                 }
640                 /* UTF-8b surrogate */
641                 if (bytes != NULL) {
642                     *bytes++ = c - 0xdc00;
643                     size--;
644                 }
645                 else {
646                     size++;
647                 }
648                 continue;
649             }
650             else {
651                 buf[0] = c;
652                 if (bytes != NULL) {
653                     converted = wcstombs(bytes, buf, size);
654                 }
655                 else {
656                     converted = wcstombs(NULL, buf, 0);
657                 }
658                 if (converted == (size_t)-1) {
659                     goto encode_error;
660                 }
661                 if (bytes != NULL) {
662                     bytes += converted;
663                     size -= converted;
664                 }
665                 else {
666                     size += converted;
667                 }
668             }
669         }
670         if (result != NULL) {
671             *bytes = '\0';
672             break;
673         }
674 
675         size += 1; /* nul byte at the end */
676         if (raw_malloc) {
677             result = PyMem_RawMalloc(size);
678         }
679         else {
680             result = PyMem_Malloc(size);
681         }
682         if (result == NULL) {
683             return -1;
684         }
685         bytes = result;
686     }
687     *str = result;
688     return 0;
689 
690 encode_error:
691     if (raw_malloc) {
692         PyMem_RawFree(result);
693     }
694     else {
695         PyMem_Free(result);
696     }
697     if (error_pos != NULL) {
698         *error_pos = i;
699     }
700     if (reason) {
701         *reason = "encoding error";
702     }
703     return -2;
704 }
705 
706 
707 /* Encode a string to the locale encoding.
708 
709    Parameters:
710 
711    * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
712      of PyMem_Malloc().
713    * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
714      Python filesystem encoding.
715    * errors: error handler like "strict" or "surrogateescape".
716 
717    Return value:
718 
719     0: success, *str is set to a newly allocated decoded string.
720    -1: memory allocation failure
721    -2: encoding error, set *error_pos and *reason (if set).
722    -3: the error handler 'errors' is not supported.
723  */
724 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)725 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
726                  const char **reason,
727                  int raw_malloc, int current_locale, _Py_error_handler errors)
728 {
729     if (current_locale) {
730 #ifdef _Py_FORCE_UTF8_LOCALE
731         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
732                                 raw_malloc, errors);
733 #else
734         return encode_current_locale(text, str, error_pos, reason,
735                                      raw_malloc, errors);
736 #endif
737     }
738 
739 #ifdef _Py_FORCE_UTF8_FS_ENCODING
740     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
741                             raw_malloc, errors);
742 #else
743     int use_utf8 = (Py_UTF8Mode == 1);
744 #ifdef MS_WINDOWS
745     use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
746 #endif
747     if (use_utf8) {
748         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
749                                 raw_malloc, errors);
750     }
751 
752 #ifdef USE_FORCE_ASCII
753     if (force_ascii == -1) {
754         force_ascii = check_force_ascii();
755     }
756 
757     if (force_ascii) {
758         return encode_ascii(text, str, error_pos, reason,
759                             raw_malloc, errors);
760     }
761 #endif
762 
763     return encode_current_locale(text, str, error_pos, reason,
764                                  raw_malloc, errors);
765 #endif   /* _Py_FORCE_UTF8_FS_ENCODING */
766 }
767 
768 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)769 encode_locale(const wchar_t *text, size_t *error_pos,
770               int raw_malloc, int current_locale)
771 {
772     char *str;
773     int res = encode_locale_ex(text, &str, error_pos, NULL,
774                                raw_malloc, current_locale,
775                                _Py_ERROR_SURROGATEESCAPE);
776     if (res != -2 && error_pos) {
777         *error_pos = (size_t)-1;
778     }
779     if (res != 0) {
780         return NULL;
781     }
782     return str;
783 }
784 
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=0: PyMem_Malloc(); current_locale=0: filesystem encoding */
    return encode_locale(text, error_pos, 0, 0);
}
802 
803 
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
   instead of PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=1: PyMem_RawMalloc(); current_locale=0: filesystem
       encoding */
    return encode_locale(text, error_pos, 1, 0);
}
811 
812 
813 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)814 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
815                    size_t *error_pos, const char **reason,
816                    int current_locale, _Py_error_handler errors)
817 {
818     return encode_locale_ex(text, str, error_pos, reason, 1,
819                             current_locale, errors);
820 }
821 
822 
823 #ifdef MS_WINDOWS
824 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
825 
826 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)827 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
828 {
829     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
830     /* Cannot simply cast and dereference in_ptr,
831        since it might not be aligned properly */
832     __int64 in;
833     memcpy(&in, in_ptr, sizeof(in));
834     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
835     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
836 }
837 
838 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)839 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
840 {
841     /* XXX endianness */
842     __int64 out;
843     out = time_in + secs_between_epochs;
844     out = out * 10000000 + nsec_in / 100;
845     memcpy(out_ptr, &out, sizeof(out));
846 }
847 
848 /* Below, we *know* that ugo+r is 0444 */
849 #if _S_IREAD != 0400
850 #error Unsupported C library
851 #endif
852 static int
attributes_to_mode(DWORD attr)853 attributes_to_mode(DWORD attr)
854 {
855     int m = 0;
856     if (attr & FILE_ATTRIBUTE_DIRECTORY)
857         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
858     else
859         m |= _S_IFREG;
860     if (attr & FILE_ATTRIBUTE_READONLY)
861         m |= 0444;
862     else
863         m |= 0666;
864     return m;
865 }
866 
867 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,struct _Py_stat_struct * result)868 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
869                            struct _Py_stat_struct *result)
870 {
871     memset(result, 0, sizeof(*result));
872     result->st_mode = attributes_to_mode(info->dwFileAttributes);
873     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
874     result->st_dev = info->dwVolumeSerialNumber;
875     result->st_rdev = result->st_dev;
876     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
877     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
878     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
879     result->st_nlink = info->nNumberOfLinks;
880     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
881     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
882        open other name surrogate reparse points without traversing them. To
883        detect/handle these, check st_file_attributes and st_reparse_tag. */
884     result->st_reparse_tag = reparse_tag;
885     if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
886         reparse_tag == IO_REPARSE_TAG_SYMLINK) {
887         /* first clear the S_IFMT bits */
888         result->st_mode ^= (result->st_mode & S_IFMT);
889         /* now set the bits that make this a symlink */
890         result->st_mode |= S_IFLNK;
891     }
892     result->st_file_attributes = info->dwFileAttributes;
893 }
894 #endif
895 
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success.

   On Windows, for a handle that is not a disk file, only st_mode is filled
   (_S_IFCHR for a character device, _S_IFIFO for a pipe, 0 otherwise); the
   other fields are zero. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE h;
    int type;

    _Py_BEGIN_SUPPRESS_IPH
    h = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    if (type != FILE_TYPE_DISK) {
        if (type == FILE_TYPE_CHAR) {
            status->st_mode = _S_IFCHR;
        }
        else if (type == FILE_TYPE_PIPE) {
            status->st_mode = _S_IFIFO;
        }
        return 0;
    }

    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
961 
962 /* Return information about a file.
963 
964    On POSIX, use fstat().
965 
966    On Windows, use GetFileType() and GetFileInformationByHandle() which support
967    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
968    than 2 GiB because the file size type is a signed 32-bit integer: see issue
969    #23152.
970 
971    Raise an exception and return -1 on error. On Windows, set the last Windows
972    error on error. On POSIX, set errno on error. Fill status and return 0 on
973    success.
974 
975    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
976    to call fstat(). The caller must hold the GIL. */
977 int
_Py_fstat(int fd,struct _Py_stat_struct * status)978 _Py_fstat(int fd, struct _Py_stat_struct *status)
979 {
980     int res;
981 
982     assert(PyGILState_Check());
983 
984     Py_BEGIN_ALLOW_THREADS
985     res = _Py_fstat_noraise(fd, status);
986     Py_END_ALLOW_THREADS
987 
988     if (res != 0) {
989 #ifdef MS_WINDOWS
990         PyErr_SetFromWindowsErr(0);
991 #else
992         PyErr_SetFromErrno(PyExc_OSError);
993 #endif
994         return -1;
995     }
996     return 0;
997 }
998 
999 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1000    call stat() otherwise. Only fill st_mode attribute on Windows.
1001 
1002    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1003    raised. */
1004 
1005 int
_Py_stat(PyObject * path,struct stat * statbuf)1006 _Py_stat(PyObject *path, struct stat *statbuf)
1007 {
1008 #ifdef MS_WINDOWS
1009     int err;
1010     struct _stat wstatbuf;
1011     const wchar_t *wpath;
1012 
1013     wpath = _PyUnicode_AsUnicode(path);
1014     if (wpath == NULL)
1015         return -2;
1016 
1017     err = _wstat(wpath, &wstatbuf);
1018     if (!err)
1019         statbuf->st_mode = wstatbuf.st_mode;
1020     return err;
1021 #else
1022     int ret;
1023     PyObject *bytes;
1024     char *cpath;
1025 
1026     bytes = PyUnicode_EncodeFSDefault(path);
1027     if (bytes == NULL)
1028         return -2;
1029 
1030     /* check for embedded null bytes */
1031     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1032         Py_DECREF(bytes);
1033         return -2;
1034     }
1035 
1036     ret = stat(cpath, statbuf);
1037     Py_DECREF(bytes);
1038     return ret;
1039 #endif
1040 }
1041 
1042 
1043 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1044 static int
get_inheritable(int fd,int raise)1045 get_inheritable(int fd, int raise)
1046 {
1047 #ifdef MS_WINDOWS
1048     HANDLE handle;
1049     DWORD flags;
1050 
1051     _Py_BEGIN_SUPPRESS_IPH
1052     handle = (HANDLE)_get_osfhandle(fd);
1053     _Py_END_SUPPRESS_IPH
1054     if (handle == INVALID_HANDLE_VALUE) {
1055         if (raise)
1056             PyErr_SetFromErrno(PyExc_OSError);
1057         return -1;
1058     }
1059 
1060     if (!GetHandleInformation(handle, &flags)) {
1061         if (raise)
1062             PyErr_SetFromWindowsErr(0);
1063         return -1;
1064     }
1065 
1066     return (flags & HANDLE_FLAG_INHERIT);
1067 #else
1068     int flags;
1069 
1070     flags = fcntl(fd, F_GETFD, 0);
1071     if (flags == -1) {
1072         if (raise)
1073             PyErr_SetFromErrno(PyExc_OSError);
1074         return -1;
1075     }
1076     return !(flags & FD_CLOEXEC);
1077 #endif
1078 }
1079 
/* Public entry point to read the inheritable flag of a file descriptor.

   Return 1 if fd can be inherited and 0 if it cannot.  On error, raise an
   exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1088 
1089 
1090 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1091 static int
set_inheritable(int fd,int inheritable,int raise,int * atomic_flag_works)1092 set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1093 {
1094 #ifdef MS_WINDOWS
1095     HANDLE handle;
1096     DWORD flags;
1097 #else
1098 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1099     static int ioctl_works = -1;
1100     int request;
1101     int err;
1102 #endif
1103     int flags, new_flags;
1104     int res;
1105 #endif
1106 
1107     /* atomic_flag_works can only be used to make the file descriptor
1108        non-inheritable */
1109     assert(!(atomic_flag_works != NULL && inheritable));
1110 
1111     if (atomic_flag_works != NULL && !inheritable) {
1112         if (*atomic_flag_works == -1) {
1113             int isInheritable = get_inheritable(fd, raise);
1114             if (isInheritable == -1)
1115                 return -1;
1116             *atomic_flag_works = !isInheritable;
1117         }
1118 
1119         if (*atomic_flag_works)
1120             return 0;
1121     }
1122 
1123 #ifdef MS_WINDOWS
1124     _Py_BEGIN_SUPPRESS_IPH
1125     handle = (HANDLE)_get_osfhandle(fd);
1126     _Py_END_SUPPRESS_IPH
1127     if (handle == INVALID_HANDLE_VALUE) {
1128         if (raise)
1129             PyErr_SetFromErrno(PyExc_OSError);
1130         return -1;
1131     }
1132 
1133     if (inheritable)
1134         flags = HANDLE_FLAG_INHERIT;
1135     else
1136         flags = 0;
1137 
1138     /* This check can be removed once support for Windows 7 ends. */
1139 #define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1140         GetFileType(handle) == FILE_TYPE_CHAR)
1141 
1142     if (!CONSOLE_PSEUDOHANDLE(handle) &&
1143         !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1144         if (raise)
1145             PyErr_SetFromWindowsErr(0);
1146         return -1;
1147     }
1148 #undef CONSOLE_PSEUDOHANDLE
1149     return 0;
1150 
1151 #else
1152 
1153 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1154     if (ioctl_works != 0 && raise != 0) {
1155         /* fast-path: ioctl() only requires one syscall */
1156         /* caveat: raise=0 is an indicator that we must be async-signal-safe
1157          * thus avoid using ioctl() so we skip the fast-path. */
1158         if (inheritable)
1159             request = FIONCLEX;
1160         else
1161             request = FIOCLEX;
1162         err = ioctl(fd, request, NULL);
1163         if (!err) {
1164             ioctl_works = 1;
1165             return 0;
1166         }
1167 
1168         if (errno != ENOTTY && errno != EACCES) {
1169             if (raise)
1170                 PyErr_SetFromErrno(PyExc_OSError);
1171             return -1;
1172         }
1173         else {
1174             /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1175                device". The ioctl is declared but not supported by the kernel.
1176                Remember that ioctl() doesn't work. It is the case on
1177                Illumos-based OS for example.
1178 
1179                Issue #27057: When SELinux policy disallows ioctl it will fail
1180                with EACCES. While FIOCLEX is safe operation it may be
1181                unavailable because ioctl was denied altogether.
1182                This can be the case on Android. */
1183             ioctl_works = 0;
1184         }
1185         /* fallback to fcntl() if ioctl() does not work */
1186     }
1187 #endif
1188 
1189     /* slow-path: fcntl() requires two syscalls */
1190     flags = fcntl(fd, F_GETFD);
1191     if (flags < 0) {
1192         if (raise)
1193             PyErr_SetFromErrno(PyExc_OSError);
1194         return -1;
1195     }
1196 
1197     if (inheritable) {
1198         new_flags = flags & ~FD_CLOEXEC;
1199     }
1200     else {
1201         new_flags = flags | FD_CLOEXEC;
1202     }
1203 
1204     if (new_flags == flags) {
1205         /* FD_CLOEXEC flag already set/cleared: nothing to do */
1206         return 0;
1207     }
1208 
1209     res = fcntl(fd, F_SETFD, new_flags);
1210     if (res < 0) {
1211         if (raise)
1212             PyErr_SetFromErrno(PyExc_OSError);
1213         return -1;
1214     }
1215     return 0;
1216 #endif
1217 }
1218 
/* Clear the inheritable flag of fd without touching the Python error
   state (set_inheritable() is called with raise=0 and no atomic-flag
   cache).

   Return 0 on success; on error, return -1 with errno set. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1226 
/* Set the inheritable flag of the specified file descriptor.

   Return 0 on success.  On error, raise an exception and return -1.

   atomic_flag_works is an optional cache describing whether the flag used
   to create the file descriptor already made it non-inheritable:

    * NULL: no atomic flag was used to create the file descriptor; the
      inheritable flag is always set explicitly
    * *atomic_flag_works == -1: query the file descriptor; if it is already
      non-inheritable, store 1 in the cache and do nothing else, otherwise
      store 0 and make the file descriptor non-inheritable
    * *atomic_flag_works == 1: do nothing
    * *atomic_flag_works == 0: make the file descriptor non-inheritable

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1248 
/* Variant of _Py_set_inheritable() that never raises: on error, errno is
   set and -1 is returned.

   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1257 
1258 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1259 _Py_open_impl(const char *pathname, int flags, int gil_held)
1260 {
1261     int fd;
1262     int async_err = 0;
1263 #ifndef MS_WINDOWS
1264     int *atomic_flag_works;
1265 #endif
1266 
1267 #ifdef MS_WINDOWS
1268     flags |= O_NOINHERIT;
1269 #elif defined(O_CLOEXEC)
1270     atomic_flag_works = &_Py_open_cloexec_works;
1271     flags |= O_CLOEXEC;
1272 #else
1273     atomic_flag_works = NULL;
1274 #endif
1275 
1276     if (gil_held) {
1277         PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1278         if (pathname_obj == NULL) {
1279             return -1;
1280         }
1281         if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1282             Py_DECREF(pathname_obj);
1283             return -1;
1284         }
1285 
1286         do {
1287             Py_BEGIN_ALLOW_THREADS
1288             fd = open(pathname, flags);
1289             Py_END_ALLOW_THREADS
1290         } while (fd < 0
1291                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1292         if (async_err) {
1293             Py_DECREF(pathname_obj);
1294             return -1;
1295         }
1296         if (fd < 0) {
1297             PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1298             Py_DECREF(pathname_obj);
1299             return -1;
1300         }
1301         Py_DECREF(pathname_obj);
1302     }
1303     else {
1304         fd = open(pathname, flags);
1305         if (fd < 0)
1306             return -1;
1307     }
1308 
1309 #ifndef MS_WINDOWS
1310     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1311         close(fd);
1312         return -1;
1313     }
1314 #endif
1315 
1316     return fd;
1317 }
1318 
/* Wrapper to open(): open a file with the specified flags.

   Return the new file descriptor on success.  On error, raise an exception
   and return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, unless the Python signal handler raises an exception.

   The GIL is released while open() runs.  The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1336 
/* Wrapper to open() that never raises: open a file with the specified
   flags.

   Return the new file descriptor on success.  On error, set errno and
   return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1348 
/* Open a file from a wide-character path and mode.

   Windows: call _wfopen() directly.
   Elsewhere: convert the mode with wcstombs(), encode the path to the
   locale encoding and call fopen().

   The "open" audit hook is called first.  The file descriptor is created
   non-inheritable.

   Return NULL on error.  If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *stream;

    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
        return NULL;
    }

#ifdef MS_WINDOWS
    stream = _wfopen(path, mode);
#else
    char narrow_mode[10];
    size_t mode_len = wcstombs(narrow_mode, mode, 10);

    /* conversion failure, or no room left for the terminating null byte */
    if (mode_len == (size_t)-1 || mode_len >= 10) {
        errno = EINVAL;
        return NULL;
    }

    char *narrow_path = _Py_EncodeLocaleRaw(path, NULL);
    if (narrow_path == NULL) {
        return NULL;
    }
    stream = fopen(narrow_path, narrow_mode);
    PyMem_RawFree(narrow_path);
#endif

    if (stream != NULL && make_non_inheritable(fileno(stream)) < 0) {
        fclose(stream);
        stream = NULL;
    }
    return stream;
}
1388 
1389 /* Wrapper to fopen().
1390 
1391    The file descriptor is created non-inheritable.
1392 
1393    If interrupted by a signal, fail with EINTR. */
1394 FILE*
_Py_fopen(const char * pathname,const char * mode)1395 _Py_fopen(const char *pathname, const char *mode)
1396 {
1397     PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1398     if (pathname_obj == NULL) {
1399         return NULL;
1400     }
1401     if (PySys_Audit("open", "Osi", pathname_obj, mode, 0) < 0) {
1402         Py_DECREF(pathname_obj);
1403         return NULL;
1404     }
1405     Py_DECREF(pathname_obj);
1406 
1407     FILE *f = fopen(pathname, mode);
1408     if (f == NULL)
1409         return NULL;
1410     if (make_non_inheritable(fileno(f)) < 0) {
1411         fclose(f);
1412         return NULL;
1413     }
1414     return f;
1415 }
1416 
1417 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1418    encoding and call fopen() otherwise.
1419 
1420    Return the new file object on success. Raise an exception and return NULL
1421    on error.
1422 
1423    The file descriptor is created non-inheritable.
1424 
1425    When interrupted by a signal (open() fails with EINTR), retry the syscall,
1426    except if the Python signal handler raises an exception.
1427 
1428    Release the GIL to call _wfopen() or fopen(). The caller must hold
1429    the GIL. */
1430 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1431 _Py_fopen_obj(PyObject *path, const char *mode)
1432 {
1433     FILE *f;
1434     int async_err = 0;
1435 #ifdef MS_WINDOWS
1436     const wchar_t *wpath;
1437     wchar_t wmode[10];
1438     int usize;
1439 
1440     assert(PyGILState_Check());
1441 
1442     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1443         return NULL;
1444     }
1445     if (!PyUnicode_Check(path)) {
1446         PyErr_Format(PyExc_TypeError,
1447                      "str file path expected under Windows, got %R",
1448                      Py_TYPE(path));
1449         return NULL;
1450     }
1451     wpath = _PyUnicode_AsUnicode(path);
1452     if (wpath == NULL)
1453         return NULL;
1454 
1455     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1456                                 wmode, Py_ARRAY_LENGTH(wmode));
1457     if (usize == 0) {
1458         PyErr_SetFromWindowsErr(0);
1459         return NULL;
1460     }
1461 
1462     do {
1463         Py_BEGIN_ALLOW_THREADS
1464         f = _wfopen(wpath, wmode);
1465         Py_END_ALLOW_THREADS
1466     } while (f == NULL
1467              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1468 #else
1469     PyObject *bytes;
1470     char *path_bytes;
1471 
1472     assert(PyGILState_Check());
1473 
1474     if (!PyUnicode_FSConverter(path, &bytes))
1475         return NULL;
1476     path_bytes = PyBytes_AS_STRING(bytes);
1477 
1478     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1479         Py_DECREF(bytes);
1480         return NULL;
1481     }
1482 
1483     do {
1484         Py_BEGIN_ALLOW_THREADS
1485         f = fopen(path_bytes, mode);
1486         Py_END_ALLOW_THREADS
1487     } while (f == NULL
1488              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1489 
1490     Py_DECREF(bytes);
1491 #endif
1492     if (async_err)
1493         return NULL;
1494 
1495     if (f == NULL) {
1496         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1497         return NULL;
1498     }
1499 
1500     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1501         fclose(f);
1502         return NULL;
1503     }
1504     return f;
1505 }
1506 
1507 /* Read count bytes from fd into buf.
1508 
1509    On success, return the number of read bytes, it can be lower than count.
1510    If the current file offset is at or past the end of file, no bytes are read,
1511    and read() returns zero.
1512 
1513    On error, raise an exception, set errno and return -1.
1514 
1515    When interrupted by a signal (read() fails with EINTR), retry the syscall.
1516    If the Python signal handler raises an exception, the function returns -1
1517    (the syscall is not retried).
1518 
1519    Release the GIL to call read(). The caller must hold the GIL. */
1520 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1521 _Py_read(int fd, void *buf, size_t count)
1522 {
1523     Py_ssize_t n;
1524     int err;
1525     int async_err = 0;
1526 
1527     assert(PyGILState_Check());
1528 
1529     /* _Py_read() must not be called with an exception set, otherwise the
1530      * caller may think that read() was interrupted by a signal and the signal
1531      * handler raised an exception. */
1532     assert(!PyErr_Occurred());
1533 
1534     if (count > _PY_READ_MAX) {
1535         count = _PY_READ_MAX;
1536     }
1537 
1538     _Py_BEGIN_SUPPRESS_IPH
1539     do {
1540         Py_BEGIN_ALLOW_THREADS
1541         errno = 0;
1542 #ifdef MS_WINDOWS
1543         n = read(fd, buf, (int)count);
1544 #else
1545         n = read(fd, buf, count);
1546 #endif
1547         /* save/restore errno because PyErr_CheckSignals()
1548          * and PyErr_SetFromErrno() can modify it */
1549         err = errno;
1550         Py_END_ALLOW_THREADS
1551     } while (n < 0 && err == EINTR &&
1552             !(async_err = PyErr_CheckSignals()));
1553     _Py_END_SUPPRESS_IPH
1554 
1555     if (async_err) {
1556         /* read() was interrupted by a signal (failed with EINTR)
1557          * and the Python signal handler raised an exception */
1558         errno = err;
1559         assert(errno == EINTR && PyErr_Occurred());
1560         return -1;
1561     }
1562     if (n < 0) {
1563         PyErr_SetFromErrno(PyExc_OSError);
1564         errno = err;
1565         return -1;
1566     }
1567 
1568     return n;
1569 }
1570 
1571 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1572 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1573 {
1574     Py_ssize_t n;
1575     int err;
1576     int async_err = 0;
1577 
1578     _Py_BEGIN_SUPPRESS_IPH
1579 #ifdef MS_WINDOWS
1580     if (count > 32767 && isatty(fd)) {
1581         /* Issue #11395: the Windows console returns an error (12: not
1582            enough space error) on writing into stdout if stdout mode is
1583            binary and the length is greater than 66,000 bytes (or less,
1584            depending on heap usage). */
1585         count = 32767;
1586     }
1587 #endif
1588     if (count > _PY_WRITE_MAX) {
1589         count = _PY_WRITE_MAX;
1590     }
1591 
1592     if (gil_held) {
1593         do {
1594             Py_BEGIN_ALLOW_THREADS
1595             errno = 0;
1596 #ifdef MS_WINDOWS
1597             n = write(fd, buf, (int)count);
1598 #else
1599             n = write(fd, buf, count);
1600 #endif
1601             /* save/restore errno because PyErr_CheckSignals()
1602              * and PyErr_SetFromErrno() can modify it */
1603             err = errno;
1604             Py_END_ALLOW_THREADS
1605         } while (n < 0 && err == EINTR &&
1606                 !(async_err = PyErr_CheckSignals()));
1607     }
1608     else {
1609         do {
1610             errno = 0;
1611 #ifdef MS_WINDOWS
1612             n = write(fd, buf, (int)count);
1613 #else
1614             n = write(fd, buf, count);
1615 #endif
1616             err = errno;
1617         } while (n < 0 && err == EINTR);
1618     }
1619     _Py_END_SUPPRESS_IPH
1620 
1621     if (async_err) {
1622         /* write() was interrupted by a signal (failed with EINTR)
1623            and the Python signal handler raised an exception (if gil_held is
1624            nonzero). */
1625         errno = err;
1626         assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1627         return -1;
1628     }
1629     if (n < 0) {
1630         if (gil_held)
1631             PyErr_SetFromErrno(PyExc_OSError);
1632         errno = err;
1633         return -1;
1634     }
1635 
1636     return n;
1637 }
1638 
1639 /* Write count bytes of buf into fd.
1640 
1641    On success, return the number of written bytes, it can be lower than count
1642    including 0. On error, raise an exception, set errno and return -1.
1643 
1644    When interrupted by a signal (write() fails with EINTR), retry the syscall.
1645    If the Python signal handler raises an exception, the function returns -1
1646    (the syscall is not retried).
1647 
1648    Release the GIL to call write(). The caller must hold the GIL. */
1649 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)1650 _Py_write(int fd, const void *buf, size_t count)
1651 {
1652     assert(PyGILState_Check());
1653 
1654     /* _Py_write() must not be called with an exception set, otherwise the
1655      * caller may think that write() was interrupted by a signal and the signal
1656      * handler raised an exception. */
1657     assert(!PyErr_Occurred());
1658 
1659     return _Py_write_impl(fd, buf, count, 1);
1660 }
1661 
1662 /* Write count bytes of buf into fd.
1663  *
1664  * On success, return the number of written bytes, it can be lower than count
1665  * including 0. On error, set errno and return -1.
1666  *
1667  * When interrupted by a signal (write() fails with EINTR), retry the syscall
1668  * without calling the Python signal handler. */
1669 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)1670 _Py_write_noraise(int fd, const void *buf, size_t count)
1671 {
1672     return _Py_write_impl(fd, buf, count, 0);
1673 }
1674 
#ifdef HAVE_READLINK

/* Read the target of a symbolic link, with wide-character arguments.

   The path is encoded to the locale encoding for readlink(), and the
   result is decoded from the locale encoding into buf (buflen wide
   characters, including the trailing null character).

   Return the length of the decoded target on success.  Return -1 on
   encoding error, on readlink() error, if the internal buffer is too
   short, on decoding error, or if 'buf' is too short. */
int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
{
    char target[MAXPATHLEN];
    const size_t target_size = Py_ARRAY_LENGTH(target);
    size_t decoded_len;

    char *encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    int nbytes = (int)readlink(encoded_path, target, target_size);
    PyMem_RawFree(encoded_path);
    if (nbytes == -1) {
        return -1;
    }
    if ((size_t)nbytes == target_size) {
        /* the internal buffer is full: no room for the null byte */
        errno = EINVAL;
        return -1;
    }
    target[nbytes] = '\0'; /* readlink() does not null-terminate */

    wchar_t *decoded = Py_DecodeLocale(target, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* buf must have space to store the trailing NUL character */
    int fits = (decoded_len < buflen);
    if (fits) {
        wcsncpy(buf, decoded, buflen);
    }
    PyMem_RawFree(decoded);
    if (!fits) {
        errno = EINVAL;
        return -1;
    }
    return (int)decoded_len;
}
#endif
1721 
#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname, with wide-character
   arguments.

   The path is encoded to the locale encoding for realpath(), and the
   result is decoded from the locale encoding into resolved_path
   (resolved_path_len wide characters, including the trailing null
   character).

   Return resolved_path on success.  Return NULL on encoding error,
   realpath() error, decoding error or if 'resolved_path' is too short. */
wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_len)
{
    char resolved_bytes[MAXPATHLEN];
    size_t decoded_len;

    char *encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return NULL;
    }

    char *real = realpath(encoded_path, resolved_bytes);
    PyMem_RawFree(encoded_path);
    if (real == NULL) {
        return NULL;
    }

    wchar_t *decoded = Py_DecodeLocale(resolved_bytes, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* resolved_path must have space to store the trailing NUL character */
    int fits = (decoded_len < resolved_path_len);
    if (fits) {
        wcsncpy(resolved_path, decoded, resolved_path_len);
    }
    PyMem_RawFree(decoded);
    if (!fits) {
        errno = EINVAL;
        return NULL;
    }
    return resolved_path;
}
#endif
1764 
1765 /* Get the current directory. buflen is the buffer size in wide characters
1766    including the null character. Decode the path from the locale encoding.
1767 
1768    Return NULL on getcwd() error, on decoding error, or if 'buf' is
1769    too short. */
1770 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)1771 _Py_wgetcwd(wchar_t *buf, size_t buflen)
1772 {
1773 #ifdef MS_WINDOWS
1774     int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1775     return _wgetcwd(buf, ibuflen);
1776 #else
1777     char fname[MAXPATHLEN];
1778     wchar_t *wname;
1779     size_t len;
1780 
1781     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
1782         return NULL;
1783     wname = Py_DecodeLocale(fname, &len);
1784     if (wname == NULL)
1785         return NULL;
1786     /* wname must have space to store the trailing NUL character */
1787     if (buflen <= len) {
1788         PyMem_RawFree(wname);
1789         return NULL;
1790     }
1791     wcsncpy(buf, wname, buflen);
1792     PyMem_RawFree(wname);
1793     return buf;
1794 #endif
1795 }
1796 
1797 /* Duplicate a file descriptor. The new file descriptor is created as
1798    non-inheritable. Return a new file descriptor on success, raise an OSError
1799    exception and return -1 on error.
1800 
1801    The GIL is released to call dup(). The caller must hold the GIL. */
1802 int
_Py_dup(int fd)1803 _Py_dup(int fd)
1804 {
1805 #ifdef MS_WINDOWS
1806     HANDLE handle;
1807 #endif
1808 
1809     assert(PyGILState_Check());
1810 
1811 #ifdef MS_WINDOWS
1812     _Py_BEGIN_SUPPRESS_IPH
1813     handle = (HANDLE)_get_osfhandle(fd);
1814     _Py_END_SUPPRESS_IPH
1815     if (handle == INVALID_HANDLE_VALUE) {
1816         PyErr_SetFromErrno(PyExc_OSError);
1817         return -1;
1818     }
1819 
1820     Py_BEGIN_ALLOW_THREADS
1821     _Py_BEGIN_SUPPRESS_IPH
1822     fd = dup(fd);
1823     _Py_END_SUPPRESS_IPH
1824     Py_END_ALLOW_THREADS
1825     if (fd < 0) {
1826         PyErr_SetFromErrno(PyExc_OSError);
1827         return -1;
1828     }
1829 
1830     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1831         _Py_BEGIN_SUPPRESS_IPH
1832         close(fd);
1833         _Py_END_SUPPRESS_IPH
1834         return -1;
1835     }
1836 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1837     Py_BEGIN_ALLOW_THREADS
1838     _Py_BEGIN_SUPPRESS_IPH
1839     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
1840     _Py_END_SUPPRESS_IPH
1841     Py_END_ALLOW_THREADS
1842     if (fd < 0) {
1843         PyErr_SetFromErrno(PyExc_OSError);
1844         return -1;
1845     }
1846 
1847 #else
1848     Py_BEGIN_ALLOW_THREADS
1849     _Py_BEGIN_SUPPRESS_IPH
1850     fd = dup(fd);
1851     _Py_END_SUPPRESS_IPH
1852     Py_END_ALLOW_THREADS
1853     if (fd < 0) {
1854         PyErr_SetFromErrno(PyExc_OSError);
1855         return -1;
1856     }
1857 
1858     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1859         _Py_BEGIN_SUPPRESS_IPH
1860         close(fd);
1861         _Py_END_SUPPRESS_IPH
1862         return -1;
1863     }
1864 #endif
1865     return fd;
1866 }
1867 
1868 #ifndef MS_WINDOWS
1869 /* Get the blocking mode of the file descriptor.
1870    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1871    raise an exception and return -1 on error. */
1872 int
_Py_get_blocking(int fd)1873 _Py_get_blocking(int fd)
1874 {
1875     int flags;
1876     _Py_BEGIN_SUPPRESS_IPH
1877     flags = fcntl(fd, F_GETFL, 0);
1878     _Py_END_SUPPRESS_IPH
1879     if (flags < 0) {
1880         PyErr_SetFromErrno(PyExc_OSError);
1881         return -1;
1882     }
1883 
1884     return !(flags & O_NONBLOCK);
1885 }
1886 
1887 /* Set the blocking mode of the specified file descriptor.
1888 
1889    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1890    otherwise.
1891 
1892    Return 0 on success, raise an exception and return -1 on error. */
1893 int
_Py_set_blocking(int fd,int blocking)1894 _Py_set_blocking(int fd, int blocking)
1895 {
1896 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1897     int arg = !blocking;
1898     if (ioctl(fd, FIONBIO, &arg) < 0)
1899         goto error;
1900 #else
1901     int flags, res;
1902 
1903     _Py_BEGIN_SUPPRESS_IPH
1904     flags = fcntl(fd, F_GETFL, 0);
1905     if (flags >= 0) {
1906         if (blocking)
1907             flags = flags & (~O_NONBLOCK);
1908         else
1909             flags = flags | O_NONBLOCK;
1910 
1911         res = fcntl(fd, F_SETFL, flags);
1912     } else {
1913         res = -1;
1914     }
1915     _Py_END_SUPPRESS_IPH
1916 
1917     if (res < 0)
1918         goto error;
1919 #endif
1920     return 0;
1921 
1922 error:
1923     PyErr_SetFromErrno(PyExc_OSError);
1924     return -1;
1925 }
1926 #endif
1927 
1928 
1929 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)1930 _Py_GetLocaleconvNumeric(struct lconv *lc,
1931                          PyObject **decimal_point, PyObject **thousands_sep)
1932 {
1933     assert(decimal_point != NULL);
1934     assert(thousands_sep != NULL);
1935 
1936     int change_locale = 0;
1937     if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
1938         change_locale = 1;
1939     }
1940     if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
1941         change_locale = 1;
1942     }
1943 
1944     /* Keep a copy of the LC_CTYPE locale */
1945     char *oldloc = NULL, *loc = NULL;
1946     if (change_locale) {
1947         oldloc = setlocale(LC_CTYPE, NULL);
1948         if (!oldloc) {
1949             PyErr_SetString(PyExc_RuntimeWarning,
1950                             "failed to get LC_CTYPE locale");
1951             return -1;
1952         }
1953 
1954         oldloc = _PyMem_Strdup(oldloc);
1955         if (!oldloc) {
1956             PyErr_NoMemory();
1957             return -1;
1958         }
1959 
1960         loc = setlocale(LC_NUMERIC, NULL);
1961         if (loc != NULL && strcmp(loc, oldloc) == 0) {
1962             loc = NULL;
1963         }
1964 
1965         if (loc != NULL) {
1966             /* Only set the locale temporarily the LC_CTYPE locale
1967                if LC_NUMERIC locale is different than LC_CTYPE locale and
1968                decimal_point and/or thousands_sep are non-ASCII or longer than
1969                1 byte */
1970             setlocale(LC_CTYPE, loc);
1971         }
1972     }
1973 
1974     int res = -1;
1975 
1976     *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
1977     if (*decimal_point == NULL) {
1978         goto done;
1979     }
1980 
1981     *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
1982     if (*thousands_sep == NULL) {
1983         goto done;
1984     }
1985 
1986     res = 0;
1987 
1988 done:
1989     if (loc != NULL) {
1990         setlocale(LC_CTYPE, oldloc);
1991     }
1992     PyMem_Free(oldloc);
1993     return res;
1994 }
1995