• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 #include "pycore_fileutils.h"     // fileutils definitions
3 #include "pycore_runtime.h"       // _PyRuntime
4 #include "osdefs.h"               // SEP
5 
6 #include <stdlib.h>               // mbstowcs()
7 #ifdef HAVE_UNISTD_H
8 #  include <unistd.h>             // getcwd()
9 #endif
10 
11 #ifdef MS_WINDOWS
12 #  include <malloc.h>
13 #  include <windows.h>
14 #  include <winioctl.h>             // FILE_DEVICE_* constants
15 #  include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION
16 #  if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
17 #    define PATHCCH_ALLOW_LONG_PATHS 0x01
18 #  else
19 #    include <pathcch.h>            // PathCchCombineEx
20 #  endif
21 extern int winerror_to_errno(int);
22 #endif
23 
24 #ifdef HAVE_LANGINFO_H
25 #  include <langinfo.h>           // nl_langinfo(CODESET)
26 #endif
27 
28 #ifdef HAVE_SYS_IOCTL_H
29 #include <sys/ioctl.h>
30 #endif
31 
32 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
33 #  include <iconv.h>              // iconv_open()
34 #endif
35 
36 #ifdef HAVE_FCNTL_H
37 #  include <fcntl.h>              // fcntl(F_GETFD)
38 #endif
39 
#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:

   -1: unknown (initial value)
    0: open() ignores O_CLOEXEC flag, e.g. Linux kernel older than 2.6.23
    1: open() supports O_CLOEXEC flag, close-on-exec is set

   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
   and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
51 
// Highest valid Unicode code point.
// Keep in sync with the MAX_UNICODE definition in unicodeobject.c.
#define MAX_UNICODE 0x10ffff

// Error values returned by mbstowcs() and mbrtowc():
// (size_t)-1 signals a decoding error, (size_t)-2 an incomplete character.
static const size_t DECODE_ERROR = (size_t)-1;
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
58 
59 
60 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)61 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
62 {
63     switch (errors)
64     {
65     case _Py_ERROR_STRICT:
66         *surrogateescape = 0;
67         return 0;
68     case _Py_ERROR_SURROGATEESCAPE:
69         *surrogateescape = 1;
70         return 0;
71     default:
72         return -1;
73     }
74 }
75 
76 
77 PyObject *
_Py_device_encoding(int fd)78 _Py_device_encoding(int fd)
79 {
80     int valid;
81     Py_BEGIN_ALLOW_THREADS
82     _Py_BEGIN_SUPPRESS_IPH
83     valid = isatty(fd);
84     _Py_END_SUPPRESS_IPH
85     Py_END_ALLOW_THREADS
86     if (!valid)
87         Py_RETURN_NONE;
88 
89 #ifdef MS_WINDOWS
90 #ifdef HAVE_WINDOWS_CONSOLE_IO
91     UINT cp;
92     if (fd == 0)
93         cp = GetConsoleCP();
94     else if (fd == 1 || fd == 2)
95         cp = GetConsoleOutputCP();
96     else
97         cp = 0;
98     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
99        has no console */
100     if (cp == 0) {
101         Py_RETURN_NONE;
102     }
103 
104     return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
105 #else
106     Py_RETURN_NONE;
107 #endif /* HAVE_WINDOWS_CONSOLE_IO */
108 #else
109     if (_PyRuntime.preconfig.utf8_mode) {
110         _Py_DECLARE_STR(utf_8, "utf-8");
111         return &_Py_STR(utf_8);
112     }
113     return _Py_GetLocaleEncodingObject();
114 #endif
115 }
116 
117 
118 static int
is_valid_wide_char(wchar_t ch)119 is_valid_wide_char(wchar_t ch)
120 {
121 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
122     /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
123        for non-Unicode locales, which makes values higher than MAX_UNICODE
124        possibly valid. */
125     return 1;
126 #endif
127     if (Py_UNICODE_IS_SURROGATE(ch)) {
128         // Reject lone surrogate characters
129         return 0;
130     }
131     if (ch > MAX_UNICODE) {
132         // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
133         // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
134         // it creates characters outside the [U+0000; U+10ffff] range:
135         // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
136         return 0;
137     }
138     return 1;
139 }
140 
141 
142 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)143 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
144 {
145     size_t count = mbstowcs(dest, src, n);
146     if (dest != NULL && count != DECODE_ERROR) {
147         for (size_t i=0; i < count; i++) {
148             wchar_t ch = dest[i];
149             if (!is_valid_wide_char(ch)) {
150                 return DECODE_ERROR;
151             }
152         }
153     }
154     return count;
155 }
156 
157 
158 #ifdef HAVE_MBRTOWC
159 static size_t
_Py_mbrtowc(wchar_t * pwc,const char * str,size_t len,mbstate_t * pmbs)160 _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
161 {
162     assert(pwc != NULL);
163     size_t count = mbrtowc(pwc, str, len, pmbs);
164     if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
165         if (!is_valid_wide_char(*pwc)) {
166             return DECODE_ERROR;
167         }
168     }
169     return count;
170 }
171 #endif
172 
173 
174 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
175 
176 #define USE_FORCE_ASCII
177 
178 extern int _Py_normalize_encoding(const char *, char *, size_t);
179 
180 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
181    and POSIX locale. nl_langinfo(CODESET) announces an alias of the
182    ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
183    ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
184    locale.getpreferredencoding() codec. For example, if command line arguments
185    are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
186    UnicodeEncodeError instead of retrieving the original byte string.
187 
188    The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
189    nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
190    one byte in range 0x80-0xff can be decoded from the locale encoding. The
191    workaround is also enabled on error, for example if getting the locale
192    failed.
193 
194    On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
195    announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
196    ASCII encoding in this case.
197 
198    Values of force_ascii:
199 
200        1: the workaround is used: Py_EncodeLocale() uses
201           encode_ascii_surrogateescape() and Py_DecodeLocale() uses
202           decode_ascii()
203        0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
204           Py_DecodeLocale() uses mbstowcs()
205       -1: unknown, need to call check_force_ascii() to get the value
206 */
207 #define force_ascii (_PyRuntime.fileutils.force_ascii)
208 
/* Decide whether the ASCII encoding must be forced for the locale codec.

   Return 0 if the LC_CTYPE locale is neither "C" nor "POSIX", or if the
   encoding announced by nl_langinfo(CODESET) is consistent with what
   mbstowcs() does in practice.

   Return 1 if the workaround is needed, if nl_langinfo(CODESET) is not
   available, or if an error occurred while querying the locale. */
static int
check_force_ascii(void)
{
    const char *ctype_locale = setlocale(LC_CTYPE, NULL);
    if (ctype_locale == NULL) {
        goto error;
    }
    if (!(strcmp(ctype_locale, "C") == 0
          || strcmp(ctype_locale, "POSIX") == 0))
    {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL || *codeset == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* On HP-UX with C locale or the POSIX locale,
           nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
           Latin1 encoding in practice. Force ASCII in this case.

           Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
        unsigned char probe = (unsigned char)0xA7;
        wchar_t decoded;
        size_t res = _Py_mbstowcs(&decoded, (char*)&probe, 1);
        if (res != DECODE_ERROR && decoded == L'\xA7') {
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int announced_ascii = 0;
    for (size_t k = 0; ascii_aliases[k] != NULL && !announced_ascii; k++) {
        announced_ascii = (strcmp(encoding, ascii_aliases[k]) == 0);
    }
    if (!announced_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte: a real ASCII codec decodes none of them. */
    for (unsigned int byte = 0x80; byte <= 0xff; byte++) {
        char src[1];
        wchar_t dst[1];

        src[0] = (char)(unsigned char)byte;
        if (_Py_mbstowcs(dst, src, 1) != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
308 
309 
310 int
_Py_GetForceASCII(void)311 _Py_GetForceASCII(void)
312 {
313     if (force_ascii == -1) {
314         force_ascii = check_force_ascii();
315     }
316     return force_ascii;
317 }
318 
319 
/* Reset the force_ascii flag to -1 (unknown), so that the next reader
   calls check_force_ascii() again. */
void
_Py_ResetForceASCII(void)
{
    force_ascii = -1;
}
325 
326 
327 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)328 encode_ascii(const wchar_t *text, char **str,
329              size_t *error_pos, const char **reason,
330              int raw_malloc, _Py_error_handler errors)
331 {
332     char *result = NULL, *out;
333     size_t len, i;
334     wchar_t ch;
335 
336     int surrogateescape;
337     if (get_surrogateescape(errors, &surrogateescape) < 0) {
338         return -3;
339     }
340 
341     len = wcslen(text);
342 
343     /* +1 for NULL byte */
344     if (raw_malloc) {
345         result = PyMem_RawMalloc(len + 1);
346     }
347     else {
348         result = PyMem_Malloc(len + 1);
349     }
350     if (result == NULL) {
351         return -1;
352     }
353 
354     out = result;
355     for (i=0; i<len; i++) {
356         ch = text[i];
357 
358         if (ch <= 0x7f) {
359             /* ASCII character */
360             *out++ = (char)ch;
361         }
362         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
363             /* UTF-8b surrogate */
364             *out++ = (char)(ch - 0xdc00);
365         }
366         else {
367             if (raw_malloc) {
368                 PyMem_RawFree(result);
369             }
370             else {
371                 PyMem_Free(result);
372             }
373             if (error_pos != NULL) {
374                 *error_pos = i;
375             }
376             if (reason) {
377                 *reason = "encoding error";
378             }
379             return -2;
380         }
381     }
382     *out = '\0';
383     *str = result;
384     return 0;
385 }
386 #else
/* Variant used when the force-ASCII workaround is compiled out
   (_Py_FORCE_UTF8_FS_ENCODING or MS_WINDOWS is defined): ASCII is never
   forced, so always return 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
392 
/* Variant used when the force-ASCII workaround is compiled out: there is
   no cached flag to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
398 #endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
399 
400 
401 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
402 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)403 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
404              const char **reason, _Py_error_handler errors)
405 {
406     wchar_t *res;
407     unsigned char *in;
408     wchar_t *out;
409     size_t argsize = strlen(arg) + 1;
410 
411     int surrogateescape;
412     if (get_surrogateescape(errors, &surrogateescape) < 0) {
413         return -3;
414     }
415 
416     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
417         return -1;
418     }
419     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
420     if (!res) {
421         return -1;
422     }
423 
424     out = res;
425     for (in = (unsigned char*)arg; *in; in++) {
426         unsigned char ch = *in;
427         if (ch < 128) {
428             *out++ = ch;
429         }
430         else {
431             if (!surrogateescape) {
432                 PyMem_RawFree(res);
433                 if (wlen) {
434                     *wlen = in - (unsigned char*)arg;
435                 }
436                 if (reason) {
437                     *reason = "decoding error";
438                 }
439                 return -2;
440             }
441             *out++ = 0xdc00 + ch;
442         }
443     }
444     *out = 0;
445 
446     if (wlen != NULL) {
447         *wlen = out - res;
448     }
449     *wstr = res;
450     return 0;
451 }
#endif   /* !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) */
453 
454 static int
decode_current_locale(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)455 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
456                       const char **reason, _Py_error_handler errors)
457 {
458     wchar_t *res;
459     size_t argsize;
460     size_t count;
461 #ifdef HAVE_MBRTOWC
462     unsigned char *in;
463     wchar_t *out;
464     mbstate_t mbs;
465 #endif
466 
467     int surrogateescape;
468     if (get_surrogateescape(errors, &surrogateescape) < 0) {
469         return -3;
470     }
471 
472 #ifdef HAVE_BROKEN_MBSTOWCS
473     /* Some platforms have a broken implementation of
474      * mbstowcs which does not count the characters that
475      * would result from conversion.  Use an upper bound.
476      */
477     argsize = strlen(arg);
478 #else
479     argsize = _Py_mbstowcs(NULL, arg, 0);
480 #endif
481     if (argsize != DECODE_ERROR) {
482         if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
483             return -1;
484         }
485         res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
486         if (!res) {
487             return -1;
488         }
489 
490         count = _Py_mbstowcs(res, arg, argsize + 1);
491         if (count != DECODE_ERROR) {
492             *wstr = res;
493             if (wlen != NULL) {
494                 *wlen = count;
495             }
496             return 0;
497         }
498         PyMem_RawFree(res);
499     }
500 
501     /* Conversion failed. Fall back to escaping with surrogateescape. */
502 #ifdef HAVE_MBRTOWC
503     /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
504 
505     /* Overallocate; as multi-byte characters are in the argument, the
506        actual output could use less memory. */
507     argsize = strlen(arg) + 1;
508     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
509         return -1;
510     }
511     res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
512     if (!res) {
513         return -1;
514     }
515 
516     in = (unsigned char*)arg;
517     out = res;
518     memset(&mbs, 0, sizeof mbs);
519     while (argsize) {
520         size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
521         if (converted == 0) {
522             /* Reached end of string; null char stored. */
523             break;
524         }
525 
526         if (converted == INCOMPLETE_CHARACTER) {
527             /* Incomplete character. This should never happen,
528                since we provide everything that we have -
529                unless there is a bug in the C library, or I
530                misunderstood how mbrtowc works. */
531             goto decode_error;
532         }
533 
534         if (converted == DECODE_ERROR) {
535             if (!surrogateescape) {
536                 goto decode_error;
537             }
538 
539             /* Decoding error. Escape as UTF-8b, and start over in the initial
540                shift state. */
541             *out++ = 0xdc00 + *in++;
542             argsize--;
543             memset(&mbs, 0, sizeof mbs);
544             continue;
545         }
546 
547         // _Py_mbrtowc() reject lone surrogate characters
548         assert(!Py_UNICODE_IS_SURROGATE(*out));
549 
550         /* successfully converted some bytes */
551         in += converted;
552         argsize -= converted;
553         out++;
554     }
555     if (wlen != NULL) {
556         *wlen = out - res;
557     }
558     *wstr = res;
559     return 0;
560 
561 decode_error:
562     PyMem_RawFree(res);
563     if (wlen) {
564         *wlen = in - (unsigned char*)arg;
565     }
566     if (reason) {
567         *reason = "decoding error";
568     }
569     return -2;
570 #else   /* HAVE_MBRTOWC */
571     /* Cannot use C locale for escaping; manually escape as if charset
572        is ASCII (i.e. escape all bytes > 128. This will still roundtrip
573        correctly in the locale's charset, which must be an ASCII superset. */
574     return decode_ascii(arg, wstr, wlen, reason, errors);
575 #endif   /* HAVE_MBRTOWC */
576 }
577 
578 
579 /* Decode a byte string from the locale encoding.
580 
581    Use the strict error handler if 'surrogateescape' is zero.  Use the
582    surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
583    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
584    can be decoded as a surrogate character, escape the bytes using the
585    surrogateescape error handler instead of decoding them.
586 
587    On success, return 0 and write the newly allocated wide character string into
588    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
589    the number of wide characters excluding the null character into *wlen.
590 
591    On memory allocation failure, return -1.
592 
593    On decoding error, return -2. If wlen is not NULL, write the start of
594    invalid byte sequence in the input string into *wlen. If reason is not NULL,
595    write the decoding error message into *reason.
596 
597    Return -3 if the error handler 'errors' is not supported.
598 
599    Use the Py_EncodeLocaleEx() function to encode the character string back to
600    a byte string. */
601 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)602 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
603                    const char **reason,
604                    int current_locale, _Py_error_handler errors)
605 {
606     if (current_locale) {
607 #ifdef _Py_FORCE_UTF8_LOCALE
608         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
609                                 errors);
610 #else
611         return decode_current_locale(arg, wstr, wlen, reason, errors);
612 #endif
613     }
614 
615 #ifdef _Py_FORCE_UTF8_FS_ENCODING
616     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
617                             errors);
618 #else
619     int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
620 #ifdef MS_WINDOWS
621     use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
622 #endif
623     if (use_utf8) {
624         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
625                                 errors);
626     }
627 
628 #ifdef USE_FORCE_ASCII
629     if (force_ascii == -1) {
630         force_ascii = check_force_ascii();
631     }
632 
633     if (force_ascii) {
634         /* force ASCII encoding to workaround mbstowcs() issue */
635         return decode_ascii(arg, wstr, wlen, reason, errors);
636     }
637 #endif
638 
639     return decode_current_locale(arg, wstr, wlen, reason, errors);
640 #endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
641 }
642 
643 
644 /* Decode a byte string from the locale encoding with the
645    surrogateescape error handler: undecodable bytes are decoded as characters
646    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
647    character, escape the bytes using the surrogateescape error handler instead
648    of decoding them.
649 
650    Return a pointer to a newly allocated wide character string, use
651    PyMem_RawFree() to free the memory. If size is not NULL, write the number of
652    wide characters excluding the null character into *size
653 
654    Return NULL on decoding error or memory allocation error. If *size* is not
655    NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
656    decoding error.
657 
658    Decoding errors should never happen, unless there is a bug in the C
659    library.
660 
661    Use the Py_EncodeLocale() function to encode the character string back to a
662    byte string. */
663 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)664 Py_DecodeLocale(const char* arg, size_t *wlen)
665 {
666     wchar_t *wstr;
667     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
668                                  NULL, 0,
669                                  _Py_ERROR_SURROGATEESCAPE);
670     if (res != 0) {
671         assert(res != -3);
672         if (wlen != NULL) {
673             *wlen = (size_t)res;
674         }
675         return NULL;
676     }
677     return wstr;
678 }
679 
680 
681 static int
encode_current_locale(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)682 encode_current_locale(const wchar_t *text, char **str,
683                       size_t *error_pos, const char **reason,
684                       int raw_malloc, _Py_error_handler errors)
685 {
686     const size_t len = wcslen(text);
687     char *result = NULL, *bytes = NULL;
688     size_t i, size, converted;
689     wchar_t c, buf[2];
690 
691     int surrogateescape;
692     if (get_surrogateescape(errors, &surrogateescape) < 0) {
693         return -3;
694     }
695 
696     /* The function works in two steps:
697        1. compute the length of the output buffer in bytes (size)
698        2. outputs the bytes */
699     size = 0;
700     buf[1] = 0;
701     while (1) {
702         for (i=0; i < len; i++) {
703             c = text[i];
704             if (c >= 0xdc80 && c <= 0xdcff) {
705                 if (!surrogateescape) {
706                     goto encode_error;
707                 }
708                 /* UTF-8b surrogate */
709                 if (bytes != NULL) {
710                     *bytes++ = c - 0xdc00;
711                     size--;
712                 }
713                 else {
714                     size++;
715                 }
716                 continue;
717             }
718             else {
719                 buf[0] = c;
720                 if (bytes != NULL) {
721                     converted = wcstombs(bytes, buf, size);
722                 }
723                 else {
724                     converted = wcstombs(NULL, buf, 0);
725                 }
726                 if (converted == DECODE_ERROR) {
727                     goto encode_error;
728                 }
729                 if (bytes != NULL) {
730                     bytes += converted;
731                     size -= converted;
732                 }
733                 else {
734                     size += converted;
735                 }
736             }
737         }
738         if (result != NULL) {
739             *bytes = '\0';
740             break;
741         }
742 
743         size += 1; /* nul byte at the end */
744         if (raw_malloc) {
745             result = PyMem_RawMalloc(size);
746         }
747         else {
748             result = PyMem_Malloc(size);
749         }
750         if (result == NULL) {
751             return -1;
752         }
753         bytes = result;
754     }
755     *str = result;
756     return 0;
757 
758 encode_error:
759     if (raw_malloc) {
760         PyMem_RawFree(result);
761     }
762     else {
763         PyMem_Free(result);
764     }
765     if (error_pos != NULL) {
766         *error_pos = i;
767     }
768     if (reason) {
769         *reason = "encoding error";
770     }
771     return -2;
772 }
773 
774 
775 /* Encode a string to the locale encoding.
776 
777    Parameters:
778 
779    * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
780      of PyMem_Malloc().
781    * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
782      Python filesystem encoding.
783    * errors: error handler like "strict" or "surrogateescape".
784 
785    Return value:
786 
787     0: success, *str is set to a newly allocated decoded string.
788    -1: memory allocation failure
789    -2: encoding error, set *error_pos and *reason (if set).
790    -3: the error handler 'errors' is not supported.
791  */
792 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)793 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
794                  const char **reason,
795                  int raw_malloc, int current_locale, _Py_error_handler errors)
796 {
797     if (current_locale) {
798 #ifdef _Py_FORCE_UTF8_LOCALE
799         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
800                                 raw_malloc, errors);
801 #else
802         return encode_current_locale(text, str, error_pos, reason,
803                                      raw_malloc, errors);
804 #endif
805     }
806 
807 #ifdef _Py_FORCE_UTF8_FS_ENCODING
808     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
809                             raw_malloc, errors);
810 #else
811     int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
812 #ifdef MS_WINDOWS
813     use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
814 #endif
815     if (use_utf8) {
816         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
817                                 raw_malloc, errors);
818     }
819 
820 #ifdef USE_FORCE_ASCII
821     if (force_ascii == -1) {
822         force_ascii = check_force_ascii();
823     }
824 
825     if (force_ascii) {
826         return encode_ascii(text, str, error_pos, reason,
827                             raw_malloc, errors);
828     }
829 #endif
830 
831     return encode_current_locale(text, str, error_pos, reason,
832                                  raw_malloc, errors);
833 #endif   /* _Py_FORCE_UTF8_FS_ENCODING */
834 }
835 
836 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)837 encode_locale(const wchar_t *text, size_t *error_pos,
838               int raw_malloc, int current_locale)
839 {
840     char *str;
841     int res = encode_locale_ex(text, &str, error_pos, NULL,
842                                raw_malloc, current_locale,
843                                _Py_ERROR_SURROGATEESCAPE);
844     if (res != -2 && error_pos) {
845         *error_pos = (size_t)-1;
846     }
847     if (res != 0) {
848         return NULL;
849     }
850     return str;
851 }
852 
/* Encode a wide character string with the surrogateescape error handler:
   surrogate characters in the range U+DC80..U+DCFF are converted to bytes
   0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, or to (size_t)-1 otherwise.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
870 
871 
/* Same as Py_EncodeLocale(), except that the result is allocated by
   PyMem_RawMalloc(): it must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
879 
880 
881 int
_Py_EncodeLocaleEx(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int current_locale,_Py_error_handler errors)882 _Py_EncodeLocaleEx(const wchar_t *text, char **str,
883                    size_t *error_pos, const char **reason,
884                    int current_locale, _Py_error_handler errors)
885 {
886     return encode_locale_ex(text, str, error_pos, reason, 1,
887                             current_locale, errors);
888 }
889 
890 
891 // Get the current locale encoding name:
892 //
893 // - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
894 // - Return "utf-8" if the UTF-8 Mode is enabled
895 // - On Windows, return the ANSI code page (ex: "cp1250")
896 // - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
897 // - Otherwise, return nl_langinfo(CODESET).
898 //
899 // Return NULL on memory allocation failure.
900 //
901 // See also config_get_locale_encoding()
902 wchar_t*
_Py_GetLocaleEncoding(void)903 _Py_GetLocaleEncoding(void)
904 {
905 #ifdef _Py_FORCE_UTF8_LOCALE
906     // On Android langinfo.h and CODESET are missing,
907     // and UTF-8 is always used in mbstowcs() and wcstombs().
908     return _PyMem_RawWcsdup(L"utf-8");
909 #else
910 
911 #ifdef MS_WINDOWS
912     wchar_t encoding[23];
913     unsigned int ansi_codepage = GetACP();
914     swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
915     encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
916     return _PyMem_RawWcsdup(encoding);
917 #else
918     const char *encoding = nl_langinfo(CODESET);
919     if (!encoding || encoding[0] == '\0') {
920         // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
921         // macOS if the LC_CTYPE locale is not supported.
922         return _PyMem_RawWcsdup(L"utf-8");
923     }
924 
925     wchar_t *wstr;
926     int res = decode_current_locale(encoding, &wstr, NULL,
927                                     NULL, _Py_ERROR_SURROGATEESCAPE);
928     if (res < 0) {
929         return NULL;
930     }
931     return wstr;
932 #endif  // !MS_WINDOWS
933 
934 #endif  // !_Py_FORCE_UTF8_LOCALE
935 }
936 
937 
938 PyObject *
_Py_GetLocaleEncodingObject(void)939 _Py_GetLocaleEncodingObject(void)
940 {
941     wchar_t *encoding = _Py_GetLocaleEncoding();
942     if (encoding == NULL) {
943         PyErr_NoMemory();
944         return NULL;
945     }
946 
947     PyObject *str = PyUnicode_FromWideChar(encoding, -1);
948     PyMem_RawFree(encoding);
949     return str;
950 }
951 
952 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
953 
/* Check whether the current locale uses a non-Unicode internal wchar_t form.

   Return 1 if nl_langinfo(CODESET) is neither "UTF-8" nor "646".
   Return 0 otherwise, or if nl_langinfo(CODESET) returns NULL. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL) {
        return 0;
    }
    if (strcmp(codeset, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(codeset, "646") == 0) {
        return 0;
    }
    return 1;
}
967 
968 static wchar_t *
_Py_ConvertWCharForm(const wchar_t * source,Py_ssize_t size,const char * tocode,const char * fromcode)969 _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
970                      const char *tocode, const char *fromcode)
971 {
972     static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
973 
974     /* Ensure we won't overflow the size. */
975     if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
976         PyErr_NoMemory();
977         return NULL;
978     }
979 
980     /* the string doesn't have to be NULL terminated */
981     wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
982     if (target == NULL) {
983         PyErr_NoMemory();
984         return NULL;
985     }
986 
987     iconv_t cd = iconv_open(tocode, fromcode);
988     if (cd == (iconv_t)-1) {
989         PyErr_Format(PyExc_ValueError, "iconv_open() failed");
990         PyMem_Free(target);
991         return NULL;
992     }
993 
994     char *inbuf = (char *) source;
995     char *outbuf = (char *) target;
996     size_t inbytesleft = sizeof(wchar_t) * size;
997     size_t outbytesleft = inbytesleft;
998 
999     size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
1000     if (ret == DECODE_ERROR) {
1001         PyErr_Format(PyExc_ValueError, "iconv() failed");
1002         PyMem_Free(target);
1003         iconv_close(cd);
1004         return NULL;
1005     }
1006 
1007     iconv_close(cd);
1008     return target;
1009 }
1010 
1011 /* Convert a wide character string to the UCS-4 encoded string. This
1012    is necessary on systems where internal form of wchar_t are not Unicode
1013    code points (e.g. Oracle Solaris).
1014 
1015    Return a pointer to a newly allocated string, use PyMem_Free() to free
1016    the memory. Return NULL and raise exception on conversion or memory
1017    allocation error. */
1018 wchar_t *
_Py_DecodeNonUnicodeWchar(const wchar_t * native,Py_ssize_t size)1019 _Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1020 {
1021     return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1022 }
1023 
1024 /* Convert a UCS-4 encoded string to native wide character string. This
1025    is necessary on systems where internal form of wchar_t are not Unicode
1026    code points (e.g. Oracle Solaris).
1027 
1028    The conversion is done in place. This can be done because both wchar_t
1029    and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1030    to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1031    which is currently the only system using these functions; it doesn't have
1032    to be for other systems).
1033 
1034    Return 0 on success. Return -1 and raise exception on conversion
1035    or memory allocation error. */
1036 int
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t * unicode,Py_ssize_t size)1037 _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1038 {
1039     wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1040     if (!result) {
1041         return -1;
1042     }
1043     memcpy(unicode, result, size * sizeof(wchar_t));
1044     PyMem_Free(result);
1045     return 0;
1046 }
1047 #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1048 
1049 #ifdef MS_WINDOWS
1050 static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1051 
1052 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)1053 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1054 {
1055     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1056     /* Cannot simply cast and dereference in_ptr,
1057        since it might not be aligned properly */
1058     __int64 in;
1059     memcpy(&in, in_ptr, sizeof(in));
1060     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1061     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1062 }
1063 
1064 static void
LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER * in_ptr,time_t * time_out,int * nsec_out)1065 LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out)
1066 {
1067     *nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1068     *time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t);
1069 }
1070 
1071 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)1072 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1073 {
1074     /* XXX endianness */
1075     __int64 out;
1076     out = time_in + secs_between_epochs;
1077     out = out * 10000000 + nsec_in / 100;
1078     memcpy(out_ptr, &out, sizeof(out));
1079 }
1080 
1081 /* Below, we *know* that ugo+r is 0444 */
1082 #if _S_IREAD != 0400
1083 #error Unsupported C library
1084 #endif
1085 static int
attributes_to_mode(DWORD attr)1086 attributes_to_mode(DWORD attr)
1087 {
1088     int m = 0;
1089     if (attr & FILE_ATTRIBUTE_DIRECTORY)
1090         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1091     else
1092         m |= _S_IFREG;
1093     if (attr & FILE_ATTRIBUTE_READONLY)
1094         m |= 0444;
1095     else
1096         m |= 0666;
1097     return m;
1098 }
1099 
1100 
1101 typedef union {
1102     FILE_ID_128 id;
1103     struct {
1104         uint64_t st_ino;
1105         uint64_t st_ino_high;
1106     };
1107 } id_128_to_ino;
1108 
1109 
1110 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,FILE_BASIC_INFO * basic_info,FILE_ID_INFO * id_info,struct _Py_stat_struct * result)1111 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1112                            FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info,
1113                            struct _Py_stat_struct *result)
1114 {
1115     memset(result, 0, sizeof(*result));
1116     result->st_mode = attributes_to_mode(info->dwFileAttributes);
1117     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1118     result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber;
1119     result->st_rdev = 0;
1120     /* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */
1121     if (basic_info) {
1122         LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1123         LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1124         LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1125         LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1126     } else {
1127         FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1128         FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1129         FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1130     }
1131     result->st_nlink = info->nNumberOfLinks;
1132 
1133     if (id_info) {
1134         id_128_to_ino file_id;
1135         file_id.id = id_info->FileId;
1136         result->st_ino = file_id.st_ino;
1137         result->st_ino_high = file_id.st_ino_high;
1138     }
1139     if (!result->st_ino && !result->st_ino_high) {
1140         /* should only occur for DirEntry_from_find_data, in which case the
1141            index is likely to be zero anyway. */
1142         result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1143     }
1144 
1145     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1146        open other name surrogate reparse points without traversing them. To
1147        detect/handle these, check st_file_attributes and st_reparse_tag. */
1148     result->st_reparse_tag = reparse_tag;
1149     if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1150         reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1151         /* set the bits that make this a symlink */
1152         result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1153     }
1154     result->st_file_attributes = info->dwFileAttributes;
1155 }
1156 
1157 void
_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION * info,struct _Py_stat_struct * result)1158 _Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info,
1159                             struct _Py_stat_struct *result)
1160 {
1161     memset(result, 0, sizeof(*result));
1162     result->st_mode = attributes_to_mode(info->FileAttributes);
1163     result->st_size = info->EndOfFile.QuadPart;
1164     LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1165     LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1166     LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1167     LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1168     result->st_nlink = info->NumberOfLinks;
1169     result->st_dev = info->VolumeSerialNumber.QuadPart;
1170     /* File systems with less than 128-bits zero pad into this field */
1171     id_128_to_ino file_id;
1172     file_id.id = info->FileId128;
1173     result->st_ino = file_id.st_ino;
1174     result->st_ino_high = file_id.st_ino_high;
1175     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1176        open other name surrogate reparse points without traversing them. To
1177        detect/handle these, check st_file_attributes and st_reparse_tag. */
1178     result->st_reparse_tag = info->ReparseTag;
1179     if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1180         info->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
1181         /* set the bits that make this a symlink */
1182         result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1183     }
1184     result->st_file_attributes = info->FileAttributes;
1185     switch (info->DeviceType) {
1186     case FILE_DEVICE_DISK:
1187     case FILE_DEVICE_VIRTUAL_DISK:
1188     case FILE_DEVICE_DFS:
1189     case FILE_DEVICE_CD_ROM:
1190     case FILE_DEVICE_CONTROLLER:
1191     case FILE_DEVICE_DATALINK:
1192         break;
1193     case FILE_DEVICE_DISK_FILE_SYSTEM:
1194     case FILE_DEVICE_CD_ROM_FILE_SYSTEM:
1195     case FILE_DEVICE_NETWORK_FILE_SYSTEM:
1196         result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */
1197         break;
1198     case FILE_DEVICE_CONSOLE:
1199     case FILE_DEVICE_NULL:
1200     case FILE_DEVICE_KEYBOARD:
1201     case FILE_DEVICE_MODEM:
1202     case FILE_DEVICE_MOUSE:
1203     case FILE_DEVICE_PARALLEL_PORT:
1204     case FILE_DEVICE_PRINTER:
1205     case FILE_DEVICE_SCREEN:
1206     case FILE_DEVICE_SERIAL_PORT:
1207     case FILE_DEVICE_SOUND:
1208         result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR;
1209         break;
1210     case FILE_DEVICE_NAMED_PIPE:
1211         result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO;
1212         break;
1213     default:
1214         if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
1215             result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR;
1216         }
1217         break;
1218     }
1219 }
1220 
1221 #endif
1222 
/* Return information about an open file descriptor without raising.

   On POSIX this is a plain fstat() call.

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   because they support files larger than 2 GiB; fstat() may fail with
   EOVERFLOW on such files since the file size type is a signed 32-bit
   integer (see issue #23152). For handles that are not disk files only
   st_mode is filled in (character device or pipe); the rest stays zero.

   Return 0 on success and nonzero on error. On POSIX errno is set. On
   Windows the last Windows error is set; failures other than an invalid
   handle also set errno from that Windows error. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int kind = GetFileType(handle);
    if (kind == FILE_TYPE_UNKNOWN) {
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Anything that is not a disk file only gets a file type. */
    switch (kind) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    FILE_BASIC_INFO basic;
    if (!GetFileInformationByHandle(handle, &info) ||
        !GetFileInformationByHandleEx(handle, FileBasicInfo, &basic, sizeof(basic))) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    /* The file ID is optional: pass NULL along when it cannot be queried. */
    FILE_ID_INFO id_storage;
    FILE_ID_INFO *id_info = NULL;
    if (GetFileInformationByHandleEx(handle, FileIdInfo, &id_storage, sizeof(id_storage))) {
        id_info = &id_storage;
    }

    _Py_attribute_data_to_stat(&info, 0, &basic, id_info, status);
    return 0;
#else
    return fstat(fd, status);
#endif
}
1293 
1294 /* Return information about a file.
1295 
1296    On POSIX, use fstat().
1297 
1298    On Windows, use GetFileType() and GetFileInformationByHandle() which support
1299    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1300    than 2 GiB because the file size type is a signed 32-bit integer: see issue
1301    #23152.
1302 
1303    Raise an exception and return -1 on error. On Windows, set the last Windows
1304    error on error. On POSIX, set errno on error. Fill status and return 0 on
1305    success.
1306 
1307    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1308    to call fstat(). The caller must hold the GIL. */
1309 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1310 _Py_fstat(int fd, struct _Py_stat_struct *status)
1311 {
1312     int res;
1313 
1314     assert(PyGILState_Check());
1315 
1316     Py_BEGIN_ALLOW_THREADS
1317     res = _Py_fstat_noraise(fd, status);
1318     Py_END_ALLOW_THREADS
1319 
1320     if (res != 0) {
1321 #ifdef MS_WINDOWS
1322         PyErr_SetFromWindowsErr(0);
1323 #else
1324         PyErr_SetFromErrno(PyExc_OSError);
1325 #endif
1326         return -1;
1327     }
1328     return 0;
1329 }
1330 
/* Like _Py_stat() but with a raw wide-character filename.

   On Windows, call _wstat() and copy only st_mode into *buf. Elsewhere,
   encode the path with _Py_EncodeLocaleRaw() and call stat(); if the path
   cannot be encoded, set errno to EINVAL and return -1.

   Return the result of _wstat() / stat() otherwise. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
#ifdef MS_WINDOWS
    struct _stat native;
    int rc = _wstat(path, &native);
    if (rc == 0) {
        buf->st_mode = native.st_mode;
    }
    return rc;
#else
    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    int rc = stat(encoded, buf);
    PyMem_RawFree(encoded);
    return rc;
#endif
}
1354 
1355 
1356 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1357    call stat() otherwise. Only fill st_mode attribute on Windows.
1358 
1359    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1360    raised. */
1361 
1362 int
_Py_stat(PyObject * path,struct stat * statbuf)1363 _Py_stat(PyObject *path, struct stat *statbuf)
1364 {
1365 #ifdef MS_WINDOWS
1366     int err;
1367 
1368     wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1369     if (wpath == NULL)
1370         return -2;
1371 
1372     err = _Py_wstat(wpath, statbuf);
1373     PyMem_Free(wpath);
1374     return err;
1375 #else
1376     int ret;
1377     PyObject *bytes;
1378     char *cpath;
1379 
1380     bytes = PyUnicode_EncodeFSDefault(path);
1381     if (bytes == NULL)
1382         return -2;
1383 
1384     /* check for embedded null bytes */
1385     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1386         Py_DECREF(bytes);
1387         return -2;
1388     }
1389 
1390     ret = stat(cpath, statbuf);
1391     Py_DECREF(bytes);
1392     return ret;
1393 #endif
1394 }
1395 
// For some Windows API partitions, SetHandleInformation() is declared
// but none of the handle flags are defined: provide the missing flag.
#if defined(MS_WINDOWS) && !defined(HANDLE_FLAG_INHERIT)
#  define HANDLE_FLAG_INHERIT 0x00000001
#endif
1403 
1404 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1405 static int
get_inheritable(int fd,int raise)1406 get_inheritable(int fd, int raise)
1407 {
1408 #ifdef MS_WINDOWS
1409     HANDLE handle;
1410     DWORD flags;
1411 
1412     handle = _Py_get_osfhandle_noraise(fd);
1413     if (handle == INVALID_HANDLE_VALUE) {
1414         if (raise)
1415             PyErr_SetFromErrno(PyExc_OSError);
1416         return -1;
1417     }
1418 
1419     if (!GetHandleInformation(handle, &flags)) {
1420         if (raise)
1421             PyErr_SetFromWindowsErr(0);
1422         return -1;
1423     }
1424 
1425     return (flags & HANDLE_FLAG_INHERIT);
1426 #else
1427     int flags;
1428 
1429     flags = fcntl(fd, F_GETFD, 0);
1430     if (flags == -1) {
1431         if (raise)
1432             PyErr_SetFromErrno(PyExc_OSError);
1433         return -1;
1434     }
1435     return !(flags & FD_CLOEXEC);
1436 #endif
1437 }
1438 
/* Report the inheritable flag of a file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1447 
1448 
1449 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1450 static int
set_inheritable(int fd,int inheritable,int raise,int * atomic_flag_works)1451 set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1452 {
1453 #ifdef MS_WINDOWS
1454     HANDLE handle;
1455     DWORD flags;
1456 #else
1457 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1458     static int ioctl_works = -1;
1459     int request;
1460     int err;
1461 #endif
1462     int flags, new_flags;
1463     int res;
1464 #endif
1465 
1466     /* atomic_flag_works can only be used to make the file descriptor
1467        non-inheritable */
1468     assert(!(atomic_flag_works != NULL && inheritable));
1469 
1470     if (atomic_flag_works != NULL && !inheritable) {
1471         if (*atomic_flag_works == -1) {
1472             int isInheritable = get_inheritable(fd, raise);
1473             if (isInheritable == -1)
1474                 return -1;
1475             *atomic_flag_works = !isInheritable;
1476         }
1477 
1478         if (*atomic_flag_works)
1479             return 0;
1480     }
1481 
1482 #ifdef MS_WINDOWS
1483     handle = _Py_get_osfhandle_noraise(fd);
1484     if (handle == INVALID_HANDLE_VALUE) {
1485         if (raise)
1486             PyErr_SetFromErrno(PyExc_OSError);
1487         return -1;
1488     }
1489 
1490     if (inheritable)
1491         flags = HANDLE_FLAG_INHERIT;
1492     else
1493         flags = 0;
1494 
1495     if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1496         if (raise)
1497             PyErr_SetFromWindowsErr(0);
1498         return -1;
1499     }
1500     return 0;
1501 
1502 #else
1503 
1504 #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1505     if (raise != 0 && _Py_atomic_load_int_relaxed(&ioctl_works) != 0) {
1506         /* fast-path: ioctl() only requires one syscall */
1507         /* caveat: raise=0 is an indicator that we must be async-signal-safe
1508          * thus avoid using ioctl() so we skip the fast-path. */
1509         if (inheritable)
1510             request = FIONCLEX;
1511         else
1512             request = FIOCLEX;
1513         err = ioctl(fd, request, NULL);
1514         if (!err) {
1515             if (_Py_atomic_load_int_relaxed(&ioctl_works) == -1) {
1516                 _Py_atomic_store_int_relaxed(&ioctl_works, 1);
1517             }
1518             return 0;
1519         }
1520 
1521 #ifdef O_PATH
1522         if (errno == EBADF) {
1523             // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1524             // on O_PATH file descriptors. Fall through to the fcntl()
1525             // implementation.
1526         }
1527         else
1528 #endif
1529         if (errno != ENOTTY && errno != EACCES) {
1530             if (raise)
1531                 PyErr_SetFromErrno(PyExc_OSError);
1532             return -1;
1533         }
1534         else {
1535             /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1536                device". The ioctl is declared but not supported by the kernel.
1537                Remember that ioctl() doesn't work. It is the case on
1538                Illumos-based OS for example.
1539 
1540                Issue #27057: When SELinux policy disallows ioctl it will fail
1541                with EACCES. While FIOCLEX is safe operation it may be
1542                unavailable because ioctl was denied altogether.
1543                This can be the case on Android. */
1544             _Py_atomic_store_int_relaxed(&ioctl_works, 0);
1545         }
1546         /* fallback to fcntl() if ioctl() does not work */
1547     }
1548 #endif
1549 
1550     /* slow-path: fcntl() requires two syscalls */
1551     flags = fcntl(fd, F_GETFD);
1552     if (flags < 0) {
1553         if (raise)
1554             PyErr_SetFromErrno(PyExc_OSError);
1555         return -1;
1556     }
1557 
1558     if (inheritable) {
1559         new_flags = flags & ~FD_CLOEXEC;
1560     }
1561     else {
1562         new_flags = flags | FD_CLOEXEC;
1563     }
1564 
1565     if (new_flags == flags) {
1566         /* FD_CLOEXEC flag already set/cleared: nothing to do */
1567         return 0;
1568     }
1569 
1570     res = fcntl(fd, F_SETFD, new_flags);
1571     if (res < 0) {
1572         if (raise)
1573             PyErr_SetFromErrno(PyExc_OSError);
1574         return -1;
1575     }
1576     return 0;
1577 #endif
1578 }
1579 
/* Clear the inheritable flag of fd without ever raising an exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    /* inheritable=0, raise=0, and no atomic-flag cache to consult. */
    int *no_atomic_flag = NULL;

    return set_inheritable(fd, 0, 0, no_atomic_flag);
}
1587 
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, inspect the file descriptor: if it is
      already non-inheritable, set *atomic_flag_works to 1 and do nothing
      else; otherwise set *atomic_flag_works to 0 and make the file
      descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the file descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1609 
/* Variant of _Py_set_inheritable() that never raises an exception: on
   error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1618 
1619 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1620 _Py_open_impl(const char *pathname, int flags, int gil_held)
1621 {
1622     int fd;
1623     int async_err = 0;
1624 #ifndef MS_WINDOWS
1625     int *atomic_flag_works;
1626 #endif
1627 
1628 #ifdef MS_WINDOWS
1629     flags |= O_NOINHERIT;
1630 #elif defined(O_CLOEXEC)
1631     atomic_flag_works = &_Py_open_cloexec_works;
1632     flags |= O_CLOEXEC;
1633 #else
1634     atomic_flag_works = NULL;
1635 #endif
1636 
1637     if (gil_held) {
1638         PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1639         if (pathname_obj == NULL) {
1640             return -1;
1641         }
1642         if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1643             Py_DECREF(pathname_obj);
1644             return -1;
1645         }
1646 
1647         do {
1648             Py_BEGIN_ALLOW_THREADS
1649             fd = open(pathname, flags);
1650             Py_END_ALLOW_THREADS
1651         } while (fd < 0
1652                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1653         if (async_err) {
1654             Py_DECREF(pathname_obj);
1655             return -1;
1656         }
1657         if (fd < 0) {
1658             PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1659             Py_DECREF(pathname_obj);
1660             return -1;
1661         }
1662         Py_DECREF(pathname_obj);
1663     }
1664     else {
1665         fd = open(pathname, flags);
1666         if (fd < 0)
1667             return -1;
1668     }
1669 
1670 #ifndef MS_WINDOWS
1671     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1672         close(fd);
1673         return -1;
1674     }
1675 #endif
1676 
1677     return fd;
1678 }
1679 
/* Open a file with the specified flags (wrapper around open()).

   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1697 
/* Open a file with the specified flags (wrapper around open()) without
   using the Python exception machinery.

   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1709 
1710 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1711    encoding and use fopen() otherwise.
1712 
1713    The file descriptor is created non-inheritable.
1714 
1715    If interrupted by a signal, fail with EINTR. */
1716 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1717 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1718 {
1719     FILE *f;
1720     if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1721         return NULL;
1722     }
1723 #ifndef MS_WINDOWS
1724     char *cpath;
1725     char cmode[10];
1726     size_t r;
1727     r = wcstombs(cmode, mode, 10);
1728     if (r == DECODE_ERROR || r >= 10) {
1729         errno = EINVAL;
1730         return NULL;
1731     }
1732     cpath = _Py_EncodeLocaleRaw(path, NULL);
1733     if (cpath == NULL) {
1734         return NULL;
1735     }
1736     f = fopen(cpath, cmode);
1737     PyMem_RawFree(cpath);
1738 #else
1739     f = _wfopen(path, mode);
1740 #endif
1741     if (f == NULL)
1742         return NULL;
1743     if (make_non_inheritable(fileno(f)) < 0) {
1744         fclose(f);
1745         return NULL;
1746     }
1747     return f;
1748 }
1749 
1750 
1751 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1752    encoding and call fopen() otherwise.
1753 
1754    Return the new file object on success. Raise an exception and return NULL
1755    on error.
1756 
1757    The file descriptor is created non-inheritable.
1758 
1759    When interrupted by a signal (open() fails with EINTR), retry the syscall,
1760    except if the Python signal handler raises an exception.
1761 
1762    Release the GIL to call _wfopen() or fopen(). The caller must hold
1763    the GIL. */
1764 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1765 _Py_fopen_obj(PyObject *path, const char *mode)
1766 {
1767     FILE *f;
1768     int async_err = 0;
1769 #ifdef MS_WINDOWS
1770     wchar_t wmode[10];
1771     int usize;
1772 
1773     assert(PyGILState_Check());
1774 
1775     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1776         return NULL;
1777     }
1778     if (!PyUnicode_Check(path)) {
1779         PyErr_Format(PyExc_TypeError,
1780                      "str file path expected under Windows, got %R",
1781                      Py_TYPE(path));
1782         return NULL;
1783     }
1784 
1785     wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1786     if (wpath == NULL)
1787         return NULL;
1788 
1789     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1790                                 wmode, Py_ARRAY_LENGTH(wmode));
1791     if (usize == 0) {
1792         PyErr_SetFromWindowsErr(0);
1793         PyMem_Free(wpath);
1794         return NULL;
1795     }
1796 
1797     do {
1798         Py_BEGIN_ALLOW_THREADS
1799         f = _wfopen(wpath, wmode);
1800         Py_END_ALLOW_THREADS
1801     } while (f == NULL
1802              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1803     int saved_errno = errno;
1804     PyMem_Free(wpath);
1805 #else
1806     PyObject *bytes;
1807     const char *path_bytes;
1808 
1809     assert(PyGILState_Check());
1810 
1811     if (!PyUnicode_FSConverter(path, &bytes))
1812         return NULL;
1813     path_bytes = PyBytes_AS_STRING(bytes);
1814 
1815     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1816         Py_DECREF(bytes);
1817         return NULL;
1818     }
1819 
1820     do {
1821         Py_BEGIN_ALLOW_THREADS
1822         f = fopen(path_bytes, mode);
1823         Py_END_ALLOW_THREADS
1824     } while (f == NULL
1825              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1826     int saved_errno = errno;
1827     Py_DECREF(bytes);
1828 #endif
1829     if (async_err)
1830         return NULL;
1831 
1832     if (f == NULL) {
1833         errno = saved_errno;
1834         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1835         return NULL;
1836     }
1837 
1838     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1839         fclose(f);
1840         return NULL;
1841     }
1842     return f;
1843 }
1844 
1845 /* Read count bytes from fd into buf.
1846 
1847    On success, return the number of read bytes, it can be lower than count.
1848    If the current file offset is at or past the end of file, no bytes are read,
1849    and read() returns zero.
1850 
1851    On error, raise an exception, set errno and return -1.
1852 
1853    When interrupted by a signal (read() fails with EINTR), retry the syscall.
1854    If the Python signal handler raises an exception, the function returns -1
1855    (the syscall is not retried).
1856 
1857    Release the GIL to call read(). The caller must hold the GIL. */
1858 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1859 _Py_read(int fd, void *buf, size_t count)
1860 {
1861     Py_ssize_t n;
1862     int err;
1863     int async_err = 0;
1864 
1865     assert(PyGILState_Check());
1866 
1867     /* _Py_read() must not be called with an exception set, otherwise the
1868      * caller may think that read() was interrupted by a signal and the signal
1869      * handler raised an exception. */
1870     assert(!PyErr_Occurred());
1871 
1872     if (count > _PY_READ_MAX) {
1873         count = _PY_READ_MAX;
1874     }
1875 
1876     _Py_BEGIN_SUPPRESS_IPH
1877     do {
1878         Py_BEGIN_ALLOW_THREADS
1879         errno = 0;
1880 #ifdef MS_WINDOWS
1881         _doserrno = 0;
1882         n = read(fd, buf, (int)count);
1883         // read() on a non-blocking empty pipe fails with EINVAL, which is
1884         // mapped from the Windows error code ERROR_NO_DATA.
1885         if (n < 0 && errno == EINVAL) {
1886             if (_doserrno == ERROR_NO_DATA) {
1887                 errno = EAGAIN;
1888             }
1889         }
1890 #else
1891         n = read(fd, buf, count);
1892 #endif
1893         /* save/restore errno because PyErr_CheckSignals()
1894          * and PyErr_SetFromErrno() can modify it */
1895         err = errno;
1896         Py_END_ALLOW_THREADS
1897     } while (n < 0 && err == EINTR &&
1898             !(async_err = PyErr_CheckSignals()));
1899     _Py_END_SUPPRESS_IPH
1900 
1901     if (async_err) {
1902         /* read() was interrupted by a signal (failed with EINTR)
1903          * and the Python signal handler raised an exception */
1904         errno = err;
1905         assert(errno == EINTR && PyErr_Occurred());
1906         return -1;
1907     }
1908     if (n < 0) {
1909         PyErr_SetFromErrno(PyExc_OSError);
1910         errno = err;
1911         return -1;
1912     }
1913 
1914     return n;
1915 }
1916 
1917 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1918 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1919 {
1920     Py_ssize_t n;
1921     int err;
1922     int async_err = 0;
1923 
1924     _Py_BEGIN_SUPPRESS_IPH
1925 #ifdef MS_WINDOWS
1926     if (count > 32767) {
1927         /* Issue #11395: the Windows console returns an error (12: not
1928            enough space error) on writing into stdout if stdout mode is
1929            binary and the length is greater than 66,000 bytes (or less,
1930            depending on heap usage). */
1931         if (gil_held) {
1932             Py_BEGIN_ALLOW_THREADS
1933             if (isatty(fd)) {
1934                 count = 32767;
1935             }
1936             Py_END_ALLOW_THREADS
1937         } else {
1938             if (isatty(fd)) {
1939                 count = 32767;
1940             }
1941         }
1942     }
1943 
1944 #endif
1945     if (count > _PY_WRITE_MAX) {
1946         count = _PY_WRITE_MAX;
1947     }
1948 
1949     if (gil_held) {
1950         do {
1951             Py_BEGIN_ALLOW_THREADS
1952             errno = 0;
1953 #ifdef MS_WINDOWS
1954             // write() on a non-blocking pipe fails with ENOSPC on Windows if
1955             // the pipe lacks available space for the entire buffer.
1956             int c = (int)count;
1957             do {
1958                 _doserrno = 0;
1959                 n = write(fd, buf, c);
1960                 if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1961                     break;
1962                 }
1963                 errno = EAGAIN;
1964                 c /= 2;
1965             } while (c > 0);
1966 #else
1967             n = write(fd, buf, count);
1968 #endif
1969             /* save/restore errno because PyErr_CheckSignals()
1970              * and PyErr_SetFromErrno() can modify it */
1971             err = errno;
1972             Py_END_ALLOW_THREADS
1973         } while (n < 0 && err == EINTR &&
1974                 !(async_err = PyErr_CheckSignals()));
1975     }
1976     else {
1977         do {
1978             errno = 0;
1979 #ifdef MS_WINDOWS
1980             // write() on a non-blocking pipe fails with ENOSPC on Windows if
1981             // the pipe lacks available space for the entire buffer.
1982             int c = (int)count;
1983             do {
1984                 _doserrno = 0;
1985                 n = write(fd, buf, c);
1986                 if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1987                     break;
1988                 }
1989                 errno = EAGAIN;
1990                 c /= 2;
1991             } while (c > 0);
1992 #else
1993             n = write(fd, buf, count);
1994 #endif
1995             err = errno;
1996         } while (n < 0 && err == EINTR);
1997     }
1998     _Py_END_SUPPRESS_IPH
1999 
2000     if (async_err) {
2001         /* write() was interrupted by a signal (failed with EINTR)
2002            and the Python signal handler raised an exception (if gil_held is
2003            nonzero). */
2004         errno = err;
2005         assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
2006         return -1;
2007     }
2008     if (n < 0) {
2009         if (gil_held)
2010             PyErr_SetFromErrno(PyExc_OSError);
2011         errno = err;
2012         return -1;
2013     }
2014 
2015     return n;
2016 }
2017 
2018 /* Write count bytes of buf into fd.
2019 
2020    On success, return the number of written bytes, it can be lower than count
2021    including 0. On error, raise an exception, set errno and return -1.
2022 
2023    When interrupted by a signal (write() fails with EINTR), retry the syscall.
2024    If the Python signal handler raises an exception, the function returns -1
2025    (the syscall is not retried).
2026 
2027    Release the GIL to call write(). The caller must hold the GIL. */
2028 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)2029 _Py_write(int fd, const void *buf, size_t count)
2030 {
2031     assert(PyGILState_Check());
2032 
2033     /* _Py_write() must not be called with an exception set, otherwise the
2034      * caller may think that write() was interrupted by a signal and the signal
2035      * handler raised an exception. */
2036     assert(!PyErr_Occurred());
2037 
2038     return _Py_write_impl(fd, buf, count, 1);
2039 }
2040 
2041 /* Write count bytes of buf into fd.
2042  *
2043  * On success, return the number of written bytes, it can be lower than count
2044  * including 0. On error, set errno and return -1.
2045  *
2046  * When interrupted by a signal (write() fails with EINTR), retry the syscall
2047  * without calling the Python signal handler. */
2048 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)2049 _Py_write_noraise(int fd, const void *buf, size_t count)
2050 {
2051     return _Py_write_impl(fd, buf, count, 0);
2052 }
2053 
2054 #ifdef HAVE_READLINK
2055 
2056 /* Read value of symbolic link. Encode the path to the locale encoding, decode
2057    the result from the locale encoding.
2058 
2059    Return -1 on encoding error, on readlink() error, if the internal buffer is
2060    too short, on decoding error, or if 'buf' is too short. */
2061 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)2062 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
2063 {
2064     char *cpath;
2065     char cbuf[MAXPATHLEN];
2066     size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
2067     wchar_t *wbuf;
2068     Py_ssize_t res;
2069     size_t r1;
2070 
2071     cpath = _Py_EncodeLocaleRaw(path, NULL);
2072     if (cpath == NULL) {
2073         errno = EINVAL;
2074         return -1;
2075     }
2076     res = readlink(cpath, cbuf, cbuf_len);
2077     PyMem_RawFree(cpath);
2078     if (res == -1) {
2079         return -1;
2080     }
2081     if ((size_t)res == cbuf_len) {
2082         errno = EINVAL;
2083         return -1;
2084     }
2085     cbuf[res] = '\0'; /* buf will be null terminated */
2086     wbuf = Py_DecodeLocale(cbuf, &r1);
2087     if (wbuf == NULL) {
2088         errno = EINVAL;
2089         return -1;
2090     }
2091     /* wbuf must have space to store the trailing NUL character */
2092     if (buflen <= r1) {
2093         PyMem_RawFree(wbuf);
2094         errno = EINVAL;
2095         return -1;
2096     }
2097     wcsncpy(buf, wbuf, buflen);
2098     PyMem_RawFree(wbuf);
2099     return (int)r1;
2100 }
2101 #endif
2102 
2103 #ifdef HAVE_REALPATH
2104 
2105 /* Return the canonicalized absolute pathname. Encode path to the locale
2106    encoding, decode the result from the locale encoding.
2107 
2108    Return NULL on encoding error, realpath() error, decoding error
2109    or if 'resolved_path' is too short. */
2110 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)2111 _Py_wrealpath(const wchar_t *path,
2112               wchar_t *resolved_path, size_t resolved_path_len)
2113 {
2114     char *cpath;
2115     char cresolved_path[MAXPATHLEN];
2116     wchar_t *wresolved_path;
2117     char *res;
2118     size_t r;
2119     cpath = _Py_EncodeLocaleRaw(path, NULL);
2120     if (cpath == NULL) {
2121         errno = EINVAL;
2122         return NULL;
2123     }
2124     res = realpath(cpath, cresolved_path);
2125     PyMem_RawFree(cpath);
2126     if (res == NULL)
2127         return NULL;
2128 
2129     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
2130     if (wresolved_path == NULL) {
2131         errno = EINVAL;
2132         return NULL;
2133     }
2134     /* wresolved_path must have space to store the trailing NUL character */
2135     if (resolved_path_len <= r) {
2136         PyMem_RawFree(wresolved_path);
2137         errno = EINVAL;
2138         return NULL;
2139     }
2140     wcsncpy(resolved_path, wresolved_path, resolved_path_len);
2141     PyMem_RawFree(wresolved_path);
2142     return resolved_path;
2143 }
2144 #endif
2145 
2146 
2147 int
_Py_isabs(const wchar_t * path)2148 _Py_isabs(const wchar_t *path)
2149 {
2150 #ifdef MS_WINDOWS
2151     const wchar_t *tail;
2152     HRESULT hr = PathCchSkipRoot(path, &tail);
2153     if (FAILED(hr) || path == tail) {
2154         return 0;
2155     }
2156     if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
2157         // Exclude paths with leading SEP
2158         return 0;
2159     }
2160     if (tail == &path[2] && path[1] == L':') {
2161         // Exclude drive-relative paths (e.g. C:filename.ext)
2162         return 0;
2163     }
2164     return 1;
2165 #else
2166     return (path[0] == SEP);
2167 #endif
2168 }
2169 
2170 
2171 /* Get an absolute path.
2172    On error (ex: fail to get the current directory), return -1.
2173    On memory allocation failure, set *abspath_p to NULL and return 0.
2174    On success, return a newly allocated to *abspath_p to and return 0.
2175    The string must be freed by PyMem_RawFree(). */
2176 int
_Py_abspath(const wchar_t * path,wchar_t ** abspath_p)2177 _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2178 {
2179     if (path[0] == '\0' || !wcscmp(path, L".")) {
2180         wchar_t cwd[MAXPATHLEN + 1];
2181         cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2182         if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2183             /* unable to get the current directory */
2184             return -1;
2185         }
2186         *abspath_p = _PyMem_RawWcsdup(cwd);
2187         return 0;
2188     }
2189 
2190     if (_Py_isabs(path)) {
2191         *abspath_p = _PyMem_RawWcsdup(path);
2192         return 0;
2193     }
2194 
2195 #ifdef MS_WINDOWS
2196     return _PyOS_getfullpathname(path, abspath_p);
2197 #else
2198     wchar_t cwd[MAXPATHLEN + 1];
2199     cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2200     if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2201         /* unable to get the current directory */
2202         return -1;
2203     }
2204 
2205     size_t cwd_len = wcslen(cwd);
2206     size_t path_len = wcslen(path);
2207     size_t len = cwd_len + 1 + path_len + 1;
2208     if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2209         *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2210     }
2211     else {
2212         *abspath_p = NULL;
2213     }
2214     if (*abspath_p == NULL) {
2215         return 0;
2216     }
2217 
2218     wchar_t *abspath = *abspath_p;
2219     memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2220     abspath += cwd_len;
2221 
2222     *abspath = (wchar_t)SEP;
2223     abspath++;
2224 
2225     memcpy(abspath, path, path_len * sizeof(wchar_t));
2226     abspath += path_len;
2227 
2228     *abspath = 0;
2229     return 0;
2230 #endif
2231 }
2232 
2233 // The Windows Games API family implements the PathCch* APIs in the Xbox OS,
2234 // but does not expose them yet. Load them dynamically until
2235 // 1) they are officially exposed
2236 // 2) we stop supporting older versions of the GDK which do not expose them
2237 #if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
2238 HRESULT
PathCchSkipRoot(const wchar_t * path,const wchar_t ** rootEnd)2239 PathCchSkipRoot(const wchar_t *path, const wchar_t **rootEnd)
2240 {
2241     static int initialized = 0;
2242     typedef HRESULT(__stdcall *PPathCchSkipRoot) (PCWSTR pszPath,
2243                                                   PCWSTR *ppszRootEnd);
2244     static PPathCchSkipRoot _PathCchSkipRoot;
2245 
2246     if (initialized == 0) {
2247         HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2248                                          LOAD_LIBRARY_SEARCH_SYSTEM32);
2249         if (pathapi) {
2250             _PathCchSkipRoot = (PPathCchSkipRoot)GetProcAddress(
2251                 pathapi, "PathCchSkipRoot");
2252         }
2253         else {
2254             _PathCchSkipRoot = NULL;
2255         }
2256         initialized = 1;
2257     }
2258 
2259     if (!_PathCchSkipRoot) {
2260         return E_NOINTERFACE;
2261     }
2262 
2263     return _PathCchSkipRoot(path, rootEnd);
2264 }
2265 
2266 static HRESULT
PathCchCombineEx(wchar_t * buffer,size_t bufsize,const wchar_t * dirname,const wchar_t * relfile,unsigned long flags)2267 PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
2268                  const wchar_t *relfile, unsigned long flags)
2269 {
2270     static int initialized = 0;
2271     typedef HRESULT(__stdcall *PPathCchCombineEx) (PWSTR pszPathOut,
2272                                                    size_t cchPathOut,
2273                                                    PCWSTR pszPathIn,
2274                                                    PCWSTR pszMore,
2275                                                    unsigned long dwFlags);
2276     static PPathCchCombineEx _PathCchCombineEx;
2277 
2278     if (initialized == 0) {
2279         HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2280                                          LOAD_LIBRARY_SEARCH_SYSTEM32);
2281         if (pathapi) {
2282             _PathCchCombineEx = (PPathCchCombineEx)GetProcAddress(
2283                 pathapi, "PathCchCombineEx");
2284         }
2285         else {
2286             _PathCchCombineEx = NULL;
2287         }
2288         initialized = 1;
2289     }
2290 
2291     if (!_PathCchCombineEx) {
2292         return E_NOINTERFACE;
2293     }
2294 
2295     return _PathCchCombineEx(buffer, bufsize, dirname, relfile, flags);
2296 }
2297 
2298 #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
2299 
/* Measure the drive and root prefix of 'path'.

   On return, *drvsize holds the number of leading characters that make up
   the drive (always 0 in the non-Windows branch) and *rootsize holds the
   number of root characters that follow it (0, 1 or 2 in the non-Windows
   branch; 0 or 1 in the Windows branch).

   'size' is only consulted in the Windows branch: when it is >= 0 the path
   is taken to end at path[size], otherwise at the first NUL character. The
   non-Windows branch never references 'size'.
   NOTE(review): both branches read path[1], path[2], ... without first
   checking for the end marker, so they appear to rely on the buffer being
   NUL-terminated -- confirm against callers. */
void
_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
             Py_ssize_t *rootsize)
{
    assert(drvsize);
    assert(rootsize);
#ifndef MS_WINDOWS
#define IS_SEP(x) (*(x) == SEP)
    // There is no drive component in this branch
    *drvsize = 0;
    if (!IS_SEP(&path[0])) {
        // Relative path, e.g.: 'foo'
        *rootsize = 0;
    }
    else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) {
        // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
        // (one leading slash, or three and more: the root is a single slash)
        *rootsize = 1;
    }
    else {
        // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
        // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
        *rootsize = 2;
    }
#undef IS_SEP
#else
    // pEnd is the explicit end of the path, or NULL if NUL-terminated
    const wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
    if (IS_SEP(&path[0])) {
        if (IS_SEP(&path[1])) {
            // Device drives, e.g. \\.\device or \\?\device
            // UNC drives, e.g. \\server\share or \\?\UNC\server\share
            Py_ssize_t idx;
            // Case-insensitive match of the "\\?\UNC\" prefix
            if (path[2] == L'?' && IS_SEP(&path[3]) &&
                (path[4] == L'U' || path[4] == L'u') &&
                (path[5] == L'N' || path[5] == L'n') &&
                (path[6] == L'C' || path[6] == L'c') &&
                IS_SEP(&path[7]))
            {
                idx = 8;
            }
            else {
                idx = 2;
            }
            // Skip the first component after the prefix (server or device)
            while (!SEP_OR_END(&path[idx])) {
                idx++;
            }
            if (IS_END(&path[idx])) {
                // Nothing after the first component: all of it is the drive
                *drvsize = idx;
                *rootsize = 0;
            }
            else {
                // Skip the separator and the second component (e.g. share)
                idx++;
                while (!SEP_OR_END(&path[idx])) {
                    idx++;
                }
                *drvsize = idx;
                if (IS_END(&path[idx])) {
                    *rootsize = 0;
                }
                else {
                    // A separator follows the drive: that is the root
                    *rootsize = 1;
                }
            }
        }
        else {
            // Relative path with root, e.g. \Windows
            *drvsize = 0;
            *rootsize = 1;
        }
    }
    else if (!IS_END(&path[0]) && path[1] == L':') {
        // Drive-letter prefix, e.g. X:
        *drvsize = 2;
        if (IS_SEP(&path[2])) {
            // Absolute drive-letter path, e.g. X:\Windows
            *rootsize = 1;
        }
        else {
            // Relative path with drive, e.g. X:Windows
            *rootsize = 0;
        }
    }
    else {
        // Relative path, e.g. Windows
        *drvsize = 0;
        *rootsize = 0;
    }
#undef SEP_OR_END
#undef IS_SEP
#undef IS_END
#endif
}
2392 
2393 // The caller must ensure "buffer" is big enough.
2394 static int
join_relfile(wchar_t * buffer,size_t bufsize,const wchar_t * dirname,const wchar_t * relfile)2395 join_relfile(wchar_t *buffer, size_t bufsize,
2396              const wchar_t *dirname, const wchar_t *relfile)
2397 {
2398 #ifdef MS_WINDOWS
2399     if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2400         PATHCCH_ALLOW_LONG_PATHS))) {
2401         return -1;
2402     }
2403 #else
2404     assert(!_Py_isabs(relfile));
2405     size_t dirlen = wcslen(dirname);
2406     size_t rellen = wcslen(relfile);
2407     size_t maxlen = bufsize - 1;
2408     if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
2409         return -1;
2410     }
2411     if (dirlen == 0) {
2412         // We do not add a leading separator.
2413         wcscpy(buffer, relfile);
2414     }
2415     else {
2416         if (dirname != buffer) {
2417             wcscpy(buffer, dirname);
2418         }
2419         size_t relstart = dirlen;
2420         if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
2421             buffer[dirlen] = SEP;
2422             relstart += 1;
2423         }
2424         wcscpy(&buffer[relstart], relfile);
2425     }
2426 #endif
2427     return 0;
2428 }
2429 
2430 /* Join the two paths together, like os.path.join().  Return NULL
2431    if memory could not be allocated.  The caller is responsible
2432    for calling PyMem_RawFree() on the result. */
2433 wchar_t *
_Py_join_relfile(const wchar_t * dirname,const wchar_t * relfile)2434 _Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2435 {
2436     assert(dirname != NULL && relfile != NULL);
2437 #ifndef MS_WINDOWS
2438     assert(!_Py_isabs(relfile));
2439 #endif
2440     size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2441     size_t bufsize = maxlen + 1;
2442     wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2443     if (filename == NULL) {
2444         return NULL;
2445     }
2446     assert(wcslen(dirname) < MAXPATHLEN);
2447     assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2448     if (join_relfile(filename, bufsize, dirname, relfile) < 0) {
2449         PyMem_RawFree(filename);
2450         return NULL;
2451     }
2452     return filename;
2453 }
2454 
/* Append 'relfile' to 'dirname' in place, like os.path.join().
     dirname: buffer that already holds the NUL-terminated directory name
              and receives the joined path
     relfile: relative path to append
     bufsize: total size of the 'dirname' buffer, in wide characters
   Return 0 on success, or -1 if join_relfile() rejects the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);
    // The destination buffer and the directory name are the same memory.
    wchar_t *buffer = dirname;
    int status = join_relfile(buffer, bufsize, dirname, relfile);
    return status;
}
2468 
2469 
2470 size_t
_Py_find_basename(const wchar_t * filename)2471 _Py_find_basename(const wchar_t *filename)
2472 {
2473     for (size_t i = wcslen(filename); i > 0; --i) {
2474         if (filename[i] == SEP) {
2475             return i + 1;
2476         }
2477     }
2478     return 0;
2479 }
2480 
/* In-place path normalisation. Returns the start of the normalized
   path, which will be within the original buffer. Guaranteed to not
   make the path longer, and will not fail. 'size' is the length of
   the path, if known. If -1, the first null character will be assumed
   to be the end of the path. 'normsize' will be set to contain the
   length of the resulting normalized path.

   The path is scanned once with a read pointer (p1) while the kept
   characters are copied down to a write pointer (p2) that never gets ahead
   of p1: repeated separators and '.' segments are dropped, and a '..'
   segment removes the segment written before it where possible. When
   ALTSEP is defined, it is rewritten to SEP. */
wchar_t *
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
{
    assert(path != NULL);
    // Empty path: nothing to normalize
    if ((size < 0 && !path[0]) || size == 0) {
        *normsize = 0;
        return path;
    }
    // Explicit end of the input, or NULL if the input is NUL-terminated
    wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
    wchar_t *p1 = path;     // sequentially scanned address in the path
    wchar_t *p2 = path;     // destination where the next kept character is written
    wchar_t *minP2 = path;  // the beginning of the destination range
    wchar_t lastC = L'\0';  // the last written character, p2[-1] in most cases

#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
#ifdef ALTSEP
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
#else
#define IS_SEP(x) (*(x) == SEP)
#endif
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))

    Py_ssize_t drvsize, rootsize;
    _Py_skiproot(path, size, &drvsize, &rootsize);
    if (drvsize || rootsize) {
        // Skip past root and update minP2
        p1 = &path[drvsize + rootsize];
#ifndef ALTSEP
        p2 = p1;
#else
        // Advance p2 over the prefix, converting ALTSEP to SEP on the way
        for (; p2 < p1; ++p2) {
            if (*p2 == ALTSEP) {
                *p2 = SEP;
            }
        }
#endif
        // minP2 is the last character of the drive/root prefix
        minP2 = p2 - 1;
        lastC = *minP2;
#ifdef MS_WINDOWS
        // The prefix does not end with a separator (e.g. a bare drive):
        // the destination range starts right after it
        if (lastC != SEP) {
            minP2++;
        }
#endif
    }
    if (p1[0] == L'.' && SEP_OR_END(&p1[1])) {
        // Skip leading '.\'
        lastC = *++p1;
#ifdef ALTSEP
        if (lastC == ALTSEP) {
            lastC = SEP;
        }
#endif
        // ... and any further separators that follow it
        while (IS_SEP(p1)) {
            p1++;
        }
    }

    /* if pEnd is specified, check that. Else, check for null terminator */
    for (; !IS_END(p1); ++p1) {
        wchar_t c = *p1;
#ifdef ALTSEP
        if (c == ALTSEP) {
            c = SEP;
        }
#endif
        // Special handling only applies at the start of a segment
        if (lastC == SEP) {
            if (c == L'.') {
                // sep_at_1: the segment is '.'
                // sep_at_2 together with p1[1] == '.': the segment is '..'
                int sep_at_1 = SEP_OR_END(&p1[1]);
                int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
                if (sep_at_2 && p1[1] == L'.') {
                    // Walk p3 back from p2: first over trailing separators,
                    // then to the start of the previously written segment
                    wchar_t *p3 = p2;
                    while (p3 != minP2 && *--p3 == SEP) { }
                    while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
                    if (p2 == minP2
                        || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
                    {
                        // Previous segment is also ../, so append instead.
                        // Relative path does not absorb ../ at minP2 as well.
                        *p2++ = L'.';
                        *p2++ = L'.';
                        lastC = L'.';
                    } else if (p3[0] == SEP) {
                        // Absolute path, so absorb segment
                        p2 = p3 + 1;
                    } else {
                        // Drop the previous segment
                        p2 = p3;
                    }
                    // Consume the second '.' of '..'
                    p1 += 1;
                } else if (sep_at_1) {
                    // '.' segment: write nothing
                } else {
                    *p2++ = lastC = c;
                }
            } else if (c == SEP) {
                // Repeated separator: write nothing
            } else {
                *p2++ = lastC = c;
            }
        } else {
            *p2++ = lastC = c;
        }
    }
    *p2 = L'\0';
    if (p2 != minP2) {
        // Strip trailing separators, stopping at minP2
        while (--p2 != minP2 && *p2 == SEP) {
            *p2 = L'\0';
        }
    } else {
        --p2;
    }
    // p2 now points at the last kept character
    *normsize = p2 - path + 1;
#undef SEP_OR_END
#undef IS_SEP
#undef IS_END
    return path;
}
2601 
2602 /* In-place path normalisation. Returns the start of the normalized
2603    path, which will be within the original buffer. Guaranteed to not
2604    make the path longer, and will not fail. 'size' is the length of
2605    the path, if known. If -1, the first null character will be assumed
2606    to be the end of the path. */
2607 wchar_t *
_Py_normpath(wchar_t * path,Py_ssize_t size)2608 _Py_normpath(wchar_t *path, Py_ssize_t size)
2609 {
2610     Py_ssize_t norm_length;
2611     return _Py_normpath_and_size(path, size, &norm_length);
2612 }
2613 
2614 
2615 /* Get the current directory. buflen is the buffer size in wide characters
2616    including the null character. Decode the path from the locale encoding.
2617 
2618    Return NULL on getcwd() error, on decoding error, or if 'buf' is
2619    too short. */
2620 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)2621 _Py_wgetcwd(wchar_t *buf, size_t buflen)
2622 {
2623 #ifdef MS_WINDOWS
2624     int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2625     return _wgetcwd(buf, ibuflen);
2626 #else
2627     char fname[MAXPATHLEN];
2628     wchar_t *wname;
2629     size_t len;
2630 
2631     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2632         return NULL;
2633     wname = Py_DecodeLocale(fname, &len);
2634     if (wname == NULL)
2635         return NULL;
2636     /* wname must have space to store the trailing NUL character */
2637     if (buflen <= len) {
2638         PyMem_RawFree(wname);
2639         return NULL;
2640     }
2641     wcsncpy(buf, wname, buflen);
2642     PyMem_RawFree(wname);
2643     return buf;
2644 #endif
2645 }
2646 
2647 /* Duplicate a file descriptor. The new file descriptor is created as
2648    non-inheritable. Return a new file descriptor on success, raise an OSError
2649    exception and return -1 on error.
2650 
2651    The GIL is released to call dup(). The caller must hold the GIL. */
2652 int
_Py_dup(int fd)2653 _Py_dup(int fd)
2654 {
2655 #ifdef MS_WINDOWS
2656     HANDLE handle;
2657 #endif
2658 
2659     assert(PyGILState_Check());
2660 
2661 #ifdef MS_WINDOWS
2662     handle = _Py_get_osfhandle(fd);
2663     if (handle == INVALID_HANDLE_VALUE)
2664         return -1;
2665 
2666     Py_BEGIN_ALLOW_THREADS
2667     _Py_BEGIN_SUPPRESS_IPH
2668     fd = dup(fd);
2669     _Py_END_SUPPRESS_IPH
2670     Py_END_ALLOW_THREADS
2671     if (fd < 0) {
2672         PyErr_SetFromErrno(PyExc_OSError);
2673         return -1;
2674     }
2675 
2676     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2677         _Py_BEGIN_SUPPRESS_IPH
2678         close(fd);
2679         _Py_END_SUPPRESS_IPH
2680         return -1;
2681     }
2682 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2683     Py_BEGIN_ALLOW_THREADS
2684     _Py_BEGIN_SUPPRESS_IPH
2685     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2686     _Py_END_SUPPRESS_IPH
2687     Py_END_ALLOW_THREADS
2688     if (fd < 0) {
2689         PyErr_SetFromErrno(PyExc_OSError);
2690         return -1;
2691     }
2692 
2693 #elif HAVE_DUP
2694     Py_BEGIN_ALLOW_THREADS
2695     _Py_BEGIN_SUPPRESS_IPH
2696     fd = dup(fd);
2697     _Py_END_SUPPRESS_IPH
2698     Py_END_ALLOW_THREADS
2699     if (fd < 0) {
2700         PyErr_SetFromErrno(PyExc_OSError);
2701         return -1;
2702     }
2703 
2704     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2705         _Py_BEGIN_SUPPRESS_IPH
2706         close(fd);
2707         _Py_END_SUPPRESS_IPH
2708         return -1;
2709     }
2710 #else
2711     errno = ENOTSUP;
2712     PyErr_SetFromErrno(PyExc_OSError);
2713     return -1;
2714 #endif
2715     return fd;
2716 }
2717 
2718 #ifndef MS_WINDOWS
2719 /* Get the blocking mode of the file descriptor.
2720    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2721    raise an exception and return -1 on error. */
2722 int
_Py_get_blocking(int fd)2723 _Py_get_blocking(int fd)
2724 {
2725     int flags;
2726     _Py_BEGIN_SUPPRESS_IPH
2727     flags = fcntl(fd, F_GETFL, 0);
2728     _Py_END_SUPPRESS_IPH
2729     if (flags < 0) {
2730         PyErr_SetFromErrno(PyExc_OSError);
2731         return -1;
2732     }
2733 
2734     return !(flags & O_NONBLOCK);
2735 }
2736 
2737 /* Set the blocking mode of the specified file descriptor.
2738 
2739    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2740    otherwise.
2741 
2742    Return 0 on success, raise an exception and return -1 on error. */
2743 int
_Py_set_blocking(int fd,int blocking)2744 _Py_set_blocking(int fd, int blocking)
2745 {
2746 /* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2747    Use fcntl() instead. */
2748 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2749     int arg = !blocking;
2750     if (ioctl(fd, FIONBIO, &arg) < 0)
2751         goto error;
2752 #else
2753     int flags, res;
2754 
2755     _Py_BEGIN_SUPPRESS_IPH
2756     flags = fcntl(fd, F_GETFL, 0);
2757     if (flags >= 0) {
2758         if (blocking)
2759             flags = flags & (~O_NONBLOCK);
2760         else
2761             flags = flags | O_NONBLOCK;
2762 
2763         res = fcntl(fd, F_SETFL, flags);
2764     } else {
2765         res = -1;
2766     }
2767     _Py_END_SUPPRESS_IPH
2768 
2769     if (res < 0)
2770         goto error;
2771 #endif
2772     return 0;
2773 
2774 error:
2775     PyErr_SetFromErrno(PyExc_OSError);
2776     return -1;
2777 }
2778 #else   /* MS_WINDOWS */
2779 int
_Py_get_blocking(int fd)2780 _Py_get_blocking(int fd)
2781 {
2782     HANDLE handle;
2783     DWORD mode;
2784     BOOL success;
2785 
2786     handle = _Py_get_osfhandle(fd);
2787     if (handle == INVALID_HANDLE_VALUE) {
2788         return -1;
2789     }
2790 
2791     Py_BEGIN_ALLOW_THREADS
2792     success = GetNamedPipeHandleStateW(handle, &mode,
2793                                        NULL, NULL, NULL, NULL, 0);
2794     Py_END_ALLOW_THREADS
2795 
2796     if (!success) {
2797         PyErr_SetFromWindowsErr(0);
2798         return -1;
2799     }
2800 
2801     return !(mode & PIPE_NOWAIT);
2802 }
2803 
2804 int
_Py_set_blocking(int fd,int blocking)2805 _Py_set_blocking(int fd, int blocking)
2806 {
2807     HANDLE handle;
2808     DWORD mode;
2809     BOOL success;
2810 
2811     handle = _Py_get_osfhandle(fd);
2812     if (handle == INVALID_HANDLE_VALUE) {
2813         return -1;
2814     }
2815 
2816     Py_BEGIN_ALLOW_THREADS
2817     success = GetNamedPipeHandleStateW(handle, &mode,
2818                                        NULL, NULL, NULL, NULL, 0);
2819     if (success) {
2820         if (blocking) {
2821             mode &= ~PIPE_NOWAIT;
2822         }
2823         else {
2824             mode |= PIPE_NOWAIT;
2825         }
2826         success = SetNamedPipeHandleState(handle, &mode, NULL, NULL);
2827     }
2828     Py_END_ALLOW_THREADS
2829 
2830     if (!success) {
2831         PyErr_SetFromWindowsErr(0);
2832         return -1;
2833     }
2834     return 0;
2835 }
2836 
2837 void*
_Py_get_osfhandle_noraise(int fd)2838 _Py_get_osfhandle_noraise(int fd)
2839 {
2840     void *handle;
2841     _Py_BEGIN_SUPPRESS_IPH
2842     handle = (void*)_get_osfhandle(fd);
2843     _Py_END_SUPPRESS_IPH
2844     return handle;
2845 }
2846 
2847 void*
_Py_get_osfhandle(int fd)2848 _Py_get_osfhandle(int fd)
2849 {
2850     void *handle = _Py_get_osfhandle_noraise(fd);
2851     if (handle == INVALID_HANDLE_VALUE)
2852         PyErr_SetFromErrno(PyExc_OSError);
2853 
2854     return handle;
2855 }
2856 
2857 int
_Py_open_osfhandle_noraise(void * handle,int flags)2858 _Py_open_osfhandle_noraise(void *handle, int flags)
2859 {
2860     int fd;
2861     _Py_BEGIN_SUPPRESS_IPH
2862     fd = _open_osfhandle((intptr_t)handle, flags);
2863     _Py_END_SUPPRESS_IPH
2864     return fd;
2865 }
2866 
2867 int
_Py_open_osfhandle(void * handle,int flags)2868 _Py_open_osfhandle(void *handle, int flags)
2869 {
2870     int fd = _Py_open_osfhandle_noraise(handle, flags);
2871     if (fd == -1)
2872         PyErr_SetFromErrno(PyExc_OSError);
2873 
2874     return fd;
2875 }
2876 #endif  /* MS_WINDOWS */
2877 
2878 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)2879 _Py_GetLocaleconvNumeric(struct lconv *lc,
2880                          PyObject **decimal_point, PyObject **thousands_sep)
2881 {
2882     assert(decimal_point != NULL);
2883     assert(thousands_sep != NULL);
2884 
2885 #ifndef MS_WINDOWS
2886     int change_locale = 0;
2887     if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2888         change_locale = 1;
2889     }
2890     if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2891         change_locale = 1;
2892     }
2893 
2894     /* Keep a copy of the LC_CTYPE locale */
2895     char *oldloc = NULL, *loc = NULL;
2896     if (change_locale) {
2897         oldloc = setlocale(LC_CTYPE, NULL);
2898         if (!oldloc) {
2899             PyErr_SetString(PyExc_RuntimeWarning,
2900                             "failed to get LC_CTYPE locale");
2901             return -1;
2902         }
2903 
2904         oldloc = _PyMem_Strdup(oldloc);
2905         if (!oldloc) {
2906             PyErr_NoMemory();
2907             return -1;
2908         }
2909 
2910         loc = setlocale(LC_NUMERIC, NULL);
2911         if (loc != NULL && strcmp(loc, oldloc) == 0) {
2912             loc = NULL;
2913         }
2914 
2915         if (loc != NULL) {
2916             /* Only set the locale temporarily the LC_CTYPE locale
2917                if LC_NUMERIC locale is different than LC_CTYPE locale and
2918                decimal_point and/or thousands_sep are non-ASCII or longer than
2919                1 byte */
2920             setlocale(LC_CTYPE, loc);
2921         }
2922     }
2923 
2924 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2925 #else /* MS_WINDOWS */
2926 /* Use _W_* fields of Windows strcut lconv */
2927 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2928 #endif /* MS_WINDOWS */
2929 
2930     int res = -1;
2931 
2932     *decimal_point = GET_LOCALE_STRING(decimal_point);
2933     if (*decimal_point == NULL) {
2934         goto done;
2935     }
2936 
2937     *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2938     if (*thousands_sep == NULL) {
2939         goto done;
2940     }
2941 
2942     res = 0;
2943 
2944 done:
2945 #ifndef MS_WINDOWS
2946     if (loc != NULL) {
2947         setlocale(LC_CTYPE, oldloc);
2948     }
2949     PyMem_Free(oldloc);
2950 #endif
2951     return res;
2952 
2953 #undef GET_LOCALE_STRING
2954 }
2955 
2956 /* Our selection logic for which function to use is as follows:
2957  * 1. If close_range(2) is available, always prefer that; it's better for
2958  *    contiguous ranges like this than fdwalk(3) which entails iterating over
2959  *    the entire fd space and simply doing nothing for those outside the range.
2960  * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2961  *    closing up to sysconf(_SC_OPEN_MAX).
2962  * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2963  *    as that will be more performant if the range happens to have any chunk of
2964  *    non-opened fd in the middle.
2965  * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2966  */
2967 #ifdef HAVE_CLOSEFROM
2968 #  define USE_CLOSEFROM
2969 #endif /* HAVE_CLOSEFROM */
2970 
2971 #ifdef HAVE_FDWALK
2972 #  define USE_FDWALK
2973 #endif /* HAVE_FDWALK */
2974 
2975 #ifdef USE_FDWALK
/* Callback passed to fdwalk() by _Py_closerange().

   `lohi` points to two ints: lohi[0] is the inclusive lower bound and
   lohi[1] the exclusive upper bound of the descriptors to close.

   Return 1 once `fd` has reached the upper bound (presumably this tells
   fdwalk() to stop iterating -- see fdwalk(3)), otherwise return 0 after
   closing `fd` if it lies inside the range.  close() errors are ignored. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (int *)lohi;
    const int lower = bounds[0];
    const int upper = bounds[1];

    if (fd >= upper) {
        return 1;
    }
    if (fd >= lower) {
        /* Best effort: the result of close() is deliberately discarded. */
        (void)close(fd);
    }
    return 0;
}
2991 #endif /* USE_FDWALK */
2992 
2993 /* Closes all file descriptors in [first, last], ignoring errors. */
2994 void
_Py_closerange(int first,int last)2995 _Py_closerange(int first, int last)
2996 {
2997     first = Py_MAX(first, 0);
2998     _Py_BEGIN_SUPPRESS_IPH
2999 #ifdef HAVE_CLOSE_RANGE
3000     if (close_range(first, last, 0) == 0) {
3001         /* close_range() ignores errors when it closes file descriptors.
3002          * Possible reasons of an error return are lack of kernel support
3003          * or denial of the underlying syscall by a seccomp sandbox on Linux.
3004          * Fallback to other methods in case of any error. */
3005     }
3006     else
3007 #endif /* HAVE_CLOSE_RANGE */
3008 #ifdef USE_CLOSEFROM
3009     if (last >= sysconf(_SC_OPEN_MAX)) {
3010         /* Any errors encountered while closing file descriptors are ignored */
3011         (void)closefrom(first);
3012     }
3013     else
3014 #endif /* USE_CLOSEFROM */
3015 #ifdef USE_FDWALK
3016     {
3017         int lohi[2];
3018         lohi[0] = first;
3019         lohi[1] = last + 1;
3020         fdwalk(_fdwalk_close_func, lohi);
3021     }
3022 #else
3023     {
3024         for (int i = first; i <= last; i++) {
3025             /* Ignore errors */
3026             (void)close(i);
3027         }
3028     }
3029 #endif /* USE_FDWALK */
3030     _Py_END_SUPPRESS_IPH
3031 }
3032 
3033 
3034 #ifndef MS_WINDOWS
// Store the number of clock ticks per second, as used by the clock() and
// times() functions, into *ticks_per_second.
// See os.times() and time.process_time() implementations.
//
// The value comes from sysconf(_SC_CLK_TCK) when available, otherwise from
// the HZ macro, otherwise a hard-coded fallback is used.
//
// Return 0 on success.  Return -1, leaving *ticks_per_second untouched, if
// sysconf() reports a value smaller than 1.
int
_Py_GetTicksPerSecond(long *ticks_per_second)
{
    long ticks;

#if defined(HAVE_SYSCONF) && defined(_SC_CLK_TCK)
    ticks = sysconf(_SC_CLK_TCK);
    if (ticks < 1) {
        return -1;
    }
#elif defined(HZ)
    assert(HZ >= 1);
    ticks = HZ;
#else
    // Magic fallback value; may be bogus
    ticks = 60;
#endif

    *ticks_per_second = ticks;
    return 0;
}
3055 #endif
3056 
3057 
/* Tell whether a file descriptor refers to something open.
   Return 0 for an invalid descriptor (including any negative value),
   non-zero otherwise. */
int
_Py_IsValidFD(int fd)
{
/* Choice of probe:

   - fstat() may require input/output operations, whereas dup() doesn't, so
     dup() is faster.  dup() carries a low risk of EMFILE/ENFILE at Python
     startup, and on some platforms it doesn't check whether the descriptor
     is valid.

   - fcntl(fd, F_GETFD) is faster still because it only checks the process
     table, and it cannot fail with the "too many open files" error (EMFILE).
     It is preferred over dup() when available.

   - bpo-30225: on macOS Tiger, when stdout is redirected to a pipe and the
     other side of the pipe is closed, dup(1) succeeds whereas fstat(1, &st)
     fails with EBADF.  FreeBSD has a similar issue (bpo-32849).

   - dup() is only used on Linux, where it is enough to detect an invalid FD
     (bpo-32849).
*/
    if (fd < 0) {
        return 0;
    }
#if defined(F_GETFD) && ( \
        defined(__linux__) || \
        defined(__APPLE__) || \
        (defined(__wasm__) && !defined(__wasi__)))
    const int fd_flags = fcntl(fd, F_GETFD);
    return !(fd_flags < 0);
#elif defined(__linux__)
    int duplicate = dup(fd);
    if (duplicate < 0) {
        return 0;
    }
    /* Only the success of dup() matters; drop the copy right away. */
    close(duplicate);
    return 1;
#elif defined(MS_WINDOWS)
    HANDLE os_handle;
    _Py_BEGIN_SUPPRESS_IPH
    os_handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (os_handle == INVALID_HANDLE_VALUE) {
        return 0;
    }
    return GetFileType(os_handle) != FILE_TYPE_UNKNOWN;
#else
    struct stat info;
    return fstat(fd, &info) == 0;
#endif
}
3105