1 #include "Python.h"
2 #include "pycore_fileutils.h" // fileutils definitions
3 #include "pycore_runtime.h" // _PyRuntime
4 #include "osdefs.h" // SEP
5
6 #include <stdlib.h> // mbstowcs()
7 #ifdef HAVE_UNISTD_H
8 # include <unistd.h> // getcwd()
9 #endif
10
11 #ifdef MS_WINDOWS
12 # include <malloc.h>
13 # include <windows.h>
14 # include <winioctl.h> // FILE_DEVICE_* constants
15 # include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION
16 # if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
17 # define PATHCCH_ALLOW_LONG_PATHS 0x01
18 # else
19 # include <pathcch.h> // PathCchCombineEx
20 # endif
21 extern int winerror_to_errno(int);
22 #endif
23
24 #ifdef HAVE_LANGINFO_H
25 # include <langinfo.h> // nl_langinfo(CODESET)
26 #endif
27
28 #ifdef HAVE_SYS_IOCTL_H
29 #include <sys/ioctl.h>
30 #endif
31
32 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
33 # include <iconv.h> // iconv_open()
34 #endif
35
36 #ifdef HAVE_FCNTL_H
37 # include <fcntl.h> // fcntl(F_GETFD)
38 #endif
39
40 #ifdef O_CLOEXEC
41 /* Does open() support the O_CLOEXEC flag? Possible values:
42
43 -1: unknown
44 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
45 1: open() supports O_CLOEXEC flag, close-on-exec is set
46
47 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
48 and os.open(). */
49 int _Py_open_cloexec_works = -1;
50 #endif
51
52 // The value must be the same in unicodeobject.c.
53 #define MAX_UNICODE 0x10ffff
54
// Error sentinels returned by mbstowcs() and mbrtowc().
// DECODE_ERROR ((size_t)-1) reports a conversion failure; this file also
// compares it against the results of wcstombs() and iconv().
// INCOMPLETE_CHARACTER ((size_t)-2) is checked only on mbrtowc() results and
// reports an incomplete multibyte character.
static const size_t DECODE_ERROR = ((size_t)-1);
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
58
59
60 static int
get_surrogateescape(_Py_error_handler errors,int * surrogateescape)61 get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
62 {
63 switch (errors)
64 {
65 case _Py_ERROR_STRICT:
66 *surrogateescape = 0;
67 return 0;
68 case _Py_ERROR_SURROGATEESCAPE:
69 *surrogateescape = 1;
70 return 0;
71 default:
72 return -1;
73 }
74 }
75
76
77 PyObject *
_Py_device_encoding(int fd)78 _Py_device_encoding(int fd)
79 {
80 int valid;
81 Py_BEGIN_ALLOW_THREADS
82 _Py_BEGIN_SUPPRESS_IPH
83 valid = isatty(fd);
84 _Py_END_SUPPRESS_IPH
85 Py_END_ALLOW_THREADS
86 if (!valid)
87 Py_RETURN_NONE;
88
89 #ifdef MS_WINDOWS
90 #ifdef HAVE_WINDOWS_CONSOLE_IO
91 UINT cp;
92 if (fd == 0)
93 cp = GetConsoleCP();
94 else if (fd == 1 || fd == 2)
95 cp = GetConsoleOutputCP();
96 else
97 cp = 0;
98 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
99 has no console */
100 if (cp == 0) {
101 Py_RETURN_NONE;
102 }
103
104 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
105 #else
106 Py_RETURN_NONE;
107 #endif /* HAVE_WINDOWS_CONSOLE_IO */
108 #else
109 if (_PyRuntime.preconfig.utf8_mode) {
110 _Py_DECLARE_STR(utf_8, "utf-8");
111 return &_Py_STR(utf_8);
112 }
113 return _Py_GetLocaleEncodingObject();
114 #endif
115 }
116
117
118 static int
is_valid_wide_char(wchar_t ch)119 is_valid_wide_char(wchar_t ch)
120 {
121 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
122 /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
123 for non-Unicode locales, which makes values higher than MAX_UNICODE
124 possibly valid. */
125 return 1;
126 #endif
127 if (Py_UNICODE_IS_SURROGATE(ch)) {
128 // Reject lone surrogate characters
129 return 0;
130 }
131 if (ch > MAX_UNICODE) {
132 // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
133 // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
134 // it creates characters outside the [U+0000; U+10ffff] range:
135 // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
136 return 0;
137 }
138 return 1;
139 }
140
141
142 static size_t
_Py_mbstowcs(wchar_t * dest,const char * src,size_t n)143 _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
144 {
145 size_t count = mbstowcs(dest, src, n);
146 if (dest != NULL && count != DECODE_ERROR) {
147 for (size_t i=0; i < count; i++) {
148 wchar_t ch = dest[i];
149 if (!is_valid_wide_char(ch)) {
150 return DECODE_ERROR;
151 }
152 }
153 }
154 return count;
155 }
156
157
158 #ifdef HAVE_MBRTOWC
159 static size_t
_Py_mbrtowc(wchar_t * pwc,const char * str,size_t len,mbstate_t * pmbs)160 _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
161 {
162 assert(pwc != NULL);
163 size_t count = mbrtowc(pwc, str, len, pmbs);
164 if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
165 if (!is_valid_wide_char(*pwc)) {
166 return DECODE_ERROR;
167 }
168 }
169 return count;
170 }
171 #endif
172
173
174 #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
175
176 #define USE_FORCE_ASCII
177
178 extern int _Py_normalize_encoding(const char *, char *, size_t);
179
180 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
181 and POSIX locale. nl_langinfo(CODESET) announces an alias of the
182 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
183 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
184 locale.getpreferredencoding() codec. For example, if command line arguments
185 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
186 UnicodeEncodeError instead of retrieving the original byte string.
187
188 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
189 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
190 one byte in range 0x80-0xff can be decoded from the locale encoding. The
191 workaround is also enabled on error, for example if getting the locale
192 failed.
193
194 On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
195 announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
196 ASCII encoding in this case.
197
198 Values of force_ascii:
199
200 1: the workaround is used: Py_EncodeLocale() uses
201 encode_ascii_surrogateescape() and Py_DecodeLocale() uses
202 decode_ascii()
203 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
204 Py_DecodeLocale() uses mbstowcs()
205 -1: unknown, need to call check_force_ascii() to get the value
206 */
207 #define force_ascii (_PyRuntime.fileutils.force_ascii)
208
/* Compute the value of force_ascii.

   Return 1 if the ASCII workaround must be used and 0 otherwise:

   - return 0 if the LC_CTYPE locale is neither "C" nor "POSIX";
   - on HP-UX, return 1 if nl_langinfo(CODESET) is "roman8" but mbstowcs()
     decodes the byte 0xA7 like Latin1, otherwise return 0;
   - on other platforms, return 0 if nl_langinfo(CODESET) is not an alias of
     ASCII; otherwise return 1 if at least one byte in the range 0x80-0xff
     can be decoded by mbstowcs(), or 0 if none of them can be decoded;
   - return 1 if nl_langinfo(CODESET) is not available;
   - return 1 on error (the locale or the codeset cannot be retrieved, or the
     codeset name cannot be normalized).

   Each byte probed with mbstowcs() is stored in a NUL-terminated buffer, so
   the C library can never read past the probed byte even if it treats that
   byte as the start of a multibyte sequence. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* NUL-terminated probe: mbstowcs() must not read past the byte */
        unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res = _Py_mbstowcs(&wch, (char*)probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char *const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char *const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* NUL-terminated probe: mbstowcs() must not read past the byte */
        char ch[2] = {(char)(unsigned char)i, '\0'};
        wchar_t wch[1];
        size_t res = _Py_mbstowcs(wch, ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
308
309
310 int
_Py_GetForceASCII(void)311 _Py_GetForceASCII(void)
312 {
313 if (force_ascii == -1) {
314 force_ascii = check_force_ascii();
315 }
316 return force_ascii;
317 }
318
319
/* Reset force_ascii to -1 ("unknown"): check_force_ascii() will be called
   again the next time the value is needed. */
void
_Py_ResetForceASCII(void)
{
    force_ascii = -1;
}
325
326
327 static int
encode_ascii(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,_Py_error_handler errors)328 encode_ascii(const wchar_t *text, char **str,
329 size_t *error_pos, const char **reason,
330 int raw_malloc, _Py_error_handler errors)
331 {
332 char *result = NULL, *out;
333 size_t len, i;
334 wchar_t ch;
335
336 int surrogateescape;
337 if (get_surrogateescape(errors, &surrogateescape) < 0) {
338 return -3;
339 }
340
341 len = wcslen(text);
342
343 /* +1 for NULL byte */
344 if (raw_malloc) {
345 result = PyMem_RawMalloc(len + 1);
346 }
347 else {
348 result = PyMem_Malloc(len + 1);
349 }
350 if (result == NULL) {
351 return -1;
352 }
353
354 out = result;
355 for (i=0; i<len; i++) {
356 ch = text[i];
357
358 if (ch <= 0x7f) {
359 /* ASCII character */
360 *out++ = (char)ch;
361 }
362 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
363 /* UTF-8b surrogate */
364 *out++ = (char)(ch - 0xdc00);
365 }
366 else {
367 if (raw_malloc) {
368 PyMem_RawFree(result);
369 }
370 else {
371 PyMem_Free(result);
372 }
373 if (error_pos != NULL) {
374 *error_pos = i;
375 }
376 if (reason) {
377 *reason = "encoding error";
378 }
379 return -2;
380 }
381 }
382 *out = '\0';
383 *str = result;
384 return 0;
385 }
386 #else
/* Variant used when USE_FORCE_ASCII is not defined (the filesystem encoding
   is forced to UTF-8, or on Windows): the ASCII workaround is never used. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
392
/* Variant used when USE_FORCE_ASCII is not defined: there is no cached
   force_ascii value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
398 #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
399
400
401 #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
402 static int
decode_ascii(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,_Py_error_handler errors)403 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
404 const char **reason, _Py_error_handler errors)
405 {
406 wchar_t *res;
407 unsigned char *in;
408 wchar_t *out;
409 size_t argsize = strlen(arg) + 1;
410
411 int surrogateescape;
412 if (get_surrogateescape(errors, &surrogateescape) < 0) {
413 return -3;
414 }
415
416 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
417 return -1;
418 }
419 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
420 if (!res) {
421 return -1;
422 }
423
424 out = res;
425 for (in = (unsigned char*)arg; *in; in++) {
426 unsigned char ch = *in;
427 if (ch < 128) {
428 *out++ = ch;
429 }
430 else {
431 if (!surrogateescape) {
432 PyMem_RawFree(res);
433 if (wlen) {
434 *wlen = in - (unsigned char*)arg;
435 }
436 if (reason) {
437 *reason = "decoding error";
438 }
439 return -2;
440 }
441 *out++ = 0xdc00 + ch;
442 }
443 }
444 *out = 0;
445
446 if (wlen != NULL) {
447 *wlen = out - res;
448 }
449 *wstr = res;
450 return 0;
451 }
#endif /* !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) */
453
/* Decode the byte string arg from the encoding of the current LC_CTYPE
   locale.

   The whole string is first decoded with _Py_mbstowcs(). If that fails and
   mbrtowc() is available, the string is decoded again one character at a
   time so that, with the surrogateescape error handler, each undecodable
   byte is escaped as a character in the range U+DC80..U+DCFF. If mbrtowc()
   is not available, decode_ascii() is used instead.

   On success, return 0 and store the string allocated by PyMem_RawMalloc()
   into *wstr; if wlen is not NULL, store the number of wide characters
   excluding the null character into *wlen.

   Return -1 on memory allocation failure. Return -2 on decoding error: if
   wlen is not NULL, *wlen is set to the offset in arg where decoding
   stopped; if reason is not NULL, *reason is set to an error message.
   Return -3 if the error handler 'errors' is not supported. */
static int
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
                      const char **reason, _Py_error_handler errors)
{
    wchar_t *res;
    size_t argsize;
    size_t count;
#ifdef HAVE_MBRTOWC
    unsigned char *in;
    wchar_t *out;
    mbstate_t mbs;
#endif

    int surrogateescape;
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
        return -3;
    }

#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    argsize = strlen(arg);
#else
    /* Ask for the number of wide characters without storing them */
    argsize = _Py_mbstowcs(NULL, arg, 0);
#endif
    if (argsize != DECODE_ERROR) {
        /* -1: keep room for the null character added below */
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
            return -1;
        }
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
        if (!res) {
            return -1;
        }

        /* This call stores the characters, so _Py_mbstowcs() also validates
           them: it can fail even if the first call succeeded. */
        count = _Py_mbstowcs(res, arg, argsize + 1);
        if (count != DECODE_ERROR) {
            *wstr = res;
            if (wlen != NULL) {
                *wlen = count;
            }
            return 0;
        }
        PyMem_RawFree(res);
    }

    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
        return -1;
    }
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
    if (!res) {
        return -1;
    }

    in = (unsigned char*)arg;
    out = res;
    /* Start in the initial shift state */
    memset(&mbs, 0, sizeof mbs);
    /* From here, argsize is the number of input bytes left, including the
       terminating null byte */
    while (argsize) {
        size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0) {
            /* Reached end of string; null char stored. */
            break;
        }

        if (converted == INCOMPLETE_CHARACTER) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            goto decode_error;
        }

        if (converted == DECODE_ERROR) {
            if (!surrogateescape) {
                goto decode_error;
            }

            /* Decoding error. Escape as UTF-8b, and start over in the initial
               shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }

        // _Py_mbrtowc() rejects lone surrogate characters
        assert(!Py_UNICODE_IS_SURROGATE(*out));

        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
    if (wlen != NULL) {
        *wlen = out - res;
    }
    *wstr = res;
    return 0;

decode_error:
    PyMem_RawFree(res);
    if (wlen) {
        /* offset of the byte where decoding stopped */
        *wlen = in - (unsigned char*)arg;
    }
    if (reason) {
        *reason = "decoding error";
    }
    return -2;
#else   /* HAVE_MBRTOWC */
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    return decode_ascii(arg, wstr, wlen, reason, errors);
#endif   /* HAVE_MBRTOWC */
}
577
578
579 /* Decode a byte string from the locale encoding.
580
581 Use the strict error handler if 'surrogateescape' is zero. Use the
582 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
583 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
584 can be decoded as a surrogate character, escape the bytes using the
585 surrogateescape error handler instead of decoding them.
586
587 On success, return 0 and write the newly allocated wide character string into
588 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
589 the number of wide characters excluding the null character into *wlen.
590
591 On memory allocation failure, return -1.
592
593 On decoding error, return -2. If wlen is not NULL, write the start of
594 invalid byte sequence in the input string into *wlen. If reason is not NULL,
595 write the decoding error message into *reason.
596
597 Return -3 if the error handler 'errors' is not supported.
598
599 Use the Py_EncodeLocaleEx() function to encode the character string back to
600 a byte string. */
601 int
_Py_DecodeLocaleEx(const char * arg,wchar_t ** wstr,size_t * wlen,const char ** reason,int current_locale,_Py_error_handler errors)602 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
603 const char **reason,
604 int current_locale, _Py_error_handler errors)
605 {
606 if (current_locale) {
607 #ifdef _Py_FORCE_UTF8_LOCALE
608 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
609 errors);
610 #else
611 return decode_current_locale(arg, wstr, wlen, reason, errors);
612 #endif
613 }
614
615 #ifdef _Py_FORCE_UTF8_FS_ENCODING
616 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
617 errors);
618 #else
619 int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
620 #ifdef MS_WINDOWS
621 use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
622 #endif
623 if (use_utf8) {
624 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
625 errors);
626 }
627
628 #ifdef USE_FORCE_ASCII
629 if (force_ascii == -1) {
630 force_ascii = check_force_ascii();
631 }
632
633 if (force_ascii) {
634 /* force ASCII encoding to workaround mbstowcs() issue */
635 return decode_ascii(arg, wstr, wlen, reason, errors);
636 }
637 #endif
638
639 return decode_current_locale(arg, wstr, wlen, reason, errors);
640 #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
641 }
642
643
644 /* Decode a byte string from the locale encoding with the
645 surrogateescape error handler: undecodable bytes are decoded as characters
646 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
647 character, escape the bytes using the surrogateescape error handler instead
648 of decoding them.
649
650 Return a pointer to a newly allocated wide character string, use
651 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
652 wide characters excluding the null character into *size
653
654 Return NULL on decoding error or memory allocation error. If *size* is not
655 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
656 decoding error.
657
658 Decoding errors should never happen, unless there is a bug in the C
659 library.
660
661 Use the Py_EncodeLocale() function to encode the character string back to a
662 byte string. */
663 wchar_t*
Py_DecodeLocale(const char * arg,size_t * wlen)664 Py_DecodeLocale(const char* arg, size_t *wlen)
665 {
666 wchar_t *wstr;
667 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
668 NULL, 0,
669 _Py_ERROR_SURROGATEESCAPE);
670 if (res != 0) {
671 assert(res != -3);
672 if (wlen != NULL) {
673 *wlen = (size_t)res;
674 }
675 return NULL;
676 }
677 return wstr;
678 }
679
680
/* Encode the wide character string text to the encoding of the current
   LC_CTYPE locale, one character at a time with wcstombs().

   With the surrogateescape error handler, characters in the range
   U+DC80..U+DCFF are encoded as bytes 0x80..0xFF; with the strict error
   handler, they are an encoding error.

   If raw_malloc is non-zero, the result is allocated with PyMem_RawMalloc(),
   otherwise with PyMem_Malloc().

   Return value:

    0: success, *str is set to the newly allocated, NUL-terminated string
   -1: memory allocation failure
   -2: encoding error; if error_pos is not NULL, *error_pos is set to the
       index of the invalid character; if reason is not NULL, *reason is set
       to an error message
   -3: the error handler 'errors' is not supported */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, _Py_error_handler errors)
{
    const size_t len = wcslen(text);
    char *result = NULL, *bytes = NULL;
    size_t i, size, converted;
    wchar_t c, buf[2];

    int surrogateescape;
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
        return -3;
    }

    /* The function works in two steps:
       1. compute the length of the output buffer in bytes (size)
       2. outputs the bytes

       The same loop body runs twice: 'bytes' is NULL during step 1 and
       points into 'result' during step 2. During step 2, 'size' is the
       number of bytes left in the buffer. */
    size = 0;
    /* buf is a one-character string: buf[0] is set for each character */
    buf[1] = 0;
    while (1) {
        for (i=0; i < len; i++) {
            c = text[i];
            if (c >= 0xdc80 && c <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate */
                if (bytes != NULL) {
                    *bytes++ = c - 0xdc00;
                    size--;
                }
                else {
                    size++;
                }
                continue;
            }
            else {
                buf[0] = c;
                if (bytes != NULL) {
                    converted = wcstombs(bytes, buf, size);
                }
                else {
                    converted = wcstombs(NULL, buf, 0);
                }
                /* wcstombs() also returns (size_t)-1 on error */
                if (converted == DECODE_ERROR) {
                    goto encode_error;
                }
                if (bytes != NULL) {
                    bytes += converted;
                    size -= converted;
                }
                else {
                    size += converted;
                }
            }
        }
        if (result != NULL) {
            /* end of step 2 */
            *bytes = '\0';
            break;
        }

        /* end of step 1: allocate the buffer and run the loop again */
        size += 1; /* nul byte at the end */
        if (raw_malloc) {
            result = PyMem_RawMalloc(size);
        }
        else {
            result = PyMem_Malloc(size);
        }
        if (result == NULL) {
            return -1;
        }
        bytes = result;
    }
    *str = result;
    return 0;

encode_error:
    /* result is still NULL if the error occurred during step 1 */
    if (raw_malloc) {
        PyMem_RawFree(result);
    }
    else {
        PyMem_Free(result);
    }
    if (error_pos != NULL) {
        *error_pos = i;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
773
774
775 /* Encode a string to the locale encoding.
776
777 Parameters:
778
779 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
780 of PyMem_Malloc().
781 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
782 Python filesystem encoding.
783 * errors: error handler like "strict" or "surrogateescape".
784
785 Return value:
786
787 0: success, *str is set to a newly allocated decoded string.
788 -1: memory allocation failure
789 -2: encoding error, set *error_pos and *reason (if set).
790 -3: the error handler 'errors' is not supported.
791 */
792 static int
encode_locale_ex(const wchar_t * text,char ** str,size_t * error_pos,const char ** reason,int raw_malloc,int current_locale,_Py_error_handler errors)793 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
794 const char **reason,
795 int raw_malloc, int current_locale, _Py_error_handler errors)
796 {
797 if (current_locale) {
798 #ifdef _Py_FORCE_UTF8_LOCALE
799 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
800 raw_malloc, errors);
801 #else
802 return encode_current_locale(text, str, error_pos, reason,
803 raw_malloc, errors);
804 #endif
805 }
806
807 #ifdef _Py_FORCE_UTF8_FS_ENCODING
808 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
809 raw_malloc, errors);
810 #else
811 int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
812 #ifdef MS_WINDOWS
813 use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
814 #endif
815 if (use_utf8) {
816 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
817 raw_malloc, errors);
818 }
819
820 #ifdef USE_FORCE_ASCII
821 if (force_ascii == -1) {
822 force_ascii = check_force_ascii();
823 }
824
825 if (force_ascii) {
826 return encode_ascii(text, str, error_pos, reason,
827 raw_malloc, errors);
828 }
829 #endif
830
831 return encode_current_locale(text, str, error_pos, reason,
832 raw_malloc, errors);
833 #endif /* _Py_FORCE_UTF8_FS_ENCODING */
834 }
835
836 static char*
encode_locale(const wchar_t * text,size_t * error_pos,int raw_malloc,int current_locale)837 encode_locale(const wchar_t *text, size_t *error_pos,
838 int raw_malloc, int current_locale)
839 {
840 char *str;
841 int res = encode_locale_ex(text, &str, error_pos, NULL,
842 raw_malloc, current_locale,
843 _Py_ERROR_SURROGATEESCAPE);
844 if (res != -2 && error_pos) {
845 *error_pos = (size_t)-1;
846 }
847 if (res != 0) {
848 return NULL;
849 }
850 return str;
851 }
852
/* Encode a wide character string with the surrogateescape error handler:
   surrogate characters in the range U+DC80..U+DCFF are converted to bytes
   0x80..0xFF.

   encode_locale() is called with current_locale=0: the Python filesystem
   encoding is used rather than the current LC_CTYPE locale.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, or to (size_t)-1 otherwise (on success and on
   memory allocation error).

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=0, current_locale=0 */
    return encode_locale(text, error_pos, 0, 0);
}
870
871
/* Similar to Py_EncodeLocale(), but the result is allocated with
   PyMem_RawMalloc(): it must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* raw_malloc=1, current_locale=0 */
    return encode_locale(text, error_pos, 1, 0);
}
879
880
/* Encode a wide character string with encode_locale_ex(), which defines the
   parameters (current_locale, errors) and the return values (0, -1, -2, -3).

   raw_malloc=1 is passed: on success, *str is allocated with
   PyMem_RawMalloc() and must be freed with PyMem_RawFree(). */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, _Py_error_handler errors)
{
    return encode_locale_ex(text, str, error_pos, reason, 1,
                            current_locale, errors);
}
889
890
891 // Get the current locale encoding name:
892 //
893 // - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
894 // - Return "utf-8" if the UTF-8 Mode is enabled
895 // - On Windows, return the ANSI code page (ex: "cp1250")
896 // - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
897 // - Otherwise, return nl_langinfo(CODESET).
898 //
899 // Return NULL on memory allocation failure.
900 //
901 // See also config_get_locale_encoding()
902 wchar_t*
_Py_GetLocaleEncoding(void)903 _Py_GetLocaleEncoding(void)
904 {
905 #ifdef _Py_FORCE_UTF8_LOCALE
906 // On Android langinfo.h and CODESET are missing,
907 // and UTF-8 is always used in mbstowcs() and wcstombs().
908 return _PyMem_RawWcsdup(L"utf-8");
909 #else
910
911 #ifdef MS_WINDOWS
912 wchar_t encoding[23];
913 unsigned int ansi_codepage = GetACP();
914 swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
915 encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
916 return _PyMem_RawWcsdup(encoding);
917 #else
918 const char *encoding = nl_langinfo(CODESET);
919 if (!encoding || encoding[0] == '\0') {
920 // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
921 // macOS if the LC_CTYPE locale is not supported.
922 return _PyMem_RawWcsdup(L"utf-8");
923 }
924
925 wchar_t *wstr;
926 int res = decode_current_locale(encoding, &wstr, NULL,
927 NULL, _Py_ERROR_SURROGATEESCAPE);
928 if (res < 0) {
929 return NULL;
930 }
931 return wstr;
932 #endif // !MS_WINDOWS
933
934 #endif // !_Py_FORCE_UTF8_LOCALE
935 }
936
937
938 PyObject *
_Py_GetLocaleEncodingObject(void)939 _Py_GetLocaleEncodingObject(void)
940 {
941 wchar_t *encoding = _Py_GetLocaleEncoding();
942 if (encoding == NULL) {
943 PyErr_NoMemory();
944 return NULL;
945 }
946
947 PyObject *str = PyUnicode_FromWideChar(encoding, -1);
948 PyMem_RawFree(encoding);
949 return str;
950 }
951
952 #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
953
954 /* Check whether current locale uses Unicode as internal wchar_t form. */
955 int
_Py_LocaleUsesNonUnicodeWchar(void)956 _Py_LocaleUsesNonUnicodeWchar(void)
957 {
958 /* Oracle Solaris uses non-Unicode internal wchar_t form for
959 non-Unicode locales and hence needs conversion to UTF first. */
960 char* codeset = nl_langinfo(CODESET);
961 if (!codeset) {
962 return 0;
963 }
964 /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
965 return (strcmp(codeset, "UTF-8") != 0 && strcmp(codeset, "646") != 0);
966 }
967
968 static wchar_t *
_Py_ConvertWCharForm(const wchar_t * source,Py_ssize_t size,const char * tocode,const char * fromcode)969 _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
970 const char *tocode, const char *fromcode)
971 {
972 static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
973
974 /* Ensure we won't overflow the size. */
975 if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
976 PyErr_NoMemory();
977 return NULL;
978 }
979
980 /* the string doesn't have to be NULL terminated */
981 wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
982 if (target == NULL) {
983 PyErr_NoMemory();
984 return NULL;
985 }
986
987 iconv_t cd = iconv_open(tocode, fromcode);
988 if (cd == (iconv_t)-1) {
989 PyErr_Format(PyExc_ValueError, "iconv_open() failed");
990 PyMem_Free(target);
991 return NULL;
992 }
993
994 char *inbuf = (char *) source;
995 char *outbuf = (char *) target;
996 size_t inbytesleft = sizeof(wchar_t) * size;
997 size_t outbytesleft = inbytesleft;
998
999 size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
1000 if (ret == DECODE_ERROR) {
1001 PyErr_Format(PyExc_ValueError, "iconv() failed");
1002 PyMem_Free(target);
1003 iconv_close(cd);
1004 return NULL;
1005 }
1006
1007 iconv_close(cd);
1008 return target;
1009 }
1010
/* Convert 'size' wide characters of the string 'native' to a UCS-4 encoded
   string. This is necessary on systems where the internal form of wchar_t is
   not Unicode code points (e.g. Oracle Solaris).

   The conversion is done by _Py_ConvertWCharForm() from the iconv encoding
   "wchar_t" to the iconv encoding "UCS-4-INTERNAL".

   Return a pointer to a newly allocated string, use PyMem_Free() to free
   the memory. Return NULL and raise an exception on conversion or memory
   allocation error. */
wchar_t *
_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
{
    return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
}
1023
1024 /* Convert a UCS-4 encoded string to native wide character string. This
1025 is necessary on systems where internal form of wchar_t are not Unicode
1026 code points (e.g. Oracle Solaris).
1027
1028 The conversion is done in place. This can be done because both wchar_t
1029 and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1030 to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1031 which is currently the only system using these functions; it doesn't have
1032 to be for other systems).
1033
1034 Return 0 on success. Return -1 and raise exception on conversion
1035 or memory allocation error. */
1036 int
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t * unicode,Py_ssize_t size)1037 _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1038 {
1039 wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1040 if (!result) {
1041 return -1;
1042 }
1043 memcpy(unicode, result, size * sizeof(wchar_t));
1044 PyMem_Free(result);
1045 return 0;
1046 }
1047 #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1048
1049 #ifdef MS_WINDOWS
/* Offset subtracted from (or added to) a number of seconds to convert
   between the Windows time base (1601-01-01) and the time_t time base
   (1970-01-01). */
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1051
1052 static void
FILE_TIME_to_time_t_nsec(FILETIME * in_ptr,time_t * time_out,int * nsec_out)1053 FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1054 {
1055 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1056 /* Cannot simply cast and dereference in_ptr,
1057 since it might not be aligned properly */
1058 __int64 in;
1059 memcpy(&in, in_ptr, sizeof(in));
1060 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1061 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1062 }
1063
1064 static void
LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER * in_ptr,time_t * time_out,int * nsec_out)1065 LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out)
1066 {
1067 *nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1068 *time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t);
1069 }
1070
1071 void
_Py_time_t_to_FILE_TIME(time_t time_in,int nsec_in,FILETIME * out_ptr)1072 _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1073 {
1074 /* XXX endianness */
1075 __int64 out;
1076 out = time_in + secs_between_epochs;
1077 out = out * 10000000 + nsec_in / 100;
1078 memcpy(out_ptr, &out, sizeof(out));
1079 }
1080
1081 /* Below, we *know* that ugo+r is 0444 */
1082 #if _S_IREAD != 0400
1083 #error Unsupported C library
1084 #endif
1085 static int
attributes_to_mode(DWORD attr)1086 attributes_to_mode(DWORD attr)
1087 {
1088 int m = 0;
1089 if (attr & FILE_ATTRIBUTE_DIRECTORY)
1090 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1091 else
1092 m |= _S_IFREG;
1093 if (attr & FILE_ATTRIBUTE_READONLY)
1094 m |= 0444;
1095 else
1096 m |= 0666;
1097 return m;
1098 }
1099
1100
/* Overlay used to read a FILE_ID_128 as a pair of uint64_t fields. The stat
   conversion functions in this file assign .id and then copy .st_ino and
   .st_ino_high into the result structure. */
typedef union {
    FILE_ID_128 id;
    struct {
        uint64_t st_ino;
        uint64_t st_ino_high;
    };
} id_128_to_ino;
1108
1109
1110 void
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION * info,ULONG reparse_tag,FILE_BASIC_INFO * basic_info,FILE_ID_INFO * id_info,struct _Py_stat_struct * result)1111 _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1112 FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info,
1113 struct _Py_stat_struct *result)
1114 {
1115 memset(result, 0, sizeof(*result));
1116 result->st_mode = attributes_to_mode(info->dwFileAttributes);
1117 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1118 result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber;
1119 result->st_rdev = 0;
1120 /* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */
1121 if (basic_info) {
1122 LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1123 LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1124 LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1125 LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1126 } else {
1127 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1128 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1129 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1130 }
1131 result->st_nlink = info->nNumberOfLinks;
1132
1133 if (id_info) {
1134 id_128_to_ino file_id;
1135 file_id.id = id_info->FileId;
1136 result->st_ino = file_id.st_ino;
1137 result->st_ino_high = file_id.st_ino_high;
1138 }
1139 if (!result->st_ino && !result->st_ino_high) {
1140 /* should only occur for DirEntry_from_find_data, in which case the
1141 index is likely to be zero anyway. */
1142 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1143 }
1144
1145 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1146 open other name surrogate reparse points without traversing them. To
1147 detect/handle these, check st_file_attributes and st_reparse_tag. */
1148 result->st_reparse_tag = reparse_tag;
1149 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1150 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1151 /* set the bits that make this a symlink */
1152 result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1153 }
1154 result->st_file_attributes = info->dwFileAttributes;
1155 }
1156
1157 void
_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION * info,struct _Py_stat_struct * result)1158 _Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info,
1159 struct _Py_stat_struct *result)
1160 {
1161 memset(result, 0, sizeof(*result));
1162 result->st_mode = attributes_to_mode(info->FileAttributes);
1163 result->st_size = info->EndOfFile.QuadPart;
1164 LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1165 LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1166 LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1167 LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1168 result->st_nlink = info->NumberOfLinks;
1169 result->st_dev = info->VolumeSerialNumber.QuadPart;
1170 /* File systems with less than 128-bits zero pad into this field */
1171 id_128_to_ino file_id;
1172 file_id.id = info->FileId128;
1173 result->st_ino = file_id.st_ino;
1174 result->st_ino_high = file_id.st_ino_high;
1175 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1176 open other name surrogate reparse points without traversing them. To
1177 detect/handle these, check st_file_attributes and st_reparse_tag. */
1178 result->st_reparse_tag = info->ReparseTag;
1179 if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1180 info->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
1181 /* set the bits that make this a symlink */
1182 result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1183 }
1184 result->st_file_attributes = info->FileAttributes;
1185 switch (info->DeviceType) {
1186 case FILE_DEVICE_DISK:
1187 case FILE_DEVICE_VIRTUAL_DISK:
1188 case FILE_DEVICE_DFS:
1189 case FILE_DEVICE_CD_ROM:
1190 case FILE_DEVICE_CONTROLLER:
1191 case FILE_DEVICE_DATALINK:
1192 break;
1193 case FILE_DEVICE_DISK_FILE_SYSTEM:
1194 case FILE_DEVICE_CD_ROM_FILE_SYSTEM:
1195 case FILE_DEVICE_NETWORK_FILE_SYSTEM:
1196 result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */
1197 break;
1198 case FILE_DEVICE_CONSOLE:
1199 case FILE_DEVICE_NULL:
1200 case FILE_DEVICE_KEYBOARD:
1201 case FILE_DEVICE_MODEM:
1202 case FILE_DEVICE_MOUSE:
1203 case FILE_DEVICE_PARALLEL_PORT:
1204 case FILE_DEVICE_PRINTER:
1205 case FILE_DEVICE_SCREEN:
1206 case FILE_DEVICE_SERIAL_PORT:
1207 case FILE_DEVICE_SOUND:
1208 result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR;
1209 break;
1210 case FILE_DEVICE_NAMED_PIPE:
1211 result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO;
1212 break;
1213 default:
1214 if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
1215 result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR;
1216 }
1217 break;
1218 }
1219 }
1220
1221 #endif
1222
/* Return information about an open file descriptor, without raising.

   POSIX: thin wrapper around fstat(); errno is set on failure.

   Windows: use GetFileType() and GetFileInformationByHandle(), which support
   files larger than 2 GiB (fstat() may fail with EOVERFLOW on such files
   because its size field is a signed 32-bit integer: see issue #23152).
   Handles that are not disk files only get st_mode filled in (character
   device or pipe); every other field stays zero.

   Fill *status and return 0 on success. Return nonzero on error, with the
   last Windows error set on Windows and errno set on POSIX. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (file_type) {
    case FILE_TYPE_DISK:
        /* handled below */
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        /* any other type: report an all-zero status */
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    FILE_BASIC_INFO basic_info;
    if (!GetFileInformationByHandle(handle, &info) ||
        !GetFileInformationByHandleEx(handle, FileBasicInfo,
                                      &basic_info, sizeof(basic_info)))
    {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    FILE_ID_INFO id_info;
    FILE_ID_INFO *id_info_ptr = &id_info;
    if (!GetFileInformationByHandleEx(handle, FileIdInfo,
                                      &id_info, sizeof(id_info)))
    {
        /* Failed to get FileIdInfo, so do not pass it along */
        id_info_ptr = NULL;
    }

    _Py_attribute_data_to_stat(&info, 0, &basic_info, id_info_ptr, status);
    return 0;
#else
    return fstat(fd, status);
#endif
}
1293
1294 /* Return information about a file.
1295
1296 On POSIX, use fstat().
1297
1298 On Windows, use GetFileType() and GetFileInformationByHandle() which support
1299 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1300 than 2 GiB because the file size type is a signed 32-bit integer: see issue
1301 #23152.
1302
1303 Raise an exception and return -1 on error. On Windows, set the last Windows
1304 error on error. On POSIX, set errno on error. Fill status and return 0 on
1305 success.
1306
1307 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1308 to call fstat(). The caller must hold the GIL. */
1309 int
_Py_fstat(int fd,struct _Py_stat_struct * status)1310 _Py_fstat(int fd, struct _Py_stat_struct *status)
1311 {
1312 int res;
1313
1314 assert(PyGILState_Check());
1315
1316 Py_BEGIN_ALLOW_THREADS
1317 res = _Py_fstat_noraise(fd, status);
1318 Py_END_ALLOW_THREADS
1319
1320 if (res != 0) {
1321 #ifdef MS_WINDOWS
1322 PyErr_SetFromWindowsErr(0);
1323 #else
1324 PyErr_SetFromErrno(PyExc_OSError);
1325 #endif
1326 return -1;
1327 }
1328 return 0;
1329 }
1330
/* Like _Py_stat() but with a raw (wchar_t*) filename.

   Windows: call _wstat() and copy only st_mode into *buf.
   Elsewhere: encode the path with _Py_EncodeLocaleRaw() and call stat();
   if the path cannot be encoded, set errno to EINVAL and return -1.

   Return the _wstat() / stat() result otherwise. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
#ifdef MS_WINDOWS
    struct _stat wstatbuf;
    int status = _wstat(path, &wstatbuf);
    if (status == 0) {
        buf->st_mode = wstatbuf.st_mode;
    }
    return status;
#else
    char *encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    int status = stat(encoded_path, buf);
    PyMem_RawFree(encoded_path);
    return status;
#endif
}
1354
1355
1356 /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1357 call stat() otherwise. Only fill st_mode attribute on Windows.
1358
1359 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1360 raised. */
1361
1362 int
_Py_stat(PyObject * path,struct stat * statbuf)1363 _Py_stat(PyObject *path, struct stat *statbuf)
1364 {
1365 #ifdef MS_WINDOWS
1366 int err;
1367
1368 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1369 if (wpath == NULL)
1370 return -2;
1371
1372 err = _Py_wstat(wpath, statbuf);
1373 PyMem_Free(wpath);
1374 return err;
1375 #else
1376 int ret;
1377 PyObject *bytes;
1378 char *cpath;
1379
1380 bytes = PyUnicode_EncodeFSDefault(path);
1381 if (bytes == NULL)
1382 return -2;
1383
1384 /* check for embedded null bytes */
1385 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1386 Py_DECREF(bytes);
1387 return -2;
1388 }
1389
1390 ret = stat(cpath, statbuf);
1391 Py_DECREF(bytes);
1392 return ret;
1393 #endif
1394 }
1395
#ifdef MS_WINDOWS
// For some Windows API partitions, SetHandleInformation() is declared
// but none of the handle flags are defined. Supply the value ourselves in
// that case so get_inheritable() and set_inheritable() can use the flag.
#ifndef HANDLE_FLAG_INHERIT
#define HANDLE_FLAG_INHERIT 0x00000001
#endif
#endif
1403
1404 /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1405 static int
get_inheritable(int fd,int raise)1406 get_inheritable(int fd, int raise)
1407 {
1408 #ifdef MS_WINDOWS
1409 HANDLE handle;
1410 DWORD flags;
1411
1412 handle = _Py_get_osfhandle_noraise(fd);
1413 if (handle == INVALID_HANDLE_VALUE) {
1414 if (raise)
1415 PyErr_SetFromErrno(PyExc_OSError);
1416 return -1;
1417 }
1418
1419 if (!GetHandleInformation(handle, &flags)) {
1420 if (raise)
1421 PyErr_SetFromWindowsErr(0);
1422 return -1;
1423 }
1424
1425 return (flags & HANDLE_FLAG_INHERIT);
1426 #else
1427 int flags;
1428
1429 flags = fcntl(fd, F_GETFD, 0);
1430 if (flags == -1) {
1431 if (raise)
1432 PyErr_SetFromErrno(PyExc_OSError);
1433 return -1;
1434 }
1435 return !(flags & FD_CLOEXEC);
1436 #endif
1437 }
1438
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1447
1448
/* Set or clear the inheritable flag of fd.

   fd                -- file descriptor to modify.
   inheritable       -- nonzero to make fd inheritable, zero to make it
                        non-inheritable.
   raise             -- nonzero: set a Python exception on error; zero: only
                        return -1.
   atomic_flag_works -- NULL, or a pointer to a cached tri-state (-1 unknown,
                        0 no, 1 yes) telling whether fd was already created
                        non-inheritable. Only allowed when inheritable is 0.

   Return 0 on success, -1 on error.

   This function MUST be kept async-signal-safe on POSIX when raise=0. */
static int
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
{
#ifdef MS_WINDOWS
    HANDLE handle;
    DWORD flags;
#else
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    /* -1: not tried yet, 0: ioctl() is known not to work, 1: it works */
    static int ioctl_works = -1;
    int request;
    int err;
#endif
    int flags, new_flags;
    int res;
#endif

    /* atomic_flag_works can only be used to make the file descriptor
       non-inheritable */
    assert(!(atomic_flag_works != NULL && inheritable));

    if (atomic_flag_works != NULL && !inheritable) {
        if (*atomic_flag_works == -1) {
            /* First use: look at the descriptor itself. If it is already
               non-inheritable, record that the atomic flag works. */
            int isInheritable = get_inheritable(fd, raise);
            if (isInheritable == -1)
                return -1;
            *atomic_flag_works = !isInheritable;
        }

        /* The descriptor was created non-inheritable: nothing to do. */
        if (*atomic_flag_works)
            return 0;
    }

#ifdef MS_WINDOWS
    handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    if (inheritable)
        flags = HANDLE_FLAG_INHERIT;
    else
        flags = 0;

    /* Only the HANDLE_FLAG_INHERIT bit is touched (it is the mask). */
    if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
        if (raise)
            PyErr_SetFromWindowsErr(0);
        return -1;
    }
    return 0;

#else

#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    if (raise != 0 && _Py_atomic_load_int_relaxed(&ioctl_works) != 0) {
        /* fast-path: ioctl() only requires one syscall */
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
         * thus avoid using ioctl() so we skip the fast-path. */
        if (inheritable)
            request = FIONCLEX;
        else
            request = FIOCLEX;
        err = ioctl(fd, request, NULL);
        if (!err) {
            /* Remember the first success so later calls keep using ioctl(). */
            if (_Py_atomic_load_int_relaxed(&ioctl_works) == -1) {
                _Py_atomic_store_int_relaxed(&ioctl_works, 1);
            }
            return 0;
        }

#ifdef O_PATH
        if (errno == EBADF) {
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
            // on O_PATH file descriptors. Fall through to the fcntl()
            // implementation.
        }
        else
#endif
        if (errno != ENOTTY && errno != EACCES) {
            /* Any other ioctl() failure is a genuine error. */
            if (raise)
                PyErr_SetFromErrno(PyExc_OSError);
            return -1;
        }
        else {
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
               device". The ioctl is declared but not supported by the kernel.
               Remember that ioctl() doesn't work. It is the case on
               Illumos-based OS for example.

               Issue #27057: When SELinux policy disallows ioctl it will fail
               with EACCES. While FIOCLEX is safe operation it may be
               unavailable because ioctl was denied altogether.
               This can be the case on Android. */
            _Py_atomic_store_int_relaxed(&ioctl_works, 0);
        }
        /* fallback to fcntl() if ioctl() does not work */
    }
#endif

    /* slow-path: fcntl() requires two syscalls */
    flags = fcntl(fd, F_GETFD);
    if (flags < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }

    /* inheritable <=> FD_CLOEXEC cleared */
    if (inheritable) {
        new_flags = flags & ~FD_CLOEXEC;
    }
    else {
        new_flags = flags | FD_CLOEXEC;
    }

    if (new_flags == flags) {
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
        return 0;
    }

    res = fcntl(fd, F_SETFD, new_flags);
    if (res < 0) {
        if (raise)
            PyErr_SetFromErrno(PyExc_OSError);
        return -1;
    }
    return 0;
#endif
}
1579
/* Make the file descriptor non-inheritable without raising.

   Calls set_inheritable() with raise=0 and no atomic-flag cache.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1587
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   atomic_flag_works, when not NULL, points to a cached tri-state recording
   whether the descriptor was already created non-inheritable:

   * *atomic_flag_works == -1 (unknown): query the descriptor. If it is
     already non-inheritable, store 1 and do nothing else; otherwise store 0
     and make the descriptor non-inheritable.
   * *atomic_flag_works == 1: do nothing.
   * *atomic_flag_works == 0: make the descriptor non-inheritable.

   Pass NULL if no atomic flag was used to create the file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1609
/* Variant of _Py_set_inheritable() that never raises: on error, set errno
   and return -1 without setting an exception.

   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1618
1619 static int
_Py_open_impl(const char * pathname,int flags,int gil_held)1620 _Py_open_impl(const char *pathname, int flags, int gil_held)
1621 {
1622 int fd;
1623 int async_err = 0;
1624 #ifndef MS_WINDOWS
1625 int *atomic_flag_works;
1626 #endif
1627
1628 #ifdef MS_WINDOWS
1629 flags |= O_NOINHERIT;
1630 #elif defined(O_CLOEXEC)
1631 atomic_flag_works = &_Py_open_cloexec_works;
1632 flags |= O_CLOEXEC;
1633 #else
1634 atomic_flag_works = NULL;
1635 #endif
1636
1637 if (gil_held) {
1638 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1639 if (pathname_obj == NULL) {
1640 return -1;
1641 }
1642 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1643 Py_DECREF(pathname_obj);
1644 return -1;
1645 }
1646
1647 do {
1648 Py_BEGIN_ALLOW_THREADS
1649 fd = open(pathname, flags);
1650 Py_END_ALLOW_THREADS
1651 } while (fd < 0
1652 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1653 if (async_err) {
1654 Py_DECREF(pathname_obj);
1655 return -1;
1656 }
1657 if (fd < 0) {
1658 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1659 Py_DECREF(pathname_obj);
1660 return -1;
1661 }
1662 Py_DECREF(pathname_obj);
1663 }
1664 else {
1665 fd = open(pathname, flags);
1666 if (fd < 0)
1667 return -1;
1668 }
1669
1670 #ifndef MS_WINDOWS
1671 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1672 close(fd);
1673 return -1;
1674 }
1675 #endif
1676
1677 return fd;
1678 }
1679
/* Open a file with the specified flags (wrapper to open() function).

   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1697
/* Open a file with the specified flags (wrapper to open() function),
   without touching the Python error indicator.

   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1709
1710 /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1711 encoding and use fopen() otherwise.
1712
1713 The file descriptor is created non-inheritable.
1714
1715 If interrupted by a signal, fail with EINTR. */
1716 FILE *
_Py_wfopen(const wchar_t * path,const wchar_t * mode)1717 _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1718 {
1719 FILE *f;
1720 if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1721 return NULL;
1722 }
1723 #ifndef MS_WINDOWS
1724 char *cpath;
1725 char cmode[10];
1726 size_t r;
1727 r = wcstombs(cmode, mode, 10);
1728 if (r == DECODE_ERROR || r >= 10) {
1729 errno = EINVAL;
1730 return NULL;
1731 }
1732 cpath = _Py_EncodeLocaleRaw(path, NULL);
1733 if (cpath == NULL) {
1734 return NULL;
1735 }
1736 f = fopen(cpath, cmode);
1737 PyMem_RawFree(cpath);
1738 #else
1739 f = _wfopen(path, mode);
1740 #endif
1741 if (f == NULL)
1742 return NULL;
1743 if (make_non_inheritable(fileno(f)) < 0) {
1744 fclose(f);
1745 return NULL;
1746 }
1747 return f;
1748 }
1749
1750
1751 /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1752 encoding and call fopen() otherwise.
1753
1754 Return the new file object on success. Raise an exception and return NULL
1755 on error.
1756
1757 The file descriptor is created non-inheritable.
1758
1759 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1760 except if the Python signal handler raises an exception.
1761
1762 Release the GIL to call _wfopen() or fopen(). The caller must hold
1763 the GIL. */
1764 FILE*
_Py_fopen_obj(PyObject * path,const char * mode)1765 _Py_fopen_obj(PyObject *path, const char *mode)
1766 {
1767 FILE *f;
1768 int async_err = 0;
1769 #ifdef MS_WINDOWS
1770 wchar_t wmode[10];
1771 int usize;
1772
1773 assert(PyGILState_Check());
1774
1775 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1776 return NULL;
1777 }
1778 if (!PyUnicode_Check(path)) {
1779 PyErr_Format(PyExc_TypeError,
1780 "str file path expected under Windows, got %R",
1781 Py_TYPE(path));
1782 return NULL;
1783 }
1784
1785 wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1786 if (wpath == NULL)
1787 return NULL;
1788
1789 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1790 wmode, Py_ARRAY_LENGTH(wmode));
1791 if (usize == 0) {
1792 PyErr_SetFromWindowsErr(0);
1793 PyMem_Free(wpath);
1794 return NULL;
1795 }
1796
1797 do {
1798 Py_BEGIN_ALLOW_THREADS
1799 f = _wfopen(wpath, wmode);
1800 Py_END_ALLOW_THREADS
1801 } while (f == NULL
1802 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1803 int saved_errno = errno;
1804 PyMem_Free(wpath);
1805 #else
1806 PyObject *bytes;
1807 const char *path_bytes;
1808
1809 assert(PyGILState_Check());
1810
1811 if (!PyUnicode_FSConverter(path, &bytes))
1812 return NULL;
1813 path_bytes = PyBytes_AS_STRING(bytes);
1814
1815 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1816 Py_DECREF(bytes);
1817 return NULL;
1818 }
1819
1820 do {
1821 Py_BEGIN_ALLOW_THREADS
1822 f = fopen(path_bytes, mode);
1823 Py_END_ALLOW_THREADS
1824 } while (f == NULL
1825 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1826 int saved_errno = errno;
1827 Py_DECREF(bytes);
1828 #endif
1829 if (async_err)
1830 return NULL;
1831
1832 if (f == NULL) {
1833 errno = saved_errno;
1834 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1835 return NULL;
1836 }
1837
1838 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1839 fclose(f);
1840 return NULL;
1841 }
1842 return f;
1843 }
1844
1845 /* Read count bytes from fd into buf.
1846
1847 On success, return the number of read bytes, it can be lower than count.
1848 If the current file offset is at or past the end of file, no bytes are read,
1849 and read() returns zero.
1850
1851 On error, raise an exception, set errno and return -1.
1852
1853 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1854 If the Python signal handler raises an exception, the function returns -1
1855 (the syscall is not retried).
1856
1857 Release the GIL to call read(). The caller must hold the GIL. */
1858 Py_ssize_t
_Py_read(int fd,void * buf,size_t count)1859 _Py_read(int fd, void *buf, size_t count)
1860 {
1861 Py_ssize_t n;
1862 int err;
1863 int async_err = 0;
1864
1865 assert(PyGILState_Check());
1866
1867 /* _Py_read() must not be called with an exception set, otherwise the
1868 * caller may think that read() was interrupted by a signal and the signal
1869 * handler raised an exception. */
1870 assert(!PyErr_Occurred());
1871
1872 if (count > _PY_READ_MAX) {
1873 count = _PY_READ_MAX;
1874 }
1875
1876 _Py_BEGIN_SUPPRESS_IPH
1877 do {
1878 Py_BEGIN_ALLOW_THREADS
1879 errno = 0;
1880 #ifdef MS_WINDOWS
1881 _doserrno = 0;
1882 n = read(fd, buf, (int)count);
1883 // read() on a non-blocking empty pipe fails with EINVAL, which is
1884 // mapped from the Windows error code ERROR_NO_DATA.
1885 if (n < 0 && errno == EINVAL) {
1886 if (_doserrno == ERROR_NO_DATA) {
1887 errno = EAGAIN;
1888 }
1889 }
1890 #else
1891 n = read(fd, buf, count);
1892 #endif
1893 /* save/restore errno because PyErr_CheckSignals()
1894 * and PyErr_SetFromErrno() can modify it */
1895 err = errno;
1896 Py_END_ALLOW_THREADS
1897 } while (n < 0 && err == EINTR &&
1898 !(async_err = PyErr_CheckSignals()));
1899 _Py_END_SUPPRESS_IPH
1900
1901 if (async_err) {
1902 /* read() was interrupted by a signal (failed with EINTR)
1903 * and the Python signal handler raised an exception */
1904 errno = err;
1905 assert(errno == EINTR && PyErr_Occurred());
1906 return -1;
1907 }
1908 if (n < 0) {
1909 PyErr_SetFromErrno(PyExc_OSError);
1910 errno = err;
1911 return -1;
1912 }
1913
1914 return n;
1915 }
1916
1917 static Py_ssize_t
_Py_write_impl(int fd,const void * buf,size_t count,int gil_held)1918 _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1919 {
1920 Py_ssize_t n;
1921 int err;
1922 int async_err = 0;
1923
1924 _Py_BEGIN_SUPPRESS_IPH
1925 #ifdef MS_WINDOWS
1926 if (count > 32767) {
1927 /* Issue #11395: the Windows console returns an error (12: not
1928 enough space error) on writing into stdout if stdout mode is
1929 binary and the length is greater than 66,000 bytes (or less,
1930 depending on heap usage). */
1931 if (gil_held) {
1932 Py_BEGIN_ALLOW_THREADS
1933 if (isatty(fd)) {
1934 count = 32767;
1935 }
1936 Py_END_ALLOW_THREADS
1937 } else {
1938 if (isatty(fd)) {
1939 count = 32767;
1940 }
1941 }
1942 }
1943
1944 #endif
1945 if (count > _PY_WRITE_MAX) {
1946 count = _PY_WRITE_MAX;
1947 }
1948
1949 if (gil_held) {
1950 do {
1951 Py_BEGIN_ALLOW_THREADS
1952 errno = 0;
1953 #ifdef MS_WINDOWS
1954 // write() on a non-blocking pipe fails with ENOSPC on Windows if
1955 // the pipe lacks available space for the entire buffer.
1956 int c = (int)count;
1957 do {
1958 _doserrno = 0;
1959 n = write(fd, buf, c);
1960 if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1961 break;
1962 }
1963 errno = EAGAIN;
1964 c /= 2;
1965 } while (c > 0);
1966 #else
1967 n = write(fd, buf, count);
1968 #endif
1969 /* save/restore errno because PyErr_CheckSignals()
1970 * and PyErr_SetFromErrno() can modify it */
1971 err = errno;
1972 Py_END_ALLOW_THREADS
1973 } while (n < 0 && err == EINTR &&
1974 !(async_err = PyErr_CheckSignals()));
1975 }
1976 else {
1977 do {
1978 errno = 0;
1979 #ifdef MS_WINDOWS
1980 // write() on a non-blocking pipe fails with ENOSPC on Windows if
1981 // the pipe lacks available space for the entire buffer.
1982 int c = (int)count;
1983 do {
1984 _doserrno = 0;
1985 n = write(fd, buf, c);
1986 if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1987 break;
1988 }
1989 errno = EAGAIN;
1990 c /= 2;
1991 } while (c > 0);
1992 #else
1993 n = write(fd, buf, count);
1994 #endif
1995 err = errno;
1996 } while (n < 0 && err == EINTR);
1997 }
1998 _Py_END_SUPPRESS_IPH
1999
2000 if (async_err) {
2001 /* write() was interrupted by a signal (failed with EINTR)
2002 and the Python signal handler raised an exception (if gil_held is
2003 nonzero). */
2004 errno = err;
2005 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
2006 return -1;
2007 }
2008 if (n < 0) {
2009 if (gil_held)
2010 PyErr_SetFromErrno(PyExc_OSError);
2011 errno = err;
2012 return -1;
2013 }
2014
2015 return n;
2016 }
2017
2018 /* Write count bytes of buf into fd.
2019
2020 On success, return the number of written bytes, it can be lower than count
2021 including 0. On error, raise an exception, set errno and return -1.
2022
2023 When interrupted by a signal (write() fails with EINTR), retry the syscall.
2024 If the Python signal handler raises an exception, the function returns -1
2025 (the syscall is not retried).
2026
2027 Release the GIL to call write(). The caller must hold the GIL. */
2028 Py_ssize_t
_Py_write(int fd,const void * buf,size_t count)2029 _Py_write(int fd, const void *buf, size_t count)
2030 {
2031 assert(PyGILState_Check());
2032
2033 /* _Py_write() must not be called with an exception set, otherwise the
2034 * caller may think that write() was interrupted by a signal and the signal
2035 * handler raised an exception. */
2036 assert(!PyErr_Occurred());
2037
2038 return _Py_write_impl(fd, buf, count, 1);
2039 }
2040
2041 /* Write count bytes of buf into fd.
2042 *
2043 * On success, return the number of written bytes, it can be lower than count
2044 * including 0. On error, set errno and return -1.
2045 *
2046 * When interrupted by a signal (write() fails with EINTR), retry the syscall
2047 * without calling the Python signal handler. */
2048 Py_ssize_t
_Py_write_noraise(int fd,const void * buf,size_t count)2049 _Py_write_noraise(int fd, const void *buf, size_t count)
2050 {
2051 return _Py_write_impl(fd, buf, count, 0);
2052 }
2053
2054 #ifdef HAVE_READLINK
2055
2056 /* Read value of symbolic link. Encode the path to the locale encoding, decode
2057 the result from the locale encoding.
2058
2059 Return -1 on encoding error, on readlink() error, if the internal buffer is
2060 too short, on decoding error, or if 'buf' is too short. */
2061 int
_Py_wreadlink(const wchar_t * path,wchar_t * buf,size_t buflen)2062 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
2063 {
2064 char *cpath;
2065 char cbuf[MAXPATHLEN];
2066 size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
2067 wchar_t *wbuf;
2068 Py_ssize_t res;
2069 size_t r1;
2070
2071 cpath = _Py_EncodeLocaleRaw(path, NULL);
2072 if (cpath == NULL) {
2073 errno = EINVAL;
2074 return -1;
2075 }
2076 res = readlink(cpath, cbuf, cbuf_len);
2077 PyMem_RawFree(cpath);
2078 if (res == -1) {
2079 return -1;
2080 }
2081 if ((size_t)res == cbuf_len) {
2082 errno = EINVAL;
2083 return -1;
2084 }
2085 cbuf[res] = '\0'; /* buf will be null terminated */
2086 wbuf = Py_DecodeLocale(cbuf, &r1);
2087 if (wbuf == NULL) {
2088 errno = EINVAL;
2089 return -1;
2090 }
2091 /* wbuf must have space to store the trailing NUL character */
2092 if (buflen <= r1) {
2093 PyMem_RawFree(wbuf);
2094 errno = EINVAL;
2095 return -1;
2096 }
2097 wcsncpy(buf, wbuf, buflen);
2098 PyMem_RawFree(wbuf);
2099 return (int)r1;
2100 }
2101 #endif
2102
2103 #ifdef HAVE_REALPATH
2104
2105 /* Return the canonicalized absolute pathname. Encode path to the locale
2106 encoding, decode the result from the locale encoding.
2107
2108 Return NULL on encoding error, realpath() error, decoding error
2109 or if 'resolved_path' is too short. */
2110 wchar_t*
_Py_wrealpath(const wchar_t * path,wchar_t * resolved_path,size_t resolved_path_len)2111 _Py_wrealpath(const wchar_t *path,
2112 wchar_t *resolved_path, size_t resolved_path_len)
2113 {
2114 char *cpath;
2115 char cresolved_path[MAXPATHLEN];
2116 wchar_t *wresolved_path;
2117 char *res;
2118 size_t r;
2119 cpath = _Py_EncodeLocaleRaw(path, NULL);
2120 if (cpath == NULL) {
2121 errno = EINVAL;
2122 return NULL;
2123 }
2124 res = realpath(cpath, cresolved_path);
2125 PyMem_RawFree(cpath);
2126 if (res == NULL)
2127 return NULL;
2128
2129 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
2130 if (wresolved_path == NULL) {
2131 errno = EINVAL;
2132 return NULL;
2133 }
2134 /* wresolved_path must have space to store the trailing NUL character */
2135 if (resolved_path_len <= r) {
2136 PyMem_RawFree(wresolved_path);
2137 errno = EINVAL;
2138 return NULL;
2139 }
2140 wcsncpy(resolved_path, wresolved_path, resolved_path_len);
2141 PyMem_RawFree(wresolved_path);
2142 return resolved_path;
2143 }
2144 #endif
2145
2146
2147 int
_Py_isabs(const wchar_t * path)2148 _Py_isabs(const wchar_t *path)
2149 {
2150 #ifdef MS_WINDOWS
2151 const wchar_t *tail;
2152 HRESULT hr = PathCchSkipRoot(path, &tail);
2153 if (FAILED(hr) || path == tail) {
2154 return 0;
2155 }
2156 if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
2157 // Exclude paths with leading SEP
2158 return 0;
2159 }
2160 if (tail == &path[2] && path[1] == L':') {
2161 // Exclude drive-relative paths (e.g. C:filename.ext)
2162 return 0;
2163 }
2164 return 1;
2165 #else
2166 return (path[0] == SEP);
2167 #endif
2168 }
2169
2170
2171 /* Get an absolute path.
2172 On error (ex: fail to get the current directory), return -1.
2173 On memory allocation failure, set *abspath_p to NULL and return 0.
2174 On success, return a newly allocated to *abspath_p to and return 0.
2175 The string must be freed by PyMem_RawFree(). */
2176 int
_Py_abspath(const wchar_t * path,wchar_t ** abspath_p)2177 _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2178 {
2179 if (path[0] == '\0' || !wcscmp(path, L".")) {
2180 wchar_t cwd[MAXPATHLEN + 1];
2181 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2182 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2183 /* unable to get the current directory */
2184 return -1;
2185 }
2186 *abspath_p = _PyMem_RawWcsdup(cwd);
2187 return 0;
2188 }
2189
2190 if (_Py_isabs(path)) {
2191 *abspath_p = _PyMem_RawWcsdup(path);
2192 return 0;
2193 }
2194
2195 #ifdef MS_WINDOWS
2196 return _PyOS_getfullpathname(path, abspath_p);
2197 #else
2198 wchar_t cwd[MAXPATHLEN + 1];
2199 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2200 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2201 /* unable to get the current directory */
2202 return -1;
2203 }
2204
2205 size_t cwd_len = wcslen(cwd);
2206 size_t path_len = wcslen(path);
2207 size_t len = cwd_len + 1 + path_len + 1;
2208 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2209 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2210 }
2211 else {
2212 *abspath_p = NULL;
2213 }
2214 if (*abspath_p == NULL) {
2215 return 0;
2216 }
2217
2218 wchar_t *abspath = *abspath_p;
2219 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2220 abspath += cwd_len;
2221
2222 *abspath = (wchar_t)SEP;
2223 abspath++;
2224
2225 memcpy(abspath, path, path_len * sizeof(wchar_t));
2226 abspath += path_len;
2227
2228 *abspath = 0;
2229 return 0;
2230 #endif
2231 }
2232
2233 // The Windows Games API family implements the PathCch* APIs in the Xbox OS,
2234 // but does not expose them yet. Load them dynamically until
2235 // 1) they are officially exposed
2236 // 2) we stop supporting older versions of the GDK which do not expose them
2237 #if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
2238 HRESULT
PathCchSkipRoot(const wchar_t * path,const wchar_t ** rootEnd)2239 PathCchSkipRoot(const wchar_t *path, const wchar_t **rootEnd)
2240 {
2241 static int initialized = 0;
2242 typedef HRESULT(__stdcall *PPathCchSkipRoot) (PCWSTR pszPath,
2243 PCWSTR *ppszRootEnd);
2244 static PPathCchSkipRoot _PathCchSkipRoot;
2245
2246 if (initialized == 0) {
2247 HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2248 LOAD_LIBRARY_SEARCH_SYSTEM32);
2249 if (pathapi) {
2250 _PathCchSkipRoot = (PPathCchSkipRoot)GetProcAddress(
2251 pathapi, "PathCchSkipRoot");
2252 }
2253 else {
2254 _PathCchSkipRoot = NULL;
2255 }
2256 initialized = 1;
2257 }
2258
2259 if (!_PathCchSkipRoot) {
2260 return E_NOINTERFACE;
2261 }
2262
2263 return _PathCchSkipRoot(path, rootEnd);
2264 }
2265
2266 static HRESULT
PathCchCombineEx(wchar_t * buffer,size_t bufsize,const wchar_t * dirname,const wchar_t * relfile,unsigned long flags)2267 PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
2268 const wchar_t *relfile, unsigned long flags)
2269 {
2270 static int initialized = 0;
2271 typedef HRESULT(__stdcall *PPathCchCombineEx) (PWSTR pszPathOut,
2272 size_t cchPathOut,
2273 PCWSTR pszPathIn,
2274 PCWSTR pszMore,
2275 unsigned long dwFlags);
2276 static PPathCchCombineEx _PathCchCombineEx;
2277
2278 if (initialized == 0) {
2279 HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2280 LOAD_LIBRARY_SEARCH_SYSTEM32);
2281 if (pathapi) {
2282 _PathCchCombineEx = (PPathCchCombineEx)GetProcAddress(
2283 pathapi, "PathCchCombineEx");
2284 }
2285 else {
2286 _PathCchCombineEx = NULL;
2287 }
2288 initialized = 1;
2289 }
2290
2291 if (!_PathCchCombineEx) {
2292 return E_NOINTERFACE;
2293 }
2294
2295 return _PathCchCombineEx(buffer, bufsize, dirname, relfile, flags);
2296 }
2297
2298 #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
2299
2300 void
_Py_skiproot(const wchar_t * path,Py_ssize_t size,Py_ssize_t * drvsize,Py_ssize_t * rootsize)2301 _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
2302 Py_ssize_t *rootsize)
2303 {
2304 assert(drvsize);
2305 assert(rootsize);
2306 #ifndef MS_WINDOWS
2307 #define IS_SEP(x) (*(x) == SEP)
2308 *drvsize = 0;
2309 if (!IS_SEP(&path[0])) {
2310 // Relative path, e.g.: 'foo'
2311 *rootsize = 0;
2312 }
2313 else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) {
2314 // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
2315 *rootsize = 1;
2316 }
2317 else {
2318 // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
2319 // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
2320 *rootsize = 2;
2321 }
2322 #undef IS_SEP
2323 #else
2324 const wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2325 #define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2326 #define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2327 #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2328 if (IS_SEP(&path[0])) {
2329 if (IS_SEP(&path[1])) {
2330 // Device drives, e.g. \\.\device or \\?\device
2331 // UNC drives, e.g. \\server\share or \\?\UNC\server\share
2332 Py_ssize_t idx;
2333 if (path[2] == L'?' && IS_SEP(&path[3]) &&
2334 (path[4] == L'U' || path[4] == L'u') &&
2335 (path[5] == L'N' || path[5] == L'n') &&
2336 (path[6] == L'C' || path[6] == L'c') &&
2337 IS_SEP(&path[7]))
2338 {
2339 idx = 8;
2340 }
2341 else {
2342 idx = 2;
2343 }
2344 while (!SEP_OR_END(&path[idx])) {
2345 idx++;
2346 }
2347 if (IS_END(&path[idx])) {
2348 *drvsize = idx;
2349 *rootsize = 0;
2350 }
2351 else {
2352 idx++;
2353 while (!SEP_OR_END(&path[idx])) {
2354 idx++;
2355 }
2356 *drvsize = idx;
2357 if (IS_END(&path[idx])) {
2358 *rootsize = 0;
2359 }
2360 else {
2361 *rootsize = 1;
2362 }
2363 }
2364 }
2365 else {
2366 // Relative path with root, e.g. \Windows
2367 *drvsize = 0;
2368 *rootsize = 1;
2369 }
2370 }
2371 else if (!IS_END(&path[0]) && path[1] == L':') {
2372 *drvsize = 2;
2373 if (IS_SEP(&path[2])) {
2374 // Absolute drive-letter path, e.g. X:\Windows
2375 *rootsize = 1;
2376 }
2377 else {
2378 // Relative path with drive, e.g. X:Windows
2379 *rootsize = 0;
2380 }
2381 }
2382 else {
2383 // Relative path, e.g. Windows
2384 *drvsize = 0;
2385 *rootsize = 0;
2386 }
2387 #undef SEP_OR_END
2388 #undef IS_SEP
2389 #undef IS_END
2390 #endif
2391 }
2392
2393 // The caller must ensure "buffer" is big enough.
2394 static int
join_relfile(wchar_t * buffer,size_t bufsize,const wchar_t * dirname,const wchar_t * relfile)2395 join_relfile(wchar_t *buffer, size_t bufsize,
2396 const wchar_t *dirname, const wchar_t *relfile)
2397 {
2398 #ifdef MS_WINDOWS
2399 if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2400 PATHCCH_ALLOW_LONG_PATHS))) {
2401 return -1;
2402 }
2403 #else
2404 assert(!_Py_isabs(relfile));
2405 size_t dirlen = wcslen(dirname);
2406 size_t rellen = wcslen(relfile);
2407 size_t maxlen = bufsize - 1;
2408 if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
2409 return -1;
2410 }
2411 if (dirlen == 0) {
2412 // We do not add a leading separator.
2413 wcscpy(buffer, relfile);
2414 }
2415 else {
2416 if (dirname != buffer) {
2417 wcscpy(buffer, dirname);
2418 }
2419 size_t relstart = dirlen;
2420 if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
2421 buffer[dirlen] = SEP;
2422 relstart += 1;
2423 }
2424 wcscpy(&buffer[relstart], relfile);
2425 }
2426 #endif
2427 return 0;
2428 }
2429
2430 /* Join the two paths together, like os.path.join(). Return NULL
2431 if memory could not be allocated. The caller is responsible
2432 for calling PyMem_RawFree() on the result. */
2433 wchar_t *
_Py_join_relfile(const wchar_t * dirname,const wchar_t * relfile)2434 _Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2435 {
2436 assert(dirname != NULL && relfile != NULL);
2437 #ifndef MS_WINDOWS
2438 assert(!_Py_isabs(relfile));
2439 #endif
2440 size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2441 size_t bufsize = maxlen + 1;
2442 wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2443 if (filename == NULL) {
2444 return NULL;
2445 }
2446 assert(wcslen(dirname) < MAXPATHLEN);
2447 assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2448 if (join_relfile(filename, bufsize, dirname, relfile) < 0) {
2449 PyMem_RawFree(filename);
2450 return NULL;
2451 }
2452 return filename;
2453 }
2454
/* Append 'relfile' to 'dirname' in place, like os.path.join().
     dirname: the target buffer, already holding the directory name
              (including its trailing NUL)
     relfile: the path to append; it must be a relative path
     bufsize: total allocated size of the 'dirname' buffer
   Return 0 on success, -1 if anything is wrong with the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);
    // 'dirname' is both the directory to read and the buffer to write.
    const int status = join_relfile(dirname, bufsize, dirname, relfile);
    return status;
}
2468
2469
2470 size_t
_Py_find_basename(const wchar_t * filename)2471 _Py_find_basename(const wchar_t *filename)
2472 {
2473 for (size_t i = wcslen(filename); i > 0; --i) {
2474 if (filename[i] == SEP) {
2475 return i + 1;
2476 }
2477 }
2478 return 0;
2479 }
2480
/* In-place path normalisation. Returns the start of the normalized
   path, which will be within the original buffer. Guaranteed to not
   make the path longer, and will not fail. 'size' is the length of
   the path, if known. If -1, the first null character will be assumed
   to be the end of the path. 'normsize' will be set to contain the
   length of the resulting normalized path.

   The scan collapses repeated separators, drops '.' segments, resolves
   '..' segments against the preceding segment where possible and strips
   trailing separators. When ALTSEP is defined, ALTSEP is rewritten to SEP.
   A NUL terminator is written at the end of the normalized path. */
wchar_t *
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
{
    assert(path != NULL);
    // Empty path (by explicit size or by leading NUL): nothing to normalize
    if ((size < 0 && !path[0]) || size == 0) {
        *normsize = 0;
        return path;
    }
    // pEnd is NULL when the path is NUL-terminated rather than sized
    wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
    wchar_t *p1 = path;     // read cursor: sequentially scanned address in the path
    wchar_t *p2 = path;     // write cursor: destination of the next kept character
    wchar_t *minP2 = path;  // the beginning of the destination range
    wchar_t lastC = L'\0';  // the last character written, p2[-1] in most cases

#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
#ifdef ALTSEP
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
#else
#define IS_SEP(x) (*(x) == SEP)
#endif
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))

    Py_ssize_t drvsize, rootsize;
    _Py_skiproot(path, size, &drvsize, &rootsize);
    if (drvsize || rootsize) {
        // Skip past root and update minP2
        p1 = &path[drvsize + rootsize];
#ifndef ALTSEP
        p2 = p1;
#else
        // Advance p2 over the prefix, converting ALTSEP to SEP on the way
        for (; p2 < p1; ++p2) {
            if (*p2 == ALTSEP) {
                *p2 = SEP;
            }
        }
#endif
        // minP2 and lastC refer to the last character of the drive/root prefix
        minP2 = p2 - 1;
        lastC = *minP2;
#ifdef MS_WINDOWS
        // If the prefix does not end with a separator, the destination range
        // starts right after it instead
        if (lastC != SEP) {
            minP2++;
        }
#endif
    }
    if (p1[0] == L'.' && SEP_OR_END(&p1[1])) {
        // Skip leading '.\'
        lastC = *++p1;
#ifdef ALTSEP
        if (lastC == ALTSEP) {
            lastC = SEP;
        }
#endif
        // Also skip every separator that follows the leading '.'
        while (IS_SEP(p1)) {
            p1++;
        }
    }

    /* if pEnd is specified, check that. Else, check for null terminator */
    for (; !IS_END(p1); ++p1) {
        wchar_t c = *p1;
#ifdef ALTSEP
        if (c == ALTSEP) {
            c = SEP;
        }
#endif
        // Only a character following a separator can start a '.' or '..'
        // segment or be a redundant separator
        if (lastC == SEP) {
            if (c == L'.') {
                // sep_at_1: the segment is exactly '.'
                // sep_at_2 (with p1[1] == '.'): the segment is exactly '..'
                int sep_at_1 = SEP_OR_END(&p1[1]);
                int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
                if (sep_at_2 && p1[1] == L'.') {
                    // Walk p3 back from the write cursor: first over trailing
                    // separators, then to the start of the previous segment
                    wchar_t *p3 = p2;
                    while (p3 != minP2 && *--p3 == SEP) { }
                    while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
                    if (p2 == minP2
                        || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
                    {
                        // Previous segment is also ../, so append instead.
                        // Relative path does not absorb ../ at minP2 as well.
                        *p2++ = L'.';
                        *p2++ = L'.';
                        lastC = L'.';
                    } else if (p3[0] == SEP) {
                        // Absolute path, so absorb segment
                        p2 = p3 + 1;
                    } else {
                        // Drop the previous segment: rewind the write cursor
                        p2 = p3;
                    }
                    // Consume the second '.' of the '..' segment
                    p1 += 1;
                } else if (sep_at_1) {
                    // '.' segment: write nothing
                } else {
                    // Ordinary name that merely starts with '.'
                    *p2++ = lastC = c;
                }
            } else if (c == SEP) {
                // Separator right after a separator: write nothing
            } else {
                *p2++ = lastC = c;
            }
        } else {
            *p2++ = lastC = c;
        }
    }
    // Terminate the normalized path at the write cursor
    *p2 = L'\0';
    if (p2 != minP2) {
        // Strip trailing separators; p2 ends on the last kept character
        while (--p2 != minP2 && *p2 == SEP) {
            *p2 = L'\0';
        }
    } else {
        --p2;
    }
    // p2 points at the last kept character, hence the + 1
    *normsize = p2 - path + 1;
#undef SEP_OR_END
#undef IS_SEP
#undef IS_END
    return path;
}
2601
2602 /* In-place path normalisation. Returns the start of the normalized
2603 path, which will be within the original buffer. Guaranteed to not
2604 make the path longer, and will not fail. 'size' is the length of
2605 the path, if known. If -1, the first null character will be assumed
2606 to be the end of the path. */
2607 wchar_t *
_Py_normpath(wchar_t * path,Py_ssize_t size)2608 _Py_normpath(wchar_t *path, Py_ssize_t size)
2609 {
2610 Py_ssize_t norm_length;
2611 return _Py_normpath_and_size(path, size, &norm_length);
2612 }
2613
2614
2615 /* Get the current directory. buflen is the buffer size in wide characters
2616 including the null character. Decode the path from the locale encoding.
2617
2618 Return NULL on getcwd() error, on decoding error, or if 'buf' is
2619 too short. */
2620 wchar_t*
_Py_wgetcwd(wchar_t * buf,size_t buflen)2621 _Py_wgetcwd(wchar_t *buf, size_t buflen)
2622 {
2623 #ifdef MS_WINDOWS
2624 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2625 return _wgetcwd(buf, ibuflen);
2626 #else
2627 char fname[MAXPATHLEN];
2628 wchar_t *wname;
2629 size_t len;
2630
2631 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2632 return NULL;
2633 wname = Py_DecodeLocale(fname, &len);
2634 if (wname == NULL)
2635 return NULL;
2636 /* wname must have space to store the trailing NUL character */
2637 if (buflen <= len) {
2638 PyMem_RawFree(wname);
2639 return NULL;
2640 }
2641 wcsncpy(buf, wname, buflen);
2642 PyMem_RawFree(wname);
2643 return buf;
2644 #endif
2645 }
2646
2647 /* Duplicate a file descriptor. The new file descriptor is created as
2648 non-inheritable. Return a new file descriptor on success, raise an OSError
2649 exception and return -1 on error.
2650
2651 The GIL is released to call dup(). The caller must hold the GIL. */
2652 int
_Py_dup(int fd)2653 _Py_dup(int fd)
2654 {
2655 #ifdef MS_WINDOWS
2656 HANDLE handle;
2657 #endif
2658
2659 assert(PyGILState_Check());
2660
2661 #ifdef MS_WINDOWS
2662 handle = _Py_get_osfhandle(fd);
2663 if (handle == INVALID_HANDLE_VALUE)
2664 return -1;
2665
2666 Py_BEGIN_ALLOW_THREADS
2667 _Py_BEGIN_SUPPRESS_IPH
2668 fd = dup(fd);
2669 _Py_END_SUPPRESS_IPH
2670 Py_END_ALLOW_THREADS
2671 if (fd < 0) {
2672 PyErr_SetFromErrno(PyExc_OSError);
2673 return -1;
2674 }
2675
2676 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2677 _Py_BEGIN_SUPPRESS_IPH
2678 close(fd);
2679 _Py_END_SUPPRESS_IPH
2680 return -1;
2681 }
2682 #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2683 Py_BEGIN_ALLOW_THREADS
2684 _Py_BEGIN_SUPPRESS_IPH
2685 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2686 _Py_END_SUPPRESS_IPH
2687 Py_END_ALLOW_THREADS
2688 if (fd < 0) {
2689 PyErr_SetFromErrno(PyExc_OSError);
2690 return -1;
2691 }
2692
2693 #elif HAVE_DUP
2694 Py_BEGIN_ALLOW_THREADS
2695 _Py_BEGIN_SUPPRESS_IPH
2696 fd = dup(fd);
2697 _Py_END_SUPPRESS_IPH
2698 Py_END_ALLOW_THREADS
2699 if (fd < 0) {
2700 PyErr_SetFromErrno(PyExc_OSError);
2701 return -1;
2702 }
2703
2704 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2705 _Py_BEGIN_SUPPRESS_IPH
2706 close(fd);
2707 _Py_END_SUPPRESS_IPH
2708 return -1;
2709 }
2710 #else
2711 errno = ENOTSUP;
2712 PyErr_SetFromErrno(PyExc_OSError);
2713 return -1;
2714 #endif
2715 return fd;
2716 }
2717
2718 #ifndef MS_WINDOWS
2719 /* Get the blocking mode of the file descriptor.
2720 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2721 raise an exception and return -1 on error. */
2722 int
_Py_get_blocking(int fd)2723 _Py_get_blocking(int fd)
2724 {
2725 int flags;
2726 _Py_BEGIN_SUPPRESS_IPH
2727 flags = fcntl(fd, F_GETFL, 0);
2728 _Py_END_SUPPRESS_IPH
2729 if (flags < 0) {
2730 PyErr_SetFromErrno(PyExc_OSError);
2731 return -1;
2732 }
2733
2734 return !(flags & O_NONBLOCK);
2735 }
2736
2737 /* Set the blocking mode of the specified file descriptor.
2738
2739 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2740 otherwise.
2741
2742 Return 0 on success, raise an exception and return -1 on error. */
2743 int
_Py_set_blocking(int fd,int blocking)2744 _Py_set_blocking(int fd, int blocking)
2745 {
2746 /* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2747 Use fcntl() instead. */
2748 #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2749 int arg = !blocking;
2750 if (ioctl(fd, FIONBIO, &arg) < 0)
2751 goto error;
2752 #else
2753 int flags, res;
2754
2755 _Py_BEGIN_SUPPRESS_IPH
2756 flags = fcntl(fd, F_GETFL, 0);
2757 if (flags >= 0) {
2758 if (blocking)
2759 flags = flags & (~O_NONBLOCK);
2760 else
2761 flags = flags | O_NONBLOCK;
2762
2763 res = fcntl(fd, F_SETFL, flags);
2764 } else {
2765 res = -1;
2766 }
2767 _Py_END_SUPPRESS_IPH
2768
2769 if (res < 0)
2770 goto error;
2771 #endif
2772 return 0;
2773
2774 error:
2775 PyErr_SetFromErrno(PyExc_OSError);
2776 return -1;
2777 }
2778 #else /* MS_WINDOWS */
2779 int
_Py_get_blocking(int fd)2780 _Py_get_blocking(int fd)
2781 {
2782 HANDLE handle;
2783 DWORD mode;
2784 BOOL success;
2785
2786 handle = _Py_get_osfhandle(fd);
2787 if (handle == INVALID_HANDLE_VALUE) {
2788 return -1;
2789 }
2790
2791 Py_BEGIN_ALLOW_THREADS
2792 success = GetNamedPipeHandleStateW(handle, &mode,
2793 NULL, NULL, NULL, NULL, 0);
2794 Py_END_ALLOW_THREADS
2795
2796 if (!success) {
2797 PyErr_SetFromWindowsErr(0);
2798 return -1;
2799 }
2800
2801 return !(mode & PIPE_NOWAIT);
2802 }
2803
2804 int
_Py_set_blocking(int fd,int blocking)2805 _Py_set_blocking(int fd, int blocking)
2806 {
2807 HANDLE handle;
2808 DWORD mode;
2809 BOOL success;
2810
2811 handle = _Py_get_osfhandle(fd);
2812 if (handle == INVALID_HANDLE_VALUE) {
2813 return -1;
2814 }
2815
2816 Py_BEGIN_ALLOW_THREADS
2817 success = GetNamedPipeHandleStateW(handle, &mode,
2818 NULL, NULL, NULL, NULL, 0);
2819 if (success) {
2820 if (blocking) {
2821 mode &= ~PIPE_NOWAIT;
2822 }
2823 else {
2824 mode |= PIPE_NOWAIT;
2825 }
2826 success = SetNamedPipeHandleState(handle, &mode, NULL, NULL);
2827 }
2828 Py_END_ALLOW_THREADS
2829
2830 if (!success) {
2831 PyErr_SetFromWindowsErr(0);
2832 return -1;
2833 }
2834 return 0;
2835 }
2836
2837 void*
_Py_get_osfhandle_noraise(int fd)2838 _Py_get_osfhandle_noraise(int fd)
2839 {
2840 void *handle;
2841 _Py_BEGIN_SUPPRESS_IPH
2842 handle = (void*)_get_osfhandle(fd);
2843 _Py_END_SUPPRESS_IPH
2844 return handle;
2845 }
2846
2847 void*
_Py_get_osfhandle(int fd)2848 _Py_get_osfhandle(int fd)
2849 {
2850 void *handle = _Py_get_osfhandle_noraise(fd);
2851 if (handle == INVALID_HANDLE_VALUE)
2852 PyErr_SetFromErrno(PyExc_OSError);
2853
2854 return handle;
2855 }
2856
2857 int
_Py_open_osfhandle_noraise(void * handle,int flags)2858 _Py_open_osfhandle_noraise(void *handle, int flags)
2859 {
2860 int fd;
2861 _Py_BEGIN_SUPPRESS_IPH
2862 fd = _open_osfhandle((intptr_t)handle, flags);
2863 _Py_END_SUPPRESS_IPH
2864 return fd;
2865 }
2866
2867 int
_Py_open_osfhandle(void * handle,int flags)2868 _Py_open_osfhandle(void *handle, int flags)
2869 {
2870 int fd = _Py_open_osfhandle_noraise(handle, flags);
2871 if (fd == -1)
2872 PyErr_SetFromErrno(PyExc_OSError);
2873
2874 return fd;
2875 }
2876 #endif /* MS_WINDOWS */
2877
2878 int
_Py_GetLocaleconvNumeric(struct lconv * lc,PyObject ** decimal_point,PyObject ** thousands_sep)2879 _Py_GetLocaleconvNumeric(struct lconv *lc,
2880 PyObject **decimal_point, PyObject **thousands_sep)
2881 {
2882 assert(decimal_point != NULL);
2883 assert(thousands_sep != NULL);
2884
2885 #ifndef MS_WINDOWS
2886 int change_locale = 0;
2887 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2888 change_locale = 1;
2889 }
2890 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2891 change_locale = 1;
2892 }
2893
2894 /* Keep a copy of the LC_CTYPE locale */
2895 char *oldloc = NULL, *loc = NULL;
2896 if (change_locale) {
2897 oldloc = setlocale(LC_CTYPE, NULL);
2898 if (!oldloc) {
2899 PyErr_SetString(PyExc_RuntimeWarning,
2900 "failed to get LC_CTYPE locale");
2901 return -1;
2902 }
2903
2904 oldloc = _PyMem_Strdup(oldloc);
2905 if (!oldloc) {
2906 PyErr_NoMemory();
2907 return -1;
2908 }
2909
2910 loc = setlocale(LC_NUMERIC, NULL);
2911 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2912 loc = NULL;
2913 }
2914
2915 if (loc != NULL) {
2916 /* Only set the locale temporarily the LC_CTYPE locale
2917 if LC_NUMERIC locale is different than LC_CTYPE locale and
2918 decimal_point and/or thousands_sep are non-ASCII or longer than
2919 1 byte */
2920 setlocale(LC_CTYPE, loc);
2921 }
2922 }
2923
2924 #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2925 #else /* MS_WINDOWS */
2926 /* Use _W_* fields of Windows strcut lconv */
2927 #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2928 #endif /* MS_WINDOWS */
2929
2930 int res = -1;
2931
2932 *decimal_point = GET_LOCALE_STRING(decimal_point);
2933 if (*decimal_point == NULL) {
2934 goto done;
2935 }
2936
2937 *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2938 if (*thousands_sep == NULL) {
2939 goto done;
2940 }
2941
2942 res = 0;
2943
2944 done:
2945 #ifndef MS_WINDOWS
2946 if (loc != NULL) {
2947 setlocale(LC_CTYPE, oldloc);
2948 }
2949 PyMem_Free(oldloc);
2950 #endif
2951 return res;
2952
2953 #undef GET_LOCALE_STRING
2954 }
2955
2956 /* Our selection logic for which function to use is as follows:
2957 * 1. If close_range(2) is available, always prefer that; it's better for
2958 * contiguous ranges like this than fdwalk(3) which entails iterating over
2959 * the entire fd space and simply doing nothing for those outside the range.
2960 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2961 * closing up to sysconf(_SC_OPEN_MAX).
2962 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2963 * as that will be more performant if the range happens to have any chunk of
2964 * non-opened fd in the middle.
2965 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2966 */
2967 #ifdef HAVE_CLOSEFROM
2968 # define USE_CLOSEFROM
2969 #endif /* HAVE_CLOSEFROM */
2970
2971 #ifdef HAVE_FDWALK
2972 # define USE_FDWALK
2973 #endif /* HAVE_FDWALK */
2974
2975 #ifdef USE_FDWALK
/* fdwalk() callback. 'lohi' points to two ints {low, high}: close 'fd' when
   low <= fd < high, ignoring close() errors. Return 1 once 'fd' has
   reached 'high', 0 otherwise. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int low = bounds[0];
    const int high = bounds[1];

    if (fd >= high) {
        return 1;
    }
    if (fd >= low) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2991 #endif /* USE_FDWALK */
2992
2993 /* Closes all file descriptors in [first, last], ignoring errors. */
2994 void
_Py_closerange(int first,int last)2995 _Py_closerange(int first, int last)
2996 {
2997 first = Py_MAX(first, 0);
2998 _Py_BEGIN_SUPPRESS_IPH
2999 #ifdef HAVE_CLOSE_RANGE
3000 if (close_range(first, last, 0) == 0) {
3001 /* close_range() ignores errors when it closes file descriptors.
3002 * Possible reasons of an error return are lack of kernel support
3003 * or denial of the underlying syscall by a seccomp sandbox on Linux.
3004 * Fallback to other methods in case of any error. */
3005 }
3006 else
3007 #endif /* HAVE_CLOSE_RANGE */
3008 #ifdef USE_CLOSEFROM
3009 if (last >= sysconf(_SC_OPEN_MAX)) {
3010 /* Any errors encountered while closing file descriptors are ignored */
3011 (void)closefrom(first);
3012 }
3013 else
3014 #endif /* USE_CLOSEFROM */
3015 #ifdef USE_FDWALK
3016 {
3017 int lohi[2];
3018 lohi[0] = first;
3019 lohi[1] = last + 1;
3020 fdwalk(_fdwalk_close_func, lohi);
3021 }
3022 #else
3023 {
3024 for (int i = first; i <= last; i++) {
3025 /* Ignore errors */
3026 (void)close(i);
3027 }
3028 }
3029 #endif /* USE_FDWALK */
3030 _Py_END_SUPPRESS_IPH
3031 }
3032
3033
3034 #ifndef MS_WINDOWS
// Ticks per second used by clock() and times() functions.
// See os.times() and time.process_time() implementations.
//
// Store the value in *ticks_per_second and return 0. Return -1 (leaving
// *ticks_per_second untouched) if sysconf(_SC_CLK_TCK) reports a value
// lower than 1.
int
_Py_GetTicksPerSecond(long *ticks_per_second)
{
#if defined(HAVE_SYSCONF) && defined(_SC_CLK_TCK)
    const long clk_tck = sysconf(_SC_CLK_TCK);
    if (clk_tck >= 1) {
        *ticks_per_second = clk_tck;
        return 0;
    }
    return -1;
#elif defined(HZ)
    assert(HZ >= 1);
    *ticks_per_second = HZ;
    return 0;
#else
    // Magic fallback value; may be bogus
    *ticks_per_second = 60;
    return 0;
#endif
}
3055 #endif
3056
3057
/* Check if a file descriptor is valid or not.
   Return 0 if the file descriptor is invalid (negative descriptors are
   always invalid), return non-zero otherwise. */
int
_Py_IsValidFD(int fd)
{
/* Choice of the probing method:

   dup() is faster than fstat(): fstat() can require input/output operations,
   whereas dup() doesn't. There is a low risk of EMFILE/ENFILE at Python
   startup. Problem: dup() doesn't check if the file descriptor is valid on
   some platforms.

   fcntl(fd, F_GETFD) is even faster, because it only checks the process table.
   It is preferred over dup() when available, since it cannot fail with the
   "too many open files" error (EMFILE).

   bpo-30225: On macOS Tiger, when stdout is redirected to a pipe and the other
   side of the pipe is closed, dup(1) succeed, whereas fstat(1, &st) fails with
   EBADF. FreeBSD has similar issue (bpo-32849).

   Only use dup() on Linux where dup() is enough to detect invalid FD
   (bpo-32849).
*/
    if (fd < 0) {
        return 0;
    }
#if defined(F_GETFD) && ( \
        defined(__linux__) || \
        defined(__APPLE__) || \
        (defined(__wasm__) && !defined(__wasi__)))
    const int fd_flags = fcntl(fd, F_GETFD);
    return fd_flags >= 0;
#elif defined(__linux__)
    const int copy = dup(fd);
    if (copy < 0) {
        return 0;
    }
    close(copy);
    return 1;
#elif defined(MS_WINDOWS)
    HANDLE hfile;
    _Py_BEGIN_SUPPRESS_IPH
    hfile = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (hfile == INVALID_HANDLE_VALUE) {
        return 0;
    }
    return GetFileType(hfile) != FILE_TYPE_UNKNOWN;
#else
    struct stat st;
    return fstat(fd, &st) == 0;
#endif
}
3105