• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: C; c-file-style: "python" -*- */
2 
3 #include <Python.h>
4 #include "pycore_dtoa.h"
5 #include <locale.h>
6 
7 /* Case-insensitive string match used for nan and inf detection; t should be
8    lower-case.  Returns 1 for a successful match, 0 otherwise. */
9 
/* Compare the start of s against the pattern t, lower-casing each character
   of s before comparing.  Returns 1 if every character of t was matched and
   0 as soon as a character differs. */
static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    return 1;
}
19 
20 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
21    "infinity", with an optional leading sign of "+" or "-".  On success,
22    return the NaN or Infinity as a double and set *endptr to point just beyond
23    the successfully parsed portion of the string.  On failure, return -1.0 and
24    set *endptr to point to the start of the string. */
25 
26 #ifndef PY_NO_SHORT_FLOAT_REPR
27 
/* Parse an optionally signed "inf", "infinity" or "nan" (any letter case) at
   the start of p.  On a match the value is built with _Py_dg_infinity() or
   _Py_dg_stdnan() and *endptr is set just past the matched text.  Otherwise
   -1.0 is returned and *endptr is set to p. */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = 0;
    double value;

    /* Consume an optional leading sign. */
    if (*cursor == '-') {
        is_negative = 1;
        cursor++;
    }
    else if (*cursor == '+') {
        cursor++;
    }

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* Also accept the long spelling "infinity". */
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: rewind past any sign that was skipped. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
60 
61 #else
62 
63 double
_Py_parse_inf_or_nan(const char * p,char ** endptr)64 _Py_parse_inf_or_nan(const char *p, char **endptr)
65 {
66     double retval;
67     const char *s;
68     int negate = 0;
69 
70     s = p;
71     if (*s == '-') {
72         negate = 1;
73         s++;
74     }
75     else if (*s == '+') {
76         s++;
77     }
78     if (case_insensitive_match(s, "inf")) {
79         s += 3;
80         if (case_insensitive_match(s, "inity"))
81             s += 5;
82         retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
83     }
84 #ifdef Py_NAN
85     else if (case_insensitive_match(s, "nan")) {
86         s += 3;
87         retval = negate ? -Py_NAN : Py_NAN;
88     }
89 #endif
90     else {
91         s = p;
92         retval = -1.0;
93     }
94     *endptr = (char *)s;
95     return retval;
96 }
97 
98 #endif
99 
100 /**
101  * _PyOS_ascii_strtod:
102  * @nptr:    the string to convert to a numeric value.
103  * @endptr:  if non-%NULL, it returns the character after
104  *           the last character used in the conversion.
105  *
 * Converts a string to a double value.
107  * This function behaves like the standard strtod() function
108  * does in the C locale. It does this without actually
109  * changing the current locale, since that would not be
110  * thread-safe.
111  *
112  * This function is typically used when reading configuration
113  * files or other non-user input that should be locale independent.
114  * To handle input from the user you should normally use the
115  * locale-sensitive system strtod() function.
116  *
117  * If the correct value would cause overflow, plus or minus %HUGE_VAL
118  * is returned (according to the sign of the value), and %ERANGE is
119  * stored in %errno. If the correct value would cause underflow,
120  * zero is returned and %ERANGE is stored in %errno.
121  * If memory allocation fails, %ENOMEM is stored in %errno.
122  *
123  * This function resets %errno before calling strtod() so that
124  * you can reliably detect overflow and underflow.
125  *
 * Return value: the double value.
127  **/
128 
129 #ifndef PY_NO_SHORT_FLOAT_REPR
130 
131 static double
_PyOS_ascii_strtod(const char * nptr,char ** endptr)132 _PyOS_ascii_strtod(const char *nptr, char **endptr)
133 {
134     double result;
135     _Py_SET_53BIT_PRECISION_HEADER;
136 
137     assert(nptr != NULL);
138     /* Set errno to zero, so that we can distinguish zero results
139        and underflows */
140     errno = 0;
141 
142     _Py_SET_53BIT_PRECISION_START;
143     result = _Py_dg_strtod(nptr, endptr);
144     _Py_SET_53BIT_PRECISION_END;
145 
146     if (*endptr == nptr)
147         /* string might represent an inf or nan */
148         result = _Py_parse_inf_or_nan(nptr, endptr);
149 
150     return result;
151 
152 }
153 
154 #else
155 
156 /*
157    Use system strtod;  since strtod is locale aware, we may
158    have to first fix the decimal separator.
159 
160    Note that unlike _Py_dg_strtod, the system strtod may not always give
161    correctly rounded results.
162 */
163 
/* Locale-independent string-to-double conversion built on the system
   strtod().

   nptr must be a non-NULL, NUL-terminated string.  On success the parsed
   value is returned and *endptr points just past the last character
   consumed.  If the string does not start with a valid number, -1.0 is
   returned, *endptr is set to nptr and errno is set to EINVAL.  If the
   temporary copy of the input cannot be allocated, errno is set to ENOMEM
   and *endptr is set to nptr.  errno is reset to 0 before the system
   strtod() is called. */
static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_Malloc(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into copy; translate it back to the
               matching position in the original input.  Compare
               offsets rather than raw pointers: copy and the input
               are unrelated objects, so comparing their addresses
               says nothing about where parsing stopped. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                /* Parsing went past the decimal point, which occupies
                   decimal_point_len bytes in copy but one in the input */
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_Free(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
310 
311 #endif
312 
313 /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
314    as a string of ASCII characters) to a float.  The string should not have
315    leading or trailing whitespace.  The conversion is independent of the
316    current locale.
317 
318    If endptr is NULL, try to convert the whole string.  Raise ValueError and
319    return -1.0 if the string is not a valid representation of a floating-point
320    number.
321 
322    If endptr is non-NULL, try to convert as much of the string as possible.
323    If no initial segment of the string is the valid representation of a
324    floating-point number then *endptr is set to point to the beginning of the
325    string, -1.0 is returned and again ValueError is raised.
326 
327    On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
328    if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
329    exception is raised.  Otherwise, overflow_exception should point to
330    a Python exception, this exception will be raised, -1.0 will be returned,
331    and *endptr will point just past the end of the converted value.
332 
333    If any other failure occurs (for example lack of memory), -1.0 is returned
334    and the appropriate Python exception will have been set.
335 */
336 
337 double
PyOS_string_to_double(const char * s,char ** endptr,PyObject * overflow_exception)338 PyOS_string_to_double(const char *s,
339                       char **endptr,
340                       PyObject *overflow_exception)
341 {
342     double x, result=-1.0;
343     char *fail_pos;
344 
345     errno = 0;
346     x = _PyOS_ascii_strtod(s, &fail_pos);
347 
348     if (errno == ENOMEM) {
349         PyErr_NoMemory();
350         fail_pos = (char *)s;
351     }
352     else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
353         PyErr_Format(PyExc_ValueError,
354                       "could not convert string to float: "
355                       "'%.200s'", s);
356     else if (fail_pos == s)
357         PyErr_Format(PyExc_ValueError,
358                       "could not convert string to float: "
359                       "'%.200s'", s);
360     else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
361         PyErr_Format(overflow_exception,
362                       "value too large to convert to float: "
363                       "'%.200s'", s);
364     else
365         result = x;
366 
367     if (endptr != NULL)
368         *endptr = fail_pos;
369     return result;
370 }
371 
372 /* Remove underscores that follow the underscore placement rule from
373    the string and then call the `innerfunc` function on the result.
374    It should return a new object or NULL on exception.
375 
376    `what` is used for the error message emitted when underscores are detected
377    that don't follow the rule. `arg` is an opaque pointer passed to the inner
378    function.
379 
380    This is used to implement underscore-agnostic conversion for floats
381    and complex numbers.
382 */
383 PyObject *
_Py_string_to_number_with_underscores(const char * s,Py_ssize_t orig_len,const char * what,PyObject * obj,void * arg,PyObject * (* innerfunc)(const char *,Py_ssize_t,void *))384 _Py_string_to_number_with_underscores(
385     const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
386     PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
387 {
388     char prev;
389     const char *p, *last;
390     char *dup, *end;
391     PyObject *result;
392 
393     assert(s[orig_len] == '\0');
394 
395     if (strchr(s, '_') == NULL) {
396         return innerfunc(s, orig_len, arg);
397     }
398 
399     dup = PyMem_Malloc(orig_len + 1);
400     if (dup == NULL) {
401         return PyErr_NoMemory();
402     }
403     end = dup;
404     prev = '\0';
405     last = s + orig_len;
406     for (p = s; *p; p++) {
407         if (*p == '_') {
408             /* Underscores are only allowed after digits. */
409             if (!(prev >= '0' && prev <= '9')) {
410                 goto error;
411             }
412         }
413         else {
414             *end++ = *p;
415             /* Underscores are only allowed before digits. */
416             if (prev == '_' && !(*p >= '0' && *p <= '9')) {
417                 goto error;
418             }
419         }
420         prev = *p;
421     }
422     /* Underscores are not allowed at the end. */
423     if (prev == '_') {
424         goto error;
425     }
426     /* No embedded NULs allowed. */
427     if (p != last) {
428         goto error;
429     }
430     *end = '\0';
431     result = innerfunc(dup, end - dup, arg);
432     PyMem_Free(dup);
433     return result;
434 
435   error:
436     PyMem_Free(dup);
437     PyErr_Format(PyExc_ValueError,
438                  "could not convert string to %s: "
439                  "%R", what, obj);
440     return NULL;
441 }
442 
443 #ifdef PY_NO_SHORT_FLOAT_REPR
444 
445 /* Given a string that may have a decimal point in the current
446    locale, change it back to a dot.  Since the string cannot get
447    longer, no need for a maximum buffer size parameter. */
448 Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char * buffer)449 change_decimal_from_locale_to_dot(char* buffer)
450 {
451     struct lconv *locale_data = localeconv();
452     const char *decimal_point = locale_data->decimal_point;
453 
454     if (decimal_point[0] != '.' || decimal_point[1] != 0) {
455         size_t decimal_point_len = strlen(decimal_point);
456 
457         if (*buffer == '+' || *buffer == '-')
458             buffer++;
459         while (Py_ISDIGIT(*buffer))
460             buffer++;
461         if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
462             *buffer = '.';
463             buffer++;
464             if (decimal_point_len > 1) {
465                 /* buffer needs to get smaller */
466                 size_t rest_len = strlen(buffer +
467                                      (decimal_point_len - 1));
468                 memmove(buffer,
469                     buffer + (decimal_point_len - 1),
470                     rest_len);
471                 buffer[rest_len] = 0;
472             }
473         }
474     }
475 }
476 
477 
478 /* From the C99 standard, section 7.19.6:
479 The exponent always contains at least two digits, and only as many more digits
480 as necessary to represent the exponent.
481 */
482 #define MIN_EXPONENT_DIGITS 2
483 
484 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
485    in length. */
486 Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char * buffer,size_t buf_size)487 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
488 {
489     char *p = strpbrk(buffer, "eE");
490     if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
491         char *start = p + 2;
492         int exponent_digit_cnt = 0;
493         int leading_zero_cnt = 0;
494         int in_leading_zeros = 1;
495         int significant_digit_cnt;
496 
497         /* Skip over the exponent and the sign. */
498         p += 2;
499 
500         /* Find the end of the exponent, keeping track of leading
501            zeros. */
502         while (*p && Py_ISDIGIT(*p)) {
503             if (in_leading_zeros && *p == '0')
504                 ++leading_zero_cnt;
505             if (*p != '0')
506                 in_leading_zeros = 0;
507             ++p;
508             ++exponent_digit_cnt;
509         }
510 
511         significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
512         if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
513             /* If there are 2 exactly digits, we're done,
514                regardless of what they contain */
515         }
516         else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
517             int extra_zeros_cnt;
518 
519             /* There are more than 2 digits in the exponent.  See
520                if we can delete some of the leading zeros */
521             if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
522                 significant_digit_cnt = MIN_EXPONENT_DIGITS;
523             extra_zeros_cnt = exponent_digit_cnt -
524                 significant_digit_cnt;
525 
526             /* Delete extra_zeros_cnt worth of characters from the
527                front of the exponent */
528             assert(extra_zeros_cnt >= 0);
529 
530             /* Add one to significant_digit_cnt to copy the
531                trailing 0 byte, thus setting the length */
532             memmove(start,
533                 start + extra_zeros_cnt,
534                 significant_digit_cnt + 1);
535         }
536         else {
537             /* If there are fewer than 2 digits, add zeros
538                until there are 2, if there's enough room */
539             int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
540             if (start + zeros + exponent_digit_cnt + 1
541                   < buffer + buf_size) {
542                 memmove(start + zeros, start,
543                     exponent_digit_cnt + 1);
544                 memset(start, '0', zeros);
545             }
546         }
547     }
548 }
549 
550 /* Remove trailing zeros after the decimal point from a numeric string; also
551    remove the decimal point if all digits following it are zero.  The numeric
552    string must end in '\0', and should not have any leading or trailing
553    whitespace.  Assumes that the decimal point is '.'. */
554 Py_LOCAL_INLINE(void)
remove_trailing_zeros(char * buffer)555 remove_trailing_zeros(char *buffer)
556 {
557     char *old_fraction_end, *new_fraction_end, *end, *p;
558 
559     p = buffer;
560     if (*p == '-' || *p == '+')
561         /* Skip leading sign, if present */
562         ++p;
563     while (Py_ISDIGIT(*p))
564         ++p;
565 
566     /* if there's no decimal point there's nothing to do */
567     if (*p++ != '.')
568         return;
569 
570     /* scan any digits after the point */
571     while (Py_ISDIGIT(*p))
572         ++p;
573     old_fraction_end = p;
574 
575     /* scan up to ending '\0' */
576     while (*p != '\0')
577         p++;
578     /* +1 to make sure that we move the null byte as well */
579     end = p+1;
580 
581     /* scan back from fraction_end, looking for removable zeros */
582     p = old_fraction_end;
583     while (*(p-1) == '0')
584         --p;
585     /* and remove point if we've got that far */
586     if (*(p-1) == '.')
587         --p;
588     new_fraction_end = p;
589 
590     memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
591 }
592 
593 /* Ensure that buffer has a decimal point in it.  The decimal point will not
594    be in the current locale, it will always be '.'. Don't add a decimal point
595    if an exponent is present.  Also, convert to exponential notation where
596    adding a '.0' would produce too many significant digits (see issue 5864).
597 
598    Returns a pointer to the fixed buffer, or NULL on failure.
599 */
600 Py_LOCAL_INLINE(char *)
ensure_decimal_point(char * buffer,size_t buf_size,int precision)601 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
602 {
603     int digit_count, insert_count = 0, convert_to_exp = 0;
604     const char *chars_to_insert;
605     char *digits_start;
606 
607     /* search for the first non-digit character */
608     char *p = buffer;
609     if (*p == '-' || *p == '+')
610         /* Skip leading sign, if present.  I think this could only
611            ever be '-', but it can't hurt to check for both. */
612         ++p;
613     digits_start = p;
614     while (*p && Py_ISDIGIT(*p))
615         ++p;
616     digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
617 
618     if (*p == '.') {
619         if (Py_ISDIGIT(*(p+1))) {
620             /* Nothing to do, we already have a decimal
621                point and a digit after it */
622         }
623         else {
624             /* We have a decimal point, but no following
625                digit.  Insert a zero after the decimal. */
626             /* can't ever get here via PyOS_double_to_string */
627             assert(precision == -1);
628             ++p;
629             chars_to_insert = "0";
630             insert_count = 1;
631         }
632     }
633     else if (!(*p == 'e' || *p == 'E')) {
634         /* Don't add ".0" if we have an exponent. */
635         if (digit_count == precision) {
636             /* issue 5864: don't add a trailing .0 in the case
637                where the '%g'-formatted result already has as many
638                significant digits as were requested.  Switch to
639                exponential notation instead. */
640             convert_to_exp = 1;
641             /* no exponent, no point, and we shouldn't land here
642                for infs and nans, so we must be at the end of the
643                string. */
644             assert(*p == '\0');
645         }
646         else {
647             assert(precision == -1 || digit_count < precision);
648             chars_to_insert = ".0";
649             insert_count = 2;
650         }
651     }
652     if (insert_count) {
653         size_t buf_len = strlen(buffer);
654         if (buf_len + insert_count + 1 >= buf_size) {
655             /* If there is not enough room in the buffer
656                for the additional text, just skip it.  It's
657                not worth generating an error over. */
658         }
659         else {
660             memmove(p + insert_count, p,
661                 buffer + strlen(buffer) - p + 1);
662             memcpy(p, chars_to_insert, insert_count);
663         }
664     }
665     if (convert_to_exp) {
666         int written;
667         size_t buf_avail;
668         p = digits_start;
669         /* insert decimal point */
670         assert(digit_count >= 1);
671         memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
672         p[1] = '.';
673         p += digit_count+1;
674         assert(p <= buf_size+buffer);
675         buf_avail = buf_size+buffer-p;
676         if (buf_avail == 0)
677             return NULL;
678         /* Add exponent.  It's okay to use lower case 'e': we only
679            arrive here as a result of using the empty format code or
680            repr/str builtins and those never want an upper case 'E' */
681         written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
682         if (!(0 <= written &&
683               written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
684             /* output truncated, or something else bad happened */
685             return NULL;
686         remove_trailing_zeros(buffer);
687     }
688     return buffer;
689 }
690 
691 /* see FORMATBUFLEN in unicodeobject.c */
692 #define FLOAT_FORMATBUFLEN 120
693 
694 /**
695  * _PyOS_ascii_formatd:
696  * @buffer: A buffer to place the resulting string in
697  * @buf_size: The length of the buffer.
698  * @format: The printf()-style format to use for the
699  *          code to use for converting.
 * @d: The double to convert
701  * @precision: The precision to use when formatting.
702  *
 * Converts a double to a string, using the '.' as
704  * decimal point. To format the number you pass in
705  * a printf()-style format string. Allowed conversion
706  * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
707  *
708  * 'Z' is the same as 'g', except it always has a decimal and
709  *     at least one digit after the decimal.
710  *
711  * Return value: The pointer to the buffer with the converted string.
712  * On failure returns NULL but does not set any Python exception.
713  **/
714 static char *
_PyOS_ascii_formatd(char * buffer,size_t buf_size,const char * format,double d,int precision)715 _PyOS_ascii_formatd(char       *buffer,
716                    size_t      buf_size,
717                    const char *format,
718                    double      d,
719                    int         precision)
720 {
721     char format_char;
722     size_t format_len = strlen(format);
723 
724     /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
725        also with at least one character past the decimal. */
726     char tmp_format[FLOAT_FORMATBUFLEN];
727 
728     /* The last character in the format string must be the format char */
729     format_char = format[format_len - 1];
730 
731     if (format[0] != '%')
732         return NULL;
733 
734     /* I'm not sure why this test is here.  It's ensuring that the format
735        string after the first character doesn't have a single quote, a
736        lowercase l, or a percent. This is the reverse of the commented-out
737        test about 10 lines ago. */
738     if (strpbrk(format + 1, "'l%"))
739         return NULL;
740 
741     /* Also curious about this function is that it accepts format strings
742        like "%xg", which are invalid for floats.  In general, the
743        interface to this function is not very good, but changing it is
744        difficult because it's a public API. */
745 
746     if (!(format_char == 'e' || format_char == 'E' ||
747           format_char == 'f' || format_char == 'F' ||
748           format_char == 'g' || format_char == 'G' ||
749           format_char == 'Z'))
750         return NULL;
751 
752     /* Map 'Z' format_char to 'g', by copying the format string and
753        replacing the final char with a 'g' */
754     if (format_char == 'Z') {
755         if (format_len + 1 >= sizeof(tmp_format)) {
756             /* The format won't fit in our copy.  Error out.  In
757                practice, this will never happen and will be
758                detected by returning NULL */
759             return NULL;
760         }
761         strcpy(tmp_format, format);
762         tmp_format[format_len - 1] = 'g';
763         format = tmp_format;
764     }
765 
766 
767     /* Have PyOS_snprintf do the hard work */
768     PyOS_snprintf(buffer, buf_size, format, d);
769 
770     /* Do various fixups on the return string */
771 
772     /* Get the current locale, and find the decimal point string.
773        Convert that string back to a dot. */
774     change_decimal_from_locale_to_dot(buffer);
775 
776     /* If an exponent exists, ensure that the exponent is at least
777        MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
778        for the extra zeros.  Also, if there are more than
779        MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
780        back to MIN_EXPONENT_DIGITS */
781     ensure_minimum_exponent_length(buffer, buf_size);
782 
783     /* If format_char is 'Z', make sure we have at least one character
784        after the decimal point (and make sure we have a decimal point);
785        also switch to exponential notation in some edge cases where the
786        extra character would produce more significant digits that we
787        really want. */
788     if (format_char == 'Z')
789         buffer = ensure_decimal_point(buffer, buf_size, precision);
790 
791     return buffer;
792 }
793 
794 /* The fallback code to use if _Py_dg_dtoa is not available. */
795 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)796 char * PyOS_double_to_string(double val,
797                                          char format_code,
798                                          int precision,
799                                          int flags,
800                                          int *type)
801 {
802     char format[32];
803     Py_ssize_t bufsize;
804     char *buf;
805     int t, exp;
806     int upper = 0;
807 
808     /* Validate format_code, and map upper and lower case */
809     switch (format_code) {
810     case 'e':          /* exponent */
811     case 'f':          /* fixed */
812     case 'g':          /* general */
813         break;
814     case 'E':
815         upper = 1;
816         format_code = 'e';
817         break;
818     case 'F':
819         upper = 1;
820         format_code = 'f';
821         break;
822     case 'G':
823         upper = 1;
824         format_code = 'g';
825         break;
826     case 'r':          /* repr format */
827         /* Supplied precision is unused, must be 0. */
828         if (precision != 0) {
829             PyErr_BadInternalCall();
830             return NULL;
831         }
832         /* The repr() precision (17 significant decimal digits) is the
833            minimal number that is guaranteed to have enough precision
834            so that if the number is read back in the exact same binary
835            value is recreated.  This is true for IEEE floating point
836            by design, and also happens to work for all other modern
837            hardware. */
838         precision = 17;
839         format_code = 'g';
840         break;
841     default:
842         PyErr_BadInternalCall();
843         return NULL;
844     }
845 
846     /* Here's a quick-and-dirty calculation to figure out how big a buffer
847        we need.  In general, for a finite float we need:
848 
849          1 byte for each digit of the decimal significand, and
850 
851          1 for a possible sign
852          1 for a possible decimal point
853          2 for a possible [eE][+-]
854          1 for each digit of the exponent;  if we allow 19 digits
855            total then we're safe up to exponents of 2**63.
856          1 for the trailing nul byte
857 
858        This gives a total of 24 + the number of digits in the significand,
859        and the number of digits in the significand is:
860 
861          for 'g' format: at most precision, except possibly
862            when precision == 0, when it's 1.
863          for 'e' format: precision+1
864          for 'f' format: precision digits after the point, at least 1
865            before.  To figure out how many digits appear before the point
866            we have to examine the size of the number.  If fabs(val) < 1.0
867            then there will be only one digit before the point.  If
868            fabs(val) >= 1.0, then there are at most
869 
870          1+floor(log10(ceiling(fabs(val))))
871 
872            digits before the point (where the 'ceiling' allows for the
873            possibility that the rounding rounds the integer part of val
874            up).  A safe upper bound for the above quantity is
875            1+floor(exp/3), where exp is the unique integer such that 0.5
876            <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
877            frexp.
878 
879        So we allow room for precision+1 digits for all formats, plus an
880        extra floor(exp/3) digits for 'f' format.
881 
882     */
883 
884     if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
885         /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
886         bufsize = 5;
887     else {
888         bufsize = 25 + precision;
889         if (format_code == 'f' && fabs(val) >= 1.0) {
890             frexp(val, &exp);
891             bufsize += exp/3;
892         }
893     }
894 
895     buf = PyMem_Malloc(bufsize);
896     if (buf == NULL) {
897         PyErr_NoMemory();
898         return NULL;
899     }
900 
901     /* Handle nan and inf. */
902     if (Py_IS_NAN(val)) {
903         strcpy(buf, "nan");
904         t = Py_DTST_NAN;
905     } else if (Py_IS_INFINITY(val)) {
906         if (copysign(1., val) == 1.)
907             strcpy(buf, "inf");
908         else
909             strcpy(buf, "-inf");
910         t = Py_DTST_INFINITE;
911     } else {
912         t = Py_DTST_FINITE;
913         if (flags & Py_DTSF_ADD_DOT_0)
914             format_code = 'Z';
915 
916         PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
917                       (flags & Py_DTSF_ALT ? "#" : ""), precision,
918                       format_code);
919         _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
920     }
921 
922     /* Add sign when requested.  It's convenient (esp. when formatting
923      complex numbers) to include a sign even for inf and nan. */
924     if (flags & Py_DTSF_SIGN && buf[0] != '-') {
925         size_t len = strlen(buf);
926         /* the bufsize calculations above should ensure that we've got
927            space to add a sign */
928         assert((size_t)bufsize >= len+2);
929         memmove(buf+1, buf, len+1);
930         buf[0] = '+';
931     }
932     if (upper) {
933         /* Convert to upper case. */
934         char *p1;
935         for (p1 = buf; *p1; p1++)
936             *p1 = Py_TOUPPER(*p1);
937     }
938 
939     if (type)
940         *type = t;
941     return buf;
942 }
943 
944 #else
945 
946 /* _Py_dg_dtoa is available. */
947 
/* Upper-case output is produced by choosing between two parallel string
   tables instead of converting characters, so no locale-dependent case
   conversion is ever needed.  The OFS_* values index either table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The code that consumes these tables relies on the length of each
   entry, so the lengths must not be changed. */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E] = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E] = "E",
};
965 
966 
967 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
968    memory contain the resulting string.
969 
970    Arguments:
971      d is the double to be converted
972      format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
973        correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
974      mode is one of '0', '2' or '3', and is completely determined by
975        format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
976      precision is the desired precision
977      always_add_sign is nonzero if a '+' sign should be included for positive
978        numbers
979      add_dot_0_if_integer is nonzero if integers in non-exponential form
980        should have ".0" added.  Only applies to format codes 'r' and 'g'.
981      use_alt_formatting is nonzero if alternative formatting should be
982        used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
983        at most one of use_alt_formatting and add_dot_0_if_integer should
984        be nonzero.
985      type, if non-NULL, will be set to one of these constants to identify
986        the type of the 'd' argument:
987      Py_DTST_FINITE
988      Py_DTST_INFINITE
989      Py_DTST_NAN
990 
991    Returns a PyMem_Malloc'd block of memory containing the resulting string,
992     or NULL on error. If NULL is returned, the Python error has been set.
993  */
994 
995 static char *
format_float_short(double d,char format_code,int mode,int precision,int always_add_sign,int add_dot_0_if_integer,int use_alt_formatting,const char * const * float_strings,int * type)996 format_float_short(double d, char format_code,
997                    int mode, int precision,
998                    int always_add_sign, int add_dot_0_if_integer,
999                    int use_alt_formatting, const char * const *float_strings,
1000                    int *type)
1001 {
1002     char *buf = NULL;
1003     char *p = NULL;
1004     Py_ssize_t bufsize = 0;
1005     char *digits, *digits_end;
1006     int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1007     Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1008     _Py_SET_53BIT_PRECISION_HEADER;
1009 
1010     /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1011        Must be matched by a call to _Py_dg_freedtoa. */
1012     _Py_SET_53BIT_PRECISION_START;
1013     digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1014                          &digits_end);
1015     _Py_SET_53BIT_PRECISION_END;
1016 
1017     decpt = (Py_ssize_t)decpt_as_int;
1018     if (digits == NULL) {
1019         /* The only failure mode is no memory. */
1020         PyErr_NoMemory();
1021         goto exit;
1022     }
1023     assert(digits_end != NULL && digits_end >= digits);
1024     digits_len = digits_end - digits;
1025 
1026     if (digits_len && !Py_ISDIGIT(digits[0])) {
1027         /* Infinities and nans here; adapt Gay's output,
1028            so convert Infinity to inf and NaN to nan, and
1029            ignore sign of nan. Then return. */
1030 
1031         /* ignore the actual sign of a nan */
1032         if (digits[0] == 'n' || digits[0] == 'N')
1033             sign = 0;
1034 
1035         /* We only need 5 bytes to hold the result "+inf\0" . */
1036         bufsize = 5; /* Used later in an assert. */
1037         buf = (char *)PyMem_Malloc(bufsize);
1038         if (buf == NULL) {
1039             PyErr_NoMemory();
1040             goto exit;
1041         }
1042         p = buf;
1043 
1044         if (sign == 1) {
1045             *p++ = '-';
1046         }
1047         else if (always_add_sign) {
1048             *p++ = '+';
1049         }
1050         if (digits[0] == 'i' || digits[0] == 'I') {
1051             strncpy(p, float_strings[OFS_INF], 3);
1052             p += 3;
1053 
1054             if (type)
1055                 *type = Py_DTST_INFINITE;
1056         }
1057         else if (digits[0] == 'n' || digits[0] == 'N') {
1058             strncpy(p, float_strings[OFS_NAN], 3);
1059             p += 3;
1060 
1061             if (type)
1062                 *type = Py_DTST_NAN;
1063         }
1064         else {
1065             /* shouldn't get here: Gay's code should always return
1066                something starting with a digit, an 'I',  or 'N' */
1067             Py_UNREACHABLE();
1068         }
1069         goto exit;
1070     }
1071 
1072     /* The result must be finite (not inf or nan). */
1073     if (type)
1074         *type = Py_DTST_FINITE;
1075 
1076 
1077     /* We got digits back, format them.  We may need to pad 'digits'
1078        either on the left or right (or both) with extra zeros, so in
1079        general the resulting string has the form
1080 
1081          [<sign>]<zeros><digits><zeros>[<exponent>]
1082 
1083        where either of the <zeros> pieces could be empty, and there's a
1084        decimal point that could appear either in <digits> or in the
1085        leading or trailing <zeros>.
1086 
1087        Imagine an infinite 'virtual' string vdigits, consisting of the
1088        string 'digits' (starting at index 0) padded on both the left and
1089        right with infinite strings of zeros.  We want to output a slice
1090 
1091          vdigits[vdigits_start : vdigits_end]
1092 
1093        of this virtual string.  Thus if vdigits_start < 0 then we'll end
1094        up producing some leading zeros; if vdigits_end > digits_len there
1095        will be trailing zeros in the output.  The next section of code
1096        determines whether to use an exponent or not, figures out the
1097        position 'decpt' of the decimal point, and computes 'vdigits_start'
1098        and 'vdigits_end'. */
1099     vdigits_end = digits_len;
1100     switch (format_code) {
1101     case 'e':
1102         use_exp = 1;
1103         vdigits_end = precision;
1104         break;
1105     case 'f':
1106         vdigits_end = decpt + precision;
1107         break;
1108     case 'g':
1109         if (decpt <= -4 || decpt >
1110             (add_dot_0_if_integer ? precision-1 : precision))
1111             use_exp = 1;
1112         if (use_alt_formatting)
1113             vdigits_end = precision;
1114         break;
1115     case 'r':
1116         /* convert to exponential format at 1e16.  We used to convert
1117            at 1e17, but that gives odd-looking results for some values
1118            when a 16-digit 'shortest' repr is padded with bogus zeros.
1119            For example, repr(2e16+8) would give 20000000000000010.0;
1120            the true value is 20000000000000008.0. */
1121         if (decpt <= -4 || decpt > 16)
1122             use_exp = 1;
1123         break;
1124     default:
1125         PyErr_BadInternalCall();
1126         goto exit;
1127     }
1128 
1129     /* if using an exponent, reset decimal point position to 1 and adjust
1130        exponent accordingly.*/
1131     if (use_exp) {
1132         exp = (int)decpt - 1;
1133         decpt = 1;
1134     }
1135     /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1136        decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1137     vdigits_start = decpt <= 0 ? decpt-1 : 0;
1138     if (!use_exp && add_dot_0_if_integer)
1139         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1140     else
1141         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1142 
1143     /* double check inequalities */
1144     assert(vdigits_start <= 0 &&
1145            0 <= digits_len &&
1146            digits_len <= vdigits_end);
1147     /* decimal point should be in (vdigits_start, vdigits_end] */
1148     assert(vdigits_start < decpt && decpt <= vdigits_end);
1149 
1150     /* Compute an upper bound how much memory we need. This might be a few
1151        chars too long, but no big deal. */
1152     bufsize =
1153         /* sign, decimal point and trailing 0 byte */
1154         3 +
1155 
1156         /* total digit count (including zero padding on both sides) */
1157         (vdigits_end - vdigits_start) +
1158 
1159         /* exponent "e+100", max 3 numerical digits */
1160         (use_exp ? 5 : 0);
1161 
1162     /* Now allocate the memory and initialize p to point to the start of
1163        it. */
1164     buf = (char *)PyMem_Malloc(bufsize);
1165     if (buf == NULL) {
1166         PyErr_NoMemory();
1167         goto exit;
1168     }
1169     p = buf;
1170 
1171     /* Add a negative sign if negative, and a plus sign if non-negative
1172        and always_add_sign is true. */
1173     if (sign == 1)
1174         *p++ = '-';
1175     else if (always_add_sign)
1176         *p++ = '+';
1177 
1178     /* note that exactly one of the three 'if' conditions is true,
1179        so we include exactly one decimal point */
1180     /* Zero padding on left of digit string */
1181     if (decpt <= 0) {
1182         memset(p, '0', decpt-vdigits_start);
1183         p += decpt - vdigits_start;
1184         *p++ = '.';
1185         memset(p, '0', 0-decpt);
1186         p += 0-decpt;
1187     }
1188     else {
1189         memset(p, '0', 0-vdigits_start);
1190         p += 0 - vdigits_start;
1191     }
1192 
1193     /* Digits, with included decimal point */
1194     if (0 < decpt && decpt <= digits_len) {
1195         strncpy(p, digits, decpt-0);
1196         p += decpt-0;
1197         *p++ = '.';
1198         strncpy(p, digits+decpt, digits_len-decpt);
1199         p += digits_len-decpt;
1200     }
1201     else {
1202         strncpy(p, digits, digits_len);
1203         p += digits_len;
1204     }
1205 
1206     /* And zeros on the right */
1207     if (digits_len < decpt) {
1208         memset(p, '0', decpt-digits_len);
1209         p += decpt-digits_len;
1210         *p++ = '.';
1211         memset(p, '0', vdigits_end-decpt);
1212         p += vdigits_end-decpt;
1213     }
1214     else {
1215         memset(p, '0', vdigits_end-digits_len);
1216         p += vdigits_end-digits_len;
1217     }
1218 
1219     /* Delete a trailing decimal pt unless using alternative formatting. */
1220     if (p[-1] == '.' && !use_alt_formatting)
1221         p--;
1222 
1223     /* Now that we've done zero padding, add an exponent if needed. */
1224     if (use_exp) {
1225         *p++ = float_strings[OFS_E][0];
1226         exp_len = sprintf(p, "%+.02d", exp);
1227         p += exp_len;
1228     }
1229   exit:
1230     if (buf) {
1231         *p = '\0';
1232         /* It's too late if this fails, as we've already stepped on
1233            memory that isn't ours. But it's an okay debugging test. */
1234         assert(p-buf < bufsize);
1235     }
1236     if (digits)
1237         _Py_dg_freedtoa(digits);
1238 
1239     return buf;
1240 }
1241 
1242 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)1243 char * PyOS_double_to_string(double val,
1244                                          char format_code,
1245                                          int precision,
1246                                          int flags,
1247                                          int *type)
1248 {
1249     const char * const *float_strings = lc_float_strings;
1250     int mode;
1251 
1252     /* Validate format_code, and map upper and lower case. Compute the
1253        mode and make any adjustments as needed. */
1254     switch (format_code) {
1255     /* exponent */
1256     case 'E':
1257         float_strings = uc_float_strings;
1258         format_code = 'e';
1259         /* Fall through. */
1260     case 'e':
1261         mode = 2;
1262         precision++;
1263         break;
1264 
1265     /* fixed */
1266     case 'F':
1267         float_strings = uc_float_strings;
1268         format_code = 'f';
1269         /* Fall through. */
1270     case 'f':
1271         mode = 3;
1272         break;
1273 
1274     /* general */
1275     case 'G':
1276         float_strings = uc_float_strings;
1277         format_code = 'g';
1278         /* Fall through. */
1279     case 'g':
1280         mode = 2;
1281         /* precision 0 makes no sense for 'g' format; interpret as 1 */
1282         if (precision == 0)
1283             precision = 1;
1284         break;
1285 
1286     /* repr format */
1287     case 'r':
1288         mode = 0;
1289         /* Supplied precision is unused, must be 0. */
1290         if (precision != 0) {
1291             PyErr_BadInternalCall();
1292             return NULL;
1293         }
1294         break;
1295 
1296     default:
1297         PyErr_BadInternalCall();
1298         return NULL;
1299     }
1300 
1301     return format_float_short(val, format_code, mode, precision,
1302                               flags & Py_DTSF_SIGN,
1303                               flags & Py_DTSF_ADD_DOT_0,
1304                               flags & Py_DTSF_ALT,
1305                               float_strings, type);
1306 }
1307 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */
1308