• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: C; c-file-style: "python" -*- */
2 
3 #include <Python.h>
4 #include <locale.h>
5 
/* Test whether s begins with the pattern t, ignoring the case of s.  Used to
   recognise "nan" and "inf"; t must already be lower-case.  Returns 1 when
   all of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            /* Mismatch (this also covers s ending before t does). */
            return 0;
        }
    }
    return 1;
}
18 
19 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20    "infinity", with an optional leading sign of "+" or "-".  On success,
21    return the NaN or Infinity as a double and set *endptr to point just beyond
22    the successfully parsed portion of the string.  On failure, return -1.0 and
23    set *endptr to point to the start of the string. */
24 
25 #ifndef PY_NO_SHORT_FLOAT_REPR
26 
/* Variant compiled when PY_NO_SHORT_FLOAT_REPR is not defined: the special
   values come from _Py_dg_infinity() and _Py_dg_stdnan().  Accepts an
   optional sign followed by "inf", "infinity" or "nan" in any case.  On
   failure returns -1.0 and leaves *endptr equal to p. */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = 0;
    double value;

    /* Optional leading sign. */
    switch (*cursor) {
    case '-':
        is_negative = 1;
        cursor++;
        break;
    case '+':
        cursor++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cursor, "inf")) {
        cursor += 3;
        /* "infinity" is the long spelling: consume the remaining letters. */
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: consume no input, not even the sign. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59 
60 #else
61 
62 double
_Py_parse_inf_or_nan(const char * p,char ** endptr)63 _Py_parse_inf_or_nan(const char *p, char **endptr)
64 {
65     double retval;
66     const char *s;
67     int negate = 0;
68 
69     s = p;
70     if (*s == '-') {
71         negate = 1;
72         s++;
73     }
74     else if (*s == '+') {
75         s++;
76     }
77     if (case_insensitive_match(s, "inf")) {
78         s += 3;
79         if (case_insensitive_match(s, "inity"))
80             s += 5;
81         retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82     }
83 #ifdef Py_NAN
84     else if (case_insensitive_match(s, "nan")) {
85         s += 3;
86         retval = negate ? -Py_NAN : Py_NAN;
87     }
88 #endif
89     else {
90         s = p;
91         retval = -1.0;
92     }
93     *endptr = (char *)s;
94     return retval;
95 }
96 
97 #endif
98 
/**
 * _PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-%NULL, it returns the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus %HUGE_VAL
 * is returned (according to the sign of the value), and %ERANGE is
 * stored in %errno. If the correct value would cause underflow,
 * zero is returned and %ERANGE is stored in %errno.
 * If memory allocation fails, %ENOMEM is stored in %errno.
 *
 * This function resets %errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
127 
128 #ifndef PY_NO_SHORT_FLOAT_REPR
129 
130 static double
_PyOS_ascii_strtod(const char * nptr,char ** endptr)131 _PyOS_ascii_strtod(const char *nptr, char **endptr)
132 {
133     double result;
134     _Py_SET_53BIT_PRECISION_HEADER;
135 
136     assert(nptr != NULL);
137     /* Set errno to zero, so that we can distinguish zero results
138        and underflows */
139     errno = 0;
140 
141     _Py_SET_53BIT_PRECISION_START;
142     result = _Py_dg_strtod(nptr, endptr);
143     _Py_SET_53BIT_PRECISION_END;
144 
145     if (*endptr == nptr)
146         /* string might represent an inf or nan */
147         result = _Py_parse_inf_or_nan(nptr, endptr);
148 
149     return result;
150 
151 }
152 
153 #else
154 
/*
   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr must be non-NULL.  On success *endptr is set just past the text that
   was converted.  If the text does not start with a valid number, *endptr is
   set to nptr, errno to EINVAL, and -1.0 is returned.  If the temporary copy
   of the input cannot be allocated, *endptr is set to nptr and errno to
   ENOMEM.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Map strtod's stop position in the copy back onto the
               input.  fail_pos points into copy while decimal_point_pos
               points into the input, so compare their offsets from the
               start of their own buffers rather than the raw addresses.
               Past the decimal point the copy is (decimal_point_len - 1)
               bytes longer than the input. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing after the (optional) sign */
    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309 
310 #endif
311 
312 /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313    as a string of ASCII characters) to a float.  The string should not have
314    leading or trailing whitespace.  The conversion is independent of the
315    current locale.
316 
317    If endptr is NULL, try to convert the whole string.  Raise ValueError and
318    return -1.0 if the string is not a valid representation of a floating-point
319    number.
320 
321    If endptr is non-NULL, try to convert as much of the string as possible.
322    If no initial segment of the string is the valid representation of a
323    floating-point number then *endptr is set to point to the beginning of the
324    string, -1.0 is returned and again ValueError is raised.
325 
326    On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327    if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328    exception is raised.  Otherwise, overflow_exception should point to
329    a Python exception, this exception will be raised, -1.0 will be returned,
330    and *endptr will point just past the end of the converted value.
331 
332    If any other failure occurs (for example lack of memory), -1.0 is returned
333    and the appropriate Python exception will have been set.
334 */
335 
336 double
PyOS_string_to_double(const char * s,char ** endptr,PyObject * overflow_exception)337 PyOS_string_to_double(const char *s,
338                       char **endptr,
339                       PyObject *overflow_exception)
340 {
341     double x, result=-1.0;
342     char *fail_pos;
343 
344     errno = 0;
345     PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346     x = _PyOS_ascii_strtod(s, &fail_pos);
347     PyFPE_END_PROTECT(x)
348 
349     if (errno == ENOMEM) {
350         PyErr_NoMemory();
351         fail_pos = (char *)s;
352     }
353     else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354         PyErr_Format(PyExc_ValueError,
355                       "could not convert string to float: "
356                       "%.200s", s);
357     else if (fail_pos == s)
358         PyErr_Format(PyExc_ValueError,
359                       "could not convert string to float: "
360                       "%.200s", s);
361     else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362         PyErr_Format(overflow_exception,
363                       "value too large to convert to float: "
364                       "%.200s", s);
365     else
366         result = x;
367 
368     if (endptr != NULL)
369         *endptr = fail_pos;
370     return result;
371 }
372 
373 /* Remove underscores that follow the underscore placement rule from
374    the string and then call the `innerfunc` function on the result.
375    It should return a new object or NULL on exception.
376 
377    `what` is used for the error message emitted when underscores are detected
378    that don't follow the rule. `arg` is an opaque pointer passed to the inner
379    function.
380 
381    This is used to implement underscore-agnostic conversion for floats
382    and complex numbers.
383 */
384 PyObject *
_Py_string_to_number_with_underscores(const char * s,Py_ssize_t orig_len,const char * what,PyObject * obj,void * arg,PyObject * (* innerfunc)(const char *,Py_ssize_t,void *))385 _Py_string_to_number_with_underscores(
386     const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
387     PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
388 {
389     char prev;
390     const char *p, *last;
391     char *dup, *end;
392     PyObject *result;
393 
394     if (strchr(s, '_') == NULL) {
395         return innerfunc(s, orig_len, arg);
396     }
397 
398     dup = PyMem_Malloc(orig_len + 1);
399     end = dup;
400     prev = '\0';
401     last = s + orig_len;
402     for (p = s; *p; p++) {
403         if (*p == '_') {
404             /* Underscores are only allowed after digits. */
405             if (!(prev >= '0' && prev <= '9')) {
406                 goto error;
407             }
408         }
409         else {
410             *end++ = *p;
411             /* Underscores are only allowed before digits. */
412             if (prev == '_' && !(*p >= '0' && *p <= '9')) {
413                 goto error;
414             }
415         }
416         prev = *p;
417     }
418     /* Underscores are not allowed at the end. */
419     if (prev == '_') {
420         goto error;
421     }
422     /* No embedded NULs allowed. */
423     if (p != last) {
424         goto error;
425     }
426     *end = '\0';
427     result = innerfunc(dup, end - dup, arg);
428     PyMem_Free(dup);
429     return result;
430 
431   error:
432     PyMem_Free(dup);
433     PyErr_Format(PyExc_ValueError,
434 		 "could not convert string to %s: "
435 		 "%R", what, obj);
436     return NULL;
437 }
438 
439 #ifdef PY_NO_SHORT_FLOAT_REPR
440 
441 /* Given a string that may have a decimal point in the current
442    locale, change it back to a dot.  Since the string cannot get
443    longer, no need for a maximum buffer size parameter. */
444 Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char * buffer)445 change_decimal_from_locale_to_dot(char* buffer)
446 {
447     struct lconv *locale_data = localeconv();
448     const char *decimal_point = locale_data->decimal_point;
449 
450     if (decimal_point[0] != '.' || decimal_point[1] != 0) {
451         size_t decimal_point_len = strlen(decimal_point);
452 
453         if (*buffer == '+' || *buffer == '-')
454             buffer++;
455         while (Py_ISDIGIT(*buffer))
456             buffer++;
457         if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
458             *buffer = '.';
459             buffer++;
460             if (decimal_point_len > 1) {
461                 /* buffer needs to get smaller */
462                 size_t rest_len = strlen(buffer +
463                                      (decimal_point_len - 1));
464                 memmove(buffer,
465                     buffer + (decimal_point_len - 1),
466                     rest_len);
467                 buffer[rest_len] = 0;
468             }
469         }
470     }
471 }
472 
473 
474 /* From the C99 standard, section 7.19.6:
475 The exponent always contains at least two digits, and only as many more digits
476 as necessary to represent the exponent.
477 */
478 #define MIN_EXPONENT_DIGITS 2
479 
480 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
481    in length. */
482 Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char * buffer,size_t buf_size)483 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
484 {
485     char *p = strpbrk(buffer, "eE");
486     if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
487         char *start = p + 2;
488         int exponent_digit_cnt = 0;
489         int leading_zero_cnt = 0;
490         int in_leading_zeros = 1;
491         int significant_digit_cnt;
492 
493         /* Skip over the exponent and the sign. */
494         p += 2;
495 
496         /* Find the end of the exponent, keeping track of leading
497            zeros. */
498         while (*p && Py_ISDIGIT(*p)) {
499             if (in_leading_zeros && *p == '0')
500                 ++leading_zero_cnt;
501             if (*p != '0')
502                 in_leading_zeros = 0;
503             ++p;
504             ++exponent_digit_cnt;
505         }
506 
507         significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
508         if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
509             /* If there are 2 exactly digits, we're done,
510                regardless of what they contain */
511         }
512         else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
513             int extra_zeros_cnt;
514 
515             /* There are more than 2 digits in the exponent.  See
516                if we can delete some of the leading zeros */
517             if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
518                 significant_digit_cnt = MIN_EXPONENT_DIGITS;
519             extra_zeros_cnt = exponent_digit_cnt -
520                 significant_digit_cnt;
521 
522             /* Delete extra_zeros_cnt worth of characters from the
523                front of the exponent */
524             assert(extra_zeros_cnt >= 0);
525 
526             /* Add one to significant_digit_cnt to copy the
527                trailing 0 byte, thus setting the length */
528             memmove(start,
529                 start + extra_zeros_cnt,
530                 significant_digit_cnt + 1);
531         }
532         else {
533             /* If there are fewer than 2 digits, add zeros
534                until there are 2, if there's enough room */
535             int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
536             if (start + zeros + exponent_digit_cnt + 1
537                   < buffer + buf_size) {
538                 memmove(start + zeros, start,
539                     exponent_digit_cnt + 1);
540                 memset(start, '0', zeros);
541             }
542         }
543     }
544 }
545 
546 /* Remove trailing zeros after the decimal point from a numeric string; also
547    remove the decimal point if all digits following it are zero.  The numeric
548    string must end in '\0', and should not have any leading or trailing
549    whitespace.  Assumes that the decimal point is '.'. */
550 Py_LOCAL_INLINE(void)
remove_trailing_zeros(char * buffer)551 remove_trailing_zeros(char *buffer)
552 {
553     char *old_fraction_end, *new_fraction_end, *end, *p;
554 
555     p = buffer;
556     if (*p == '-' || *p == '+')
557         /* Skip leading sign, if present */
558         ++p;
559     while (Py_ISDIGIT(*p))
560         ++p;
561 
562     /* if there's no decimal point there's nothing to do */
563     if (*p++ != '.')
564         return;
565 
566     /* scan any digits after the point */
567     while (Py_ISDIGIT(*p))
568         ++p;
569     old_fraction_end = p;
570 
571     /* scan up to ending '\0' */
572     while (*p != '\0')
573         p++;
574     /* +1 to make sure that we move the null byte as well */
575     end = p+1;
576 
577     /* scan back from fraction_end, looking for removable zeros */
578     p = old_fraction_end;
579     while (*(p-1) == '0')
580         --p;
581     /* and remove point if we've got that far */
582     if (*(p-1) == '.')
583         --p;
584     new_fraction_end = p;
585 
586     memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
587 }
588 
589 /* Ensure that buffer has a decimal point in it.  The decimal point will not
590    be in the current locale, it will always be '.'. Don't add a decimal point
591    if an exponent is present.  Also, convert to exponential notation where
592    adding a '.0' would produce too many significant digits (see issue 5864).
593 
594    Returns a pointer to the fixed buffer, or NULL on failure.
595 */
596 Py_LOCAL_INLINE(char *)
ensure_decimal_point(char * buffer,size_t buf_size,int precision)597 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
598 {
599     int digit_count, insert_count = 0, convert_to_exp = 0;
600     char *chars_to_insert, *digits_start;
601 
602     /* search for the first non-digit character */
603     char *p = buffer;
604     if (*p == '-' || *p == '+')
605         /* Skip leading sign, if present.  I think this could only
606            ever be '-', but it can't hurt to check for both. */
607         ++p;
608     digits_start = p;
609     while (*p && Py_ISDIGIT(*p))
610         ++p;
611     digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
612 
613     if (*p == '.') {
614         if (Py_ISDIGIT(*(p+1))) {
615             /* Nothing to do, we already have a decimal
616                point and a digit after it */
617         }
618         else {
619             /* We have a decimal point, but no following
620                digit.  Insert a zero after the decimal. */
621             /* can't ever get here via PyOS_double_to_string */
622             assert(precision == -1);
623             ++p;
624             chars_to_insert = "0";
625             insert_count = 1;
626         }
627     }
628     else if (!(*p == 'e' || *p == 'E')) {
629         /* Don't add ".0" if we have an exponent. */
630         if (digit_count == precision) {
631             /* issue 5864: don't add a trailing .0 in the case
632                where the '%g'-formatted result already has as many
633                significant digits as were requested.  Switch to
634                exponential notation instead. */
635             convert_to_exp = 1;
636             /* no exponent, no point, and we shouldn't land here
637                for infs and nans, so we must be at the end of the
638                string. */
639             assert(*p == '\0');
640         }
641         else {
642             assert(precision == -1 || digit_count < precision);
643             chars_to_insert = ".0";
644             insert_count = 2;
645         }
646     }
647     if (insert_count) {
648         size_t buf_len = strlen(buffer);
649         if (buf_len + insert_count + 1 >= buf_size) {
650             /* If there is not enough room in the buffer
651                for the additional text, just skip it.  It's
652                not worth generating an error over. */
653         }
654         else {
655             memmove(p + insert_count, p,
656                 buffer + strlen(buffer) - p + 1);
657             memcpy(p, chars_to_insert, insert_count);
658         }
659     }
660     if (convert_to_exp) {
661         int written;
662         size_t buf_avail;
663         p = digits_start;
664         /* insert decimal point */
665         assert(digit_count >= 1);
666         memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
667         p[1] = '.';
668         p += digit_count+1;
669         assert(p <= buf_size+buffer);
670         buf_avail = buf_size+buffer-p;
671         if (buf_avail == 0)
672             return NULL;
673         /* Add exponent.  It's okay to use lower case 'e': we only
674            arrive here as a result of using the empty format code or
675            repr/str builtins and those never want an upper case 'E' */
676         written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
677         if (!(0 <= written &&
678               written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
679             /* output truncated, or something else bad happened */
680             return NULL;
681         remove_trailing_zeros(buffer);
682     }
683     return buffer;
684 }
685 
686 /* see FORMATBUFLEN in unicodeobject.c */
687 #define FLOAT_FORMATBUFLEN 120
688 
689 /**
690  * _PyOS_ascii_formatd:
691  * @buffer: A buffer to place the resulting string in
692  * @buf_size: The length of the buffer.
693  * @format: The printf()-style format to use for the
694  *          code to use for converting.
695  * @d: The #gdouble to convert
696  * @precision: The precision to use when formatting.
697  *
698  * Converts a #gdouble to a string, using the '.' as
699  * decimal point. To format the number you pass in
700  * a printf()-style format string. Allowed conversion
701  * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
702  *
703  * 'Z' is the same as 'g', except it always has a decimal and
704  *     at least one digit after the decimal.
705  *
706  * Return value: The pointer to the buffer with the converted string.
707  * On failure returns NULL but does not set any Python exception.
708  **/
709 static char *
_PyOS_ascii_formatd(char * buffer,size_t buf_size,const char * format,double d,int precision)710 _PyOS_ascii_formatd(char       *buffer,
711                    size_t      buf_size,
712                    const char *format,
713                    double      d,
714                    int         precision)
715 {
716     char format_char;
717     size_t format_len = strlen(format);
718 
719     /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
720        also with at least one character past the decimal. */
721     char tmp_format[FLOAT_FORMATBUFLEN];
722 
723     /* The last character in the format string must be the format char */
724     format_char = format[format_len - 1];
725 
726     if (format[0] != '%')
727         return NULL;
728 
729     /* I'm not sure why this test is here.  It's ensuring that the format
730        string after the first character doesn't have a single quote, a
731        lowercase l, or a percent. This is the reverse of the commented-out
732        test about 10 lines ago. */
733     if (strpbrk(format + 1, "'l%"))
734         return NULL;
735 
736     /* Also curious about this function is that it accepts format strings
737        like "%xg", which are invalid for floats.  In general, the
738        interface to this function is not very good, but changing it is
739        difficult because it's a public API. */
740 
741     if (!(format_char == 'e' || format_char == 'E' ||
742           format_char == 'f' || format_char == 'F' ||
743           format_char == 'g' || format_char == 'G' ||
744           format_char == 'Z'))
745         return NULL;
746 
747     /* Map 'Z' format_char to 'g', by copying the format string and
748        replacing the final char with a 'g' */
749     if (format_char == 'Z') {
750         if (format_len + 1 >= sizeof(tmp_format)) {
751             /* The format won't fit in our copy.  Error out.  In
752                practice, this will never happen and will be
753                detected by returning NULL */
754             return NULL;
755         }
756         strcpy(tmp_format, format);
757         tmp_format[format_len - 1] = 'g';
758         format = tmp_format;
759     }
760 
761 
762     /* Have PyOS_snprintf do the hard work */
763     PyOS_snprintf(buffer, buf_size, format, d);
764 
765     /* Do various fixups on the return string */
766 
767     /* Get the current locale, and find the decimal point string.
768        Convert that string back to a dot. */
769     change_decimal_from_locale_to_dot(buffer);
770 
771     /* If an exponent exists, ensure that the exponent is at least
772        MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
773        for the extra zeros.  Also, if there are more than
774        MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
775        back to MIN_EXPONENT_DIGITS */
776     ensure_minimum_exponent_length(buffer, buf_size);
777 
778     /* If format_char is 'Z', make sure we have at least one character
779        after the decimal point (and make sure we have a decimal point);
780        also switch to exponential notation in some edge cases where the
781        extra character would produce more significant digits that we
782        really want. */
783     if (format_char == 'Z')
784         buffer = ensure_decimal_point(buffer, buf_size, precision);
785 
786     return buffer;
787 }
788 
789 /* The fallback code to use if _Py_dg_dtoa is not available. */
790 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)791 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
792                                          char format_code,
793                                          int precision,
794                                          int flags,
795                                          int *type)
796 {
797     char format[32];
798     Py_ssize_t bufsize;
799     char *buf;
800     int t, exp;
801     int upper = 0;
802 
803     /* Validate format_code, and map upper and lower case */
804     switch (format_code) {
805     case 'e':          /* exponent */
806     case 'f':          /* fixed */
807     case 'g':          /* general */
808         break;
809     case 'E':
810         upper = 1;
811         format_code = 'e';
812         break;
813     case 'F':
814         upper = 1;
815         format_code = 'f';
816         break;
817     case 'G':
818         upper = 1;
819         format_code = 'g';
820         break;
821     case 'r':          /* repr format */
822         /* Supplied precision is unused, must be 0. */
823         if (precision != 0) {
824             PyErr_BadInternalCall();
825             return NULL;
826         }
827         /* The repr() precision (17 significant decimal digits) is the
828            minimal number that is guaranteed to have enough precision
829            so that if the number is read back in the exact same binary
830            value is recreated.  This is true for IEEE floating point
831            by design, and also happens to work for all other modern
832            hardware. */
833         precision = 17;
834         format_code = 'g';
835         break;
836     default:
837         PyErr_BadInternalCall();
838         return NULL;
839     }
840 
841     /* Here's a quick-and-dirty calculation to figure out how big a buffer
842        we need.  In general, for a finite float we need:
843 
844          1 byte for each digit of the decimal significand, and
845 
846          1 for a possible sign
847          1 for a possible decimal point
848          2 for a possible [eE][+-]
849          1 for each digit of the exponent;  if we allow 19 digits
850            total then we're safe up to exponents of 2**63.
851          1 for the trailing nul byte
852 
853        This gives a total of 24 + the number of digits in the significand,
854        and the number of digits in the significand is:
855 
856          for 'g' format: at most precision, except possibly
857            when precision == 0, when it's 1.
858          for 'e' format: precision+1
859          for 'f' format: precision digits after the point, at least 1
860            before.  To figure out how many digits appear before the point
861            we have to examine the size of the number.  If fabs(val) < 1.0
862            then there will be only one digit before the point.  If
863            fabs(val) >= 1.0, then there are at most
864 
865          1+floor(log10(ceiling(fabs(val))))
866 
867            digits before the point (where the 'ceiling' allows for the
868            possibility that the rounding rounds the integer part of val
869            up).  A safe upper bound for the above quantity is
870            1+floor(exp/3), where exp is the unique integer such that 0.5
871            <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
872            frexp.
873 
874        So we allow room for precision+1 digits for all formats, plus an
875        extra floor(exp/3) digits for 'f' format.
876 
877     */
878 
879     if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
880         /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
881         bufsize = 5;
882     else {
883         bufsize = 25 + precision;
884         if (format_code == 'f' && fabs(val) >= 1.0) {
885             frexp(val, &exp);
886             bufsize += exp/3;
887         }
888     }
889 
890     buf = PyMem_Malloc(bufsize);
891     if (buf == NULL) {
892         PyErr_NoMemory();
893         return NULL;
894     }
895 
896     /* Handle nan and inf. */
897     if (Py_IS_NAN(val)) {
898         strcpy(buf, "nan");
899         t = Py_DTST_NAN;
900     } else if (Py_IS_INFINITY(val)) {
901         if (copysign(1., val) == 1.)
902             strcpy(buf, "inf");
903         else
904             strcpy(buf, "-inf");
905         t = Py_DTST_INFINITE;
906     } else {
907         t = Py_DTST_FINITE;
908         if (flags & Py_DTSF_ADD_DOT_0)
909             format_code = 'Z';
910 
911         PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
912                       (flags & Py_DTSF_ALT ? "#" : ""), precision,
913                       format_code);
914         _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
915     }
916 
917     /* Add sign when requested.  It's convenient (esp. when formatting
918      complex numbers) to include a sign even for inf and nan. */
919     if (flags & Py_DTSF_SIGN && buf[0] != '-') {
920         size_t len = strlen(buf);
921         /* the bufsize calculations above should ensure that we've got
922            space to add a sign */
923         assert((size_t)bufsize >= len+2);
924         memmove(buf+1, buf, len+1);
925         buf[0] = '+';
926     }
927     if (upper) {
928         /* Convert to upper case. */
929         char *p1;
930         for (p1 = buf; *p1; p1++)
931             *p1 = Py_TOUPPER(*p1);
932     }
933 
934     if (type)
935         *type = t;
936     return buf;
937 }
938 
939 #else
940 
941 /* _Py_dg_dtoa is available. */
942 
/* The spellings of the special values and of the exponent marker come from
   lookup tables, so no locale-dependent upper-casing is ever needed.  The
   constants below are the indices into those tables. */
enum {
    OFS_INF = 0,
    OFS_NAN = 1,
    OFS_E = 2
};

/* Lower-case table.  The lengths of these strings are known to the code
   below, so don't change them. */
static const char * const lc_float_strings[] = {
    "inf", "nan", "e"
};

/* Upper-case table; same layout and same string lengths as the lower-case
   table. */
static const char * const uc_float_strings[] = {
    "INF", "NAN", "E"
};
960 
961 
962 /* Convert a double d to a string, and return a PyMem_Malloc'd block of
963    memory contain the resulting string.
964 
965    Arguments:
966      d is the double to be converted
967      format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
968        correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
969      mode is one of '0', '2' or '3', and is completely determined by
970        format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
971      precision is the desired precision
972      always_add_sign is nonzero if a '+' sign should be included for positive
973        numbers
974      add_dot_0_if_integer is nonzero if integers in non-exponential form
975        should have ".0" added.  Only applies to format codes 'r' and 'g'.
976      use_alt_formatting is nonzero if alternative formatting should be
977        used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
978        at most one of use_alt_formatting and add_dot_0_if_integer should
979        be nonzero.
980      type, if non-NULL, will be set to one of these constants to identify
981        the type of the 'd' argument:
982      Py_DTST_FINITE
983      Py_DTST_INFINITE
984      Py_DTST_NAN
985 
986    Returns a PyMem_Malloc'd block of memory containing the resulting string,
987     or NULL on error. If NULL is returned, the Python error has been set.
988  */
989 
990 static char *
format_float_short(double d,char format_code,int mode,int precision,int always_add_sign,int add_dot_0_if_integer,int use_alt_formatting,const char * const * float_strings,int * type)991 format_float_short(double d, char format_code,
992                    int mode, int precision,
993                    int always_add_sign, int add_dot_0_if_integer,
994                    int use_alt_formatting, const char * const *float_strings,
995                    int *type)
996 {
997     char *buf = NULL;
998     char *p = NULL;
999     Py_ssize_t bufsize = 0;
1000     char *digits, *digits_end;
1001     int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1002     Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1003     _Py_SET_53BIT_PRECISION_HEADER;
1004 
1005     /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1006        Must be matched by a call to _Py_dg_freedtoa. */
1007     _Py_SET_53BIT_PRECISION_START;
1008     digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1009                          &digits_end);
1010     _Py_SET_53BIT_PRECISION_END;
1011 
1012     decpt = (Py_ssize_t)decpt_as_int;
1013     if (digits == NULL) {
1014         /* The only failure mode is no memory. */
1015         PyErr_NoMemory();
1016         goto exit;
1017     }
1018     assert(digits_end != NULL && digits_end >= digits);
1019     digits_len = digits_end - digits;
1020 
1021     if (digits_len && !Py_ISDIGIT(digits[0])) {
1022         /* Infinities and nans here; adapt Gay's output,
1023            so convert Infinity to inf and NaN to nan, and
1024            ignore sign of nan. Then return. */
1025 
1026         /* ignore the actual sign of a nan */
1027         if (digits[0] == 'n' || digits[0] == 'N')
1028             sign = 0;
1029 
1030         /* We only need 5 bytes to hold the result "+inf\0" . */
1031         bufsize = 5; /* Used later in an assert. */
1032         buf = (char *)PyMem_Malloc(bufsize);
1033         if (buf == NULL) {
1034             PyErr_NoMemory();
1035             goto exit;
1036         }
1037         p = buf;
1038 
1039         if (sign == 1) {
1040             *p++ = '-';
1041         }
1042         else if (always_add_sign) {
1043             *p++ = '+';
1044         }
1045         if (digits[0] == 'i' || digits[0] == 'I') {
1046             strncpy(p, float_strings[OFS_INF], 3);
1047             p += 3;
1048 
1049             if (type)
1050                 *type = Py_DTST_INFINITE;
1051         }
1052         else if (digits[0] == 'n' || digits[0] == 'N') {
1053             strncpy(p, float_strings[OFS_NAN], 3);
1054             p += 3;
1055 
1056             if (type)
1057                 *type = Py_DTST_NAN;
1058         }
1059         else {
1060             /* shouldn't get here: Gay's code should always return
1061                something starting with a digit, an 'I',  or 'N' */
1062             strncpy(p, "ERR", 3);
1063             /* p += 3; */
1064             assert(0);
1065         }
1066         goto exit;
1067     }
1068 
1069     /* The result must be finite (not inf or nan). */
1070     if (type)
1071         *type = Py_DTST_FINITE;
1072 
1073 
1074     /* We got digits back, format them.  We may need to pad 'digits'
1075        either on the left or right (or both) with extra zeros, so in
1076        general the resulting string has the form
1077 
1078          [<sign>]<zeros><digits><zeros>[<exponent>]
1079 
1080        where either of the <zeros> pieces could be empty, and there's a
1081        decimal point that could appear either in <digits> or in the
1082        leading or trailing <zeros>.
1083 
1084        Imagine an infinite 'virtual' string vdigits, consisting of the
1085        string 'digits' (starting at index 0) padded on both the left and
1086        right with infinite strings of zeros.  We want to output a slice
1087 
1088          vdigits[vdigits_start : vdigits_end]
1089 
1090        of this virtual string.  Thus if vdigits_start < 0 then we'll end
1091        up producing some leading zeros; if vdigits_end > digits_len there
1092        will be trailing zeros in the output.  The next section of code
1093        determines whether to use an exponent or not, figures out the
1094        position 'decpt' of the decimal point, and computes 'vdigits_start'
1095        and 'vdigits_end'. */
1096     vdigits_end = digits_len;
1097     switch (format_code) {
1098     case 'e':
1099         use_exp = 1;
1100         vdigits_end = precision;
1101         break;
1102     case 'f':
1103         vdigits_end = decpt + precision;
1104         break;
1105     case 'g':
1106         if (decpt <= -4 || decpt >
1107             (add_dot_0_if_integer ? precision-1 : precision))
1108             use_exp = 1;
1109         if (use_alt_formatting)
1110             vdigits_end = precision;
1111         break;
1112     case 'r':
1113         /* convert to exponential format at 1e16.  We used to convert
1114            at 1e17, but that gives odd-looking results for some values
1115            when a 16-digit 'shortest' repr is padded with bogus zeros.
1116            For example, repr(2e16+8) would give 20000000000000010.0;
1117            the true value is 20000000000000008.0. */
1118         if (decpt <= -4 || decpt > 16)
1119             use_exp = 1;
1120         break;
1121     default:
1122         PyErr_BadInternalCall();
1123         goto exit;
1124     }
1125 
1126     /* if using an exponent, reset decimal point position to 1 and adjust
1127        exponent accordingly.*/
1128     if (use_exp) {
1129         exp = (int)decpt - 1;
1130         decpt = 1;
1131     }
1132     /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1133        decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1134     vdigits_start = decpt <= 0 ? decpt-1 : 0;
1135     if (!use_exp && add_dot_0_if_integer)
1136         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1137     else
1138         vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1139 
1140     /* double check inequalities */
1141     assert(vdigits_start <= 0 &&
1142            0 <= digits_len &&
1143            digits_len <= vdigits_end);
1144     /* decimal point should be in (vdigits_start, vdigits_end] */
1145     assert(vdigits_start < decpt && decpt <= vdigits_end);
1146 
1147     /* Compute an upper bound how much memory we need. This might be a few
1148        chars too long, but no big deal. */
1149     bufsize =
1150         /* sign, decimal point and trailing 0 byte */
1151         3 +
1152 
1153         /* total digit count (including zero padding on both sides) */
1154         (vdigits_end - vdigits_start) +
1155 
1156         /* exponent "e+100", max 3 numerical digits */
1157         (use_exp ? 5 : 0);
1158 
1159     /* Now allocate the memory and initialize p to point to the start of
1160        it. */
1161     buf = (char *)PyMem_Malloc(bufsize);
1162     if (buf == NULL) {
1163         PyErr_NoMemory();
1164         goto exit;
1165     }
1166     p = buf;
1167 
1168     /* Add a negative sign if negative, and a plus sign if non-negative
1169        and always_add_sign is true. */
1170     if (sign == 1)
1171         *p++ = '-';
1172     else if (always_add_sign)
1173         *p++ = '+';
1174 
1175     /* note that exactly one of the three 'if' conditions is true,
1176        so we include exactly one decimal point */
1177     /* Zero padding on left of digit string */
1178     if (decpt <= 0) {
1179         memset(p, '0', decpt-vdigits_start);
1180         p += decpt - vdigits_start;
1181         *p++ = '.';
1182         memset(p, '0', 0-decpt);
1183         p += 0-decpt;
1184     }
1185     else {
1186         memset(p, '0', 0-vdigits_start);
1187         p += 0 - vdigits_start;
1188     }
1189 
1190     /* Digits, with included decimal point */
1191     if (0 < decpt && decpt <= digits_len) {
1192         strncpy(p, digits, decpt-0);
1193         p += decpt-0;
1194         *p++ = '.';
1195         strncpy(p, digits+decpt, digits_len-decpt);
1196         p += digits_len-decpt;
1197     }
1198     else {
1199         strncpy(p, digits, digits_len);
1200         p += digits_len;
1201     }
1202 
1203     /* And zeros on the right */
1204     if (digits_len < decpt) {
1205         memset(p, '0', decpt-digits_len);
1206         p += decpt-digits_len;
1207         *p++ = '.';
1208         memset(p, '0', vdigits_end-decpt);
1209         p += vdigits_end-decpt;
1210     }
1211     else {
1212         memset(p, '0', vdigits_end-digits_len);
1213         p += vdigits_end-digits_len;
1214     }
1215 
1216     /* Delete a trailing decimal pt unless using alternative formatting. */
1217     if (p[-1] == '.' && !use_alt_formatting)
1218         p--;
1219 
1220     /* Now that we've done zero padding, add an exponent if needed. */
1221     if (use_exp) {
1222         *p++ = float_strings[OFS_E][0];
1223         exp_len = sprintf(p, "%+.02d", exp);
1224         p += exp_len;
1225     }
1226   exit:
1227     if (buf) {
1228         *p = '\0';
1229         /* It's too late if this fails, as we've already stepped on
1230            memory that isn't ours. But it's an okay debugging test. */
1231         assert(p-buf < bufsize);
1232     }
1233     if (digits)
1234         _Py_dg_freedtoa(digits);
1235 
1236     return buf;
1237 }
1238 
1239 
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)1240 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1241                                          char format_code,
1242                                          int precision,
1243                                          int flags,
1244                                          int *type)
1245 {
1246     const char * const *float_strings = lc_float_strings;
1247     int mode;
1248 
1249     /* Validate format_code, and map upper and lower case. Compute the
1250        mode and make any adjustments as needed. */
1251     switch (format_code) {
1252     /* exponent */
1253     case 'E':
1254         float_strings = uc_float_strings;
1255         format_code = 'e';
1256         /* Fall through. */
1257     case 'e':
1258         mode = 2;
1259         precision++;
1260         break;
1261 
1262     /* fixed */
1263     case 'F':
1264         float_strings = uc_float_strings;
1265         format_code = 'f';
1266         /* Fall through. */
1267     case 'f':
1268         mode = 3;
1269         break;
1270 
1271     /* general */
1272     case 'G':
1273         float_strings = uc_float_strings;
1274         format_code = 'g';
1275         /* Fall through. */
1276     case 'g':
1277         mode = 2;
1278         /* precision 0 makes no sense for 'g' format; interpret as 1 */
1279         if (precision == 0)
1280             precision = 1;
1281         break;
1282 
1283     /* repr format */
1284     case 'r':
1285         mode = 0;
1286         /* Supplied precision is unused, must be 0. */
1287         if (precision != 0) {
1288             PyErr_BadInternalCall();
1289             return NULL;
1290         }
1291         break;
1292 
1293     default:
1294         PyErr_BadInternalCall();
1295         return NULL;
1296     }
1297 
1298     return format_float_short(val, format_code, mode, precision,
1299                               flags & Py_DTSF_SIGN,
1300                               flags & Py_DTSF_ADD_DOT_0,
1301                               flags & Py_DTSF_ALT,
1302                               float_strings, type);
1303 }
1304 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */
1305