1 /* -*- Mode: C; c-file-style: "python" -*- */
2
3 #include <Python.h>
4 #include "pycore_dtoa.h" // _Py_dg_strtod()
5 #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
6
7 #include <locale.h> // localeconv()
8
/* Check whether s begins with the pattern t, ignoring the case of s.  The
   pattern t must already be lower-case.  Used to recognise the "nan" and
   "inf" spellings.  Returns 1 if all of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* A mismatch (including s ending early) means no match. */
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    return 1;
}
21
22 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
23 "infinity", with an optional leading sign of "+" or "-". On success,
24 return the NaN or Infinity as a double and set *endptr to point just beyond
25 the successfully parsed portion of the string. On failure, return -1.0 and
26 set *endptr to point to the start of the string. */
27 double
_Py_parse_inf_or_nan(const char * p,char ** endptr)28 _Py_parse_inf_or_nan(const char *p, char **endptr)
29 {
30 double retval;
31 const char *s;
32 int negate = 0;
33
34 s = p;
35 if (*s == '-') {
36 negate = 1;
37 s++;
38 }
39 else if (*s == '+') {
40 s++;
41 }
42 if (case_insensitive_match(s, "inf")) {
43 s += 3;
44 if (case_insensitive_match(s, "inity"))
45 s += 5;
46 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
47 }
48 else if (case_insensitive_match(s, "nan")) {
49 s += 3;
50 retval = negate ? -fabs(Py_NAN) : fabs(Py_NAN);
51 }
52 else {
53 s = p;
54 retval = -1.0;
55 }
56 *endptr = (char *)s;
57 return retval;
58 }
59
60
61 /**
62 * _PyOS_ascii_strtod:
63 * @nptr: the string to convert to a numeric value.
64 * @endptr: if non-%NULL, it returns the character after
65 * the last character used in the conversion.
66 *
 * Converts a string to a double value.
68 * This function behaves like the standard strtod() function
69 * does in the C locale. It does this without actually
70 * changing the current locale, since that would not be
71 * thread-safe.
72 *
73 * This function is typically used when reading configuration
74 * files or other non-user input that should be locale independent.
75 * To handle input from the user you should normally use the
76 * locale-sensitive system strtod() function.
77 *
78 * If the correct value would cause overflow, plus or minus %HUGE_VAL
79 * is returned (according to the sign of the value), and %ERANGE is
80 * stored in %errno. If the correct value would cause underflow,
81 * zero is returned and %ERANGE is stored in %errno.
82 * If memory allocation fails, %ENOMEM is stored in %errno.
83 *
84 * This function resets %errno before calling strtod() so that
85 * you can reliably detect overflow and underflow.
86 *
 * Return value: the double value.
88 **/
89
90 #if _PY_SHORT_FLOAT_REPR == 1
91
92 static double
_PyOS_ascii_strtod(const char * nptr,char ** endptr)93 _PyOS_ascii_strtod(const char *nptr, char **endptr)
94 {
95 double result;
96 _Py_SET_53BIT_PRECISION_HEADER;
97
98 assert(nptr != NULL);
99 /* Set errno to zero, so that we can distinguish zero results
100 and underflows */
101 errno = 0;
102
103 _Py_SET_53BIT_PRECISION_START;
104 result = _Py_dg_strtod(nptr, endptr);
105 _Py_SET_53BIT_PRECISION_END;
106
107 if (*endptr == nptr)
108 /* string might represent an inf or nan */
109 result = _Py_parse_inf_or_nan(nptr, endptr);
110
111 return result;
112
113 }
114
115 #else
116
/*
   Use system strtod; since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   On success the parsed value is returned and *endptr points just past the
   text that was consumed.  If nptr does not start with a number, *endptr is
   set to nptr, errno is set to EINVAL and -1.0 is returned.  If the
   temporary copy of the input cannot be allocated, *endptr is set to nptr,
   errno is set to ENOMEM and the failure value obtained from
   _Py_parse_inf_or_nan is returned.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr) {
        return val;
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) {
        goto invalid_string;
    }

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.') {
        goto invalid_string;
    }

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p)) {
            p++;
        }

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p)) {
                p++;
            }

            if (*p == 'e' || *p == 'E') {
                p++;
            }
            if (*p == '+' || *p == '-') {
                p++;
            }
            while (Py_ISDIGIT(*p)) {
                p++;
            }
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /* Python bug #1417699 */
            goto invalid_string;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_Malloc(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            /* val still holds the failure value returned by
               _Py_parse_inf_or_nan above. */
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from `copy` back into the
               caller's string.  fail_pos points into `copy` while
               decimal_point_pos points into the input, so the two must be
               compared as offsets from the start of their own buffers;
               comparing the raw pointers is meaningless.  If strtod
               stopped after the decimal point, undo the extra
               (decimal_point_len - 1) bytes that the locale's decimal
               point occupies in the copy. */
            if (fail_pos - copy > decimal_point_pos - digits_pos) {
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            }
            else {
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
            }
        }

        PyMem_Free(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing after the optional sign */
    if (fail_pos == digits_pos) {
        goto invalid_string;
    }

    if (negate && fail_pos != nptr) {
        val = -val;
    }
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
271
272 #endif
273
274 /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
275 as a string of ASCII characters) to a float. The string should not have
276 leading or trailing whitespace. The conversion is independent of the
277 current locale.
278
279 If endptr is NULL, try to convert the whole string. Raise ValueError and
280 return -1.0 if the string is not a valid representation of a floating-point
281 number.
282
283 If endptr is non-NULL, try to convert as much of the string as possible.
284 If no initial segment of the string is the valid representation of a
285 floating-point number then *endptr is set to point to the beginning of the
286 string, -1.0 is returned and again ValueError is raised.
287
288 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
289 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
290 exception is raised. Otherwise, overflow_exception should point to
291 a Python exception, this exception will be raised, -1.0 will be returned,
292 and *endptr will point just past the end of the converted value.
293
294 If any other failure occurs (for example lack of memory), -1.0 is returned
295 and the appropriate Python exception will have been set.
296 */
297
298 double
PyOS_string_to_double(const char * s,char ** endptr,PyObject * overflow_exception)299 PyOS_string_to_double(const char *s,
300 char **endptr,
301 PyObject *overflow_exception)
302 {
303 double x, result=-1.0;
304 char *fail_pos;
305
306 errno = 0;
307 x = _PyOS_ascii_strtod(s, &fail_pos);
308
309 if (errno == ENOMEM) {
310 PyErr_NoMemory();
311 fail_pos = (char *)s;
312 }
313 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
314 PyErr_Format(PyExc_ValueError,
315 "could not convert string to float: "
316 "'%.200s'", s);
317 else if (fail_pos == s)
318 PyErr_Format(PyExc_ValueError,
319 "could not convert string to float: "
320 "'%.200s'", s);
321 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
322 PyErr_Format(overflow_exception,
323 "value too large to convert to float: "
324 "'%.200s'", s);
325 else
326 result = x;
327
328 if (endptr != NULL)
329 *endptr = fail_pos;
330 return result;
331 }
332
333 /* Remove underscores that follow the underscore placement rule from
334 the string and then call the `innerfunc` function on the result.
335 It should return a new object or NULL on exception.
336
337 `what` is used for the error message emitted when underscores are detected
338 that don't follow the rule. `arg` is an opaque pointer passed to the inner
339 function.
340
341 This is used to implement underscore-agnostic conversion for floats
342 and complex numbers.
343 */
344 PyObject *
_Py_string_to_number_with_underscores(const char * s,Py_ssize_t orig_len,const char * what,PyObject * obj,void * arg,PyObject * (* innerfunc)(const char *,Py_ssize_t,void *))345 _Py_string_to_number_with_underscores(
346 const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
347 PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
348 {
349 char prev;
350 const char *p, *last;
351 char *dup, *end;
352 PyObject *result;
353
354 assert(s[orig_len] == '\0');
355
356 if (strchr(s, '_') == NULL) {
357 return innerfunc(s, orig_len, arg);
358 }
359
360 dup = PyMem_Malloc(orig_len + 1);
361 if (dup == NULL) {
362 return PyErr_NoMemory();
363 }
364 end = dup;
365 prev = '\0';
366 last = s + orig_len;
367 for (p = s; *p; p++) {
368 if (*p == '_') {
369 /* Underscores are only allowed after digits. */
370 if (!(prev >= '0' && prev <= '9')) {
371 goto error;
372 }
373 }
374 else {
375 *end++ = *p;
376 /* Underscores are only allowed before digits. */
377 if (prev == '_' && !(*p >= '0' && *p <= '9')) {
378 goto error;
379 }
380 }
381 prev = *p;
382 }
383 /* Underscores are not allowed at the end. */
384 if (prev == '_') {
385 goto error;
386 }
387 /* No embedded NULs allowed. */
388 if (p != last) {
389 goto error;
390 }
391 *end = '\0';
392 result = innerfunc(dup, end - dup, arg);
393 PyMem_Free(dup);
394 return result;
395
396 error:
397 PyMem_Free(dup);
398 PyErr_Format(PyExc_ValueError,
399 "could not convert string to %s: "
400 "%R", what, obj);
401 return NULL;
402 }
403
404 #if _PY_SHORT_FLOAT_REPR == 0
405
406 /* Given a string that may have a decimal point in the current
407 locale, change it back to a dot. Since the string cannot get
408 longer, no need for a maximum buffer size parameter. */
409 Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char * buffer)410 change_decimal_from_locale_to_dot(char* buffer)
411 {
412 struct lconv *locale_data = localeconv();
413 const char *decimal_point = locale_data->decimal_point;
414
415 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
416 size_t decimal_point_len = strlen(decimal_point);
417
418 if (*buffer == '+' || *buffer == '-')
419 buffer++;
420 while (Py_ISDIGIT(*buffer))
421 buffer++;
422 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
423 *buffer = '.';
424 buffer++;
425 if (decimal_point_len > 1) {
426 /* buffer needs to get smaller */
427 size_t rest_len = strlen(buffer +
428 (decimal_point_len - 1));
429 memmove(buffer,
430 buffer + (decimal_point_len - 1),
431 rest_len);
432 buffer[rest_len] = 0;
433 }
434 }
435 }
436 }
437
438
439 /* From the C99 standard, section 7.19.6:
440 The exponent always contains at least two digits, and only as many more digits
441 as necessary to represent the exponent.
442 */
443 #define MIN_EXPONENT_DIGITS 2
444
445 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
446 in length. */
447 Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char * buffer,size_t buf_size)448 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
449 {
450 char *p = strpbrk(buffer, "eE");
451 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
452 char *start = p + 2;
453 int exponent_digit_cnt = 0;
454 int leading_zero_cnt = 0;
455 int in_leading_zeros = 1;
456 int significant_digit_cnt;
457
458 /* Skip over the exponent and the sign. */
459 p += 2;
460
461 /* Find the end of the exponent, keeping track of leading
462 zeros. */
463 while (*p && Py_ISDIGIT(*p)) {
464 if (in_leading_zeros && *p == '0')
465 ++leading_zero_cnt;
466 if (*p != '0')
467 in_leading_zeros = 0;
468 ++p;
469 ++exponent_digit_cnt;
470 }
471
472 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
473 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
474 /* If there are 2 exactly digits, we're done,
475 regardless of what they contain */
476 }
477 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
478 int extra_zeros_cnt;
479
480 /* There are more than 2 digits in the exponent. See
481 if we can delete some of the leading zeros */
482 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
483 significant_digit_cnt = MIN_EXPONENT_DIGITS;
484 extra_zeros_cnt = exponent_digit_cnt -
485 significant_digit_cnt;
486
487 /* Delete extra_zeros_cnt worth of characters from the
488 front of the exponent */
489 assert(extra_zeros_cnt >= 0);
490
491 /* Add one to significant_digit_cnt to copy the
492 trailing 0 byte, thus setting the length */
493 memmove(start,
494 start + extra_zeros_cnt,
495 significant_digit_cnt + 1);
496 }
497 else {
498 /* If there are fewer than 2 digits, add zeros
499 until there are 2, if there's enough room */
500 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
501 if (start + zeros + exponent_digit_cnt + 1
502 < buffer + buf_size) {
503 memmove(start + zeros, start,
504 exponent_digit_cnt + 1);
505 memset(start, '0', zeros);
506 }
507 }
508 }
509 }
510
511 /* Remove trailing zeros after the decimal point from a numeric string; also
512 remove the decimal point if all digits following it are zero. The numeric
513 string must end in '\0', and should not have any leading or trailing
514 whitespace. Assumes that the decimal point is '.'. */
515 Py_LOCAL_INLINE(void)
remove_trailing_zeros(char * buffer)516 remove_trailing_zeros(char *buffer)
517 {
518 char *old_fraction_end, *new_fraction_end, *end, *p;
519
520 p = buffer;
521 if (*p == '-' || *p == '+')
522 /* Skip leading sign, if present */
523 ++p;
524 while (Py_ISDIGIT(*p))
525 ++p;
526
527 /* if there's no decimal point there's nothing to do */
528 if (*p++ != '.')
529 return;
530
531 /* scan any digits after the point */
532 while (Py_ISDIGIT(*p))
533 ++p;
534 old_fraction_end = p;
535
536 /* scan up to ending '\0' */
537 while (*p != '\0')
538 p++;
539 /* +1 to make sure that we move the null byte as well */
540 end = p+1;
541
542 /* scan back from fraction_end, looking for removable zeros */
543 p = old_fraction_end;
544 while (*(p-1) == '0')
545 --p;
546 /* and remove point if we've got that far */
547 if (*(p-1) == '.')
548 --p;
549 new_fraction_end = p;
550
551 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
552 }
553
554 /* Ensure that buffer has a decimal point in it. The decimal point will not
555 be in the current locale, it will always be '.'. Don't add a decimal point
556 if an exponent is present. Also, convert to exponential notation where
557 adding a '.0' would produce too many significant digits (see issue 5864).
558
559 Returns a pointer to the fixed buffer, or NULL on failure.
560 */
561 Py_LOCAL_INLINE(char *)
ensure_decimal_point(char * buffer,size_t buf_size,int precision)562 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
563 {
564 int digit_count, insert_count = 0, convert_to_exp = 0;
565 const char *chars_to_insert;
566 char *digits_start;
567
568 /* search for the first non-digit character */
569 char *p = buffer;
570 if (*p == '-' || *p == '+')
571 /* Skip leading sign, if present. I think this could only
572 ever be '-', but it can't hurt to check for both. */
573 ++p;
574 digits_start = p;
575 while (*p && Py_ISDIGIT(*p))
576 ++p;
577 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
578
579 if (*p == '.') {
580 if (Py_ISDIGIT(*(p+1))) {
581 /* Nothing to do, we already have a decimal
582 point and a digit after it */
583 }
584 else {
585 /* We have a decimal point, but no following
586 digit. Insert a zero after the decimal. */
587 /* can't ever get here via PyOS_double_to_string */
588 assert(precision == -1);
589 ++p;
590 chars_to_insert = "0";
591 insert_count = 1;
592 }
593 }
594 else if (!(*p == 'e' || *p == 'E')) {
595 /* Don't add ".0" if we have an exponent. */
596 if (digit_count == precision) {
597 /* issue 5864: don't add a trailing .0 in the case
598 where the '%g'-formatted result already has as many
599 significant digits as were requested. Switch to
600 exponential notation instead. */
601 convert_to_exp = 1;
602 /* no exponent, no point, and we shouldn't land here
603 for infs and nans, so we must be at the end of the
604 string. */
605 assert(*p == '\0');
606 }
607 else {
608 assert(precision == -1 || digit_count < precision);
609 chars_to_insert = ".0";
610 insert_count = 2;
611 }
612 }
613 if (insert_count) {
614 size_t buf_len = strlen(buffer);
615 if (buf_len + insert_count + 1 >= buf_size) {
616 /* If there is not enough room in the buffer
617 for the additional text, just skip it. It's
618 not worth generating an error over. */
619 }
620 else {
621 memmove(p + insert_count, p,
622 buffer + strlen(buffer) - p + 1);
623 memcpy(p, chars_to_insert, insert_count);
624 }
625 }
626 if (convert_to_exp) {
627 int written;
628 size_t buf_avail;
629 p = digits_start;
630 /* insert decimal point */
631 assert(digit_count >= 1);
632 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
633 p[1] = '.';
634 p += digit_count+1;
635 assert(p <= buf_size+buffer);
636 buf_avail = buf_size+buffer-p;
637 if (buf_avail == 0)
638 return NULL;
639 /* Add exponent. It's okay to use lower case 'e': we only
640 arrive here as a result of using the empty format code or
641 repr/str builtins and those never want an upper case 'E' */
642 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
643 if (!(0 <= written &&
644 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
645 /* output truncated, or something else bad happened */
646 return NULL;
647 remove_trailing_zeros(buffer);
648 }
649 return buffer;
650 }
651
652 /* see FORMATBUFLEN in unicodeobject.c */
653 #define FLOAT_FORMATBUFLEN 120
654
655 /**
656 * _PyOS_ascii_formatd:
657 * @buffer: A buffer to place the resulting string in
658 * @buf_size: The length of the buffer.
659 * @format: The printf()-style format to use for the
660 * code to use for converting.
661 * @d: The #gdouble to convert
662 * @precision: The precision to use when formatting.
663 *
664 * Converts a #gdouble to a string, using the '.' as
665 * decimal point. To format the number you pass in
666 * a printf()-style format string. Allowed conversion
667 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
668 *
669 * 'Z' is the same as 'g', except it always has a decimal and
670 * at least one digit after the decimal.
671 *
672 * Return value: The pointer to the buffer with the converted string.
673 * On failure returns NULL but does not set any Python exception.
674 **/
675 static char *
_PyOS_ascii_formatd(char * buffer,size_t buf_size,const char * format,double d,int precision)676 _PyOS_ascii_formatd(char *buffer,
677 size_t buf_size,
678 const char *format,
679 double d,
680 int precision)
681 {
682 char format_char;
683 size_t format_len = strlen(format);
684
685 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
686 also with at least one character past the decimal. */
687 char tmp_format[FLOAT_FORMATBUFLEN];
688
689 /* The last character in the format string must be the format char */
690 format_char = format[format_len - 1];
691
692 if (format[0] != '%')
693 return NULL;
694
695 /* I'm not sure why this test is here. It's ensuring that the format
696 string after the first character doesn't have a single quote, a
697 lowercase l, or a percent. This is the reverse of the commented-out
698 test about 10 lines ago. */
699 if (strpbrk(format + 1, "'l%"))
700 return NULL;
701
702 /* Also curious about this function is that it accepts format strings
703 like "%xg", which are invalid for floats. In general, the
704 interface to this function is not very good, but changing it is
705 difficult because it's a public API. */
706
707 if (!(format_char == 'e' || format_char == 'E' ||
708 format_char == 'f' || format_char == 'F' ||
709 format_char == 'g' || format_char == 'G' ||
710 format_char == 'Z'))
711 return NULL;
712
713 /* Map 'Z' format_char to 'g', by copying the format string and
714 replacing the final char with a 'g' */
715 if (format_char == 'Z') {
716 if (format_len + 1 >= sizeof(tmp_format)) {
717 /* The format won't fit in our copy. Error out. In
718 practice, this will never happen and will be
719 detected by returning NULL */
720 return NULL;
721 }
722 strcpy(tmp_format, format);
723 tmp_format[format_len - 1] = 'g';
724 format = tmp_format;
725 }
726
727
728 /* Have PyOS_snprintf do the hard work */
729 PyOS_snprintf(buffer, buf_size, format, d);
730
731 /* Do various fixups on the return string */
732
733 /* Get the current locale, and find the decimal point string.
734 Convert that string back to a dot. */
735 change_decimal_from_locale_to_dot(buffer);
736
737 /* If an exponent exists, ensure that the exponent is at least
738 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
739 for the extra zeros. Also, if there are more than
740 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
741 back to MIN_EXPONENT_DIGITS */
742 ensure_minimum_exponent_length(buffer, buf_size);
743
744 /* If format_char is 'Z', make sure we have at least one character
745 after the decimal point (and make sure we have a decimal point);
746 also switch to exponential notation in some edge cases where the
747 extra character would produce more significant digits that we
748 really want. */
749 if (format_char == 'Z')
750 buffer = ensure_decimal_point(buffer, buf_size, precision);
751
752 return buffer;
753 }
754
755 /* The fallback code to use if _Py_dg_dtoa is not available. */
756
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)757 char * PyOS_double_to_string(double val,
758 char format_code,
759 int precision,
760 int flags,
761 int *type)
762 {
763 char format[32];
764 Py_ssize_t bufsize;
765 char *buf;
766 int t, exp;
767 int upper = 0;
768
769 /* Validate format_code, and map upper and lower case */
770 switch (format_code) {
771 case 'e': /* exponent */
772 case 'f': /* fixed */
773 case 'g': /* general */
774 break;
775 case 'E':
776 upper = 1;
777 format_code = 'e';
778 break;
779 case 'F':
780 upper = 1;
781 format_code = 'f';
782 break;
783 case 'G':
784 upper = 1;
785 format_code = 'g';
786 break;
787 case 'r': /* repr format */
788 /* Supplied precision is unused, must be 0. */
789 if (precision != 0) {
790 PyErr_BadInternalCall();
791 return NULL;
792 }
793 /* The repr() precision (17 significant decimal digits) is the
794 minimal number that is guaranteed to have enough precision
795 so that if the number is read back in the exact same binary
796 value is recreated. This is true for IEEE floating point
797 by design, and also happens to work for all other modern
798 hardware. */
799 precision = 17;
800 format_code = 'g';
801 break;
802 default:
803 PyErr_BadInternalCall();
804 return NULL;
805 }
806
807 /* Here's a quick-and-dirty calculation to figure out how big a buffer
808 we need. In general, for a finite float we need:
809
810 1 byte for each digit of the decimal significand, and
811
812 1 for a possible sign
813 1 for a possible decimal point
814 2 for a possible [eE][+-]
815 1 for each digit of the exponent; if we allow 19 digits
816 total then we're safe up to exponents of 2**63.
817 1 for the trailing nul byte
818
819 This gives a total of 24 + the number of digits in the significand,
820 and the number of digits in the significand is:
821
822 for 'g' format: at most precision, except possibly
823 when precision == 0, when it's 1.
824 for 'e' format: precision+1
825 for 'f' format: precision digits after the point, at least 1
826 before. To figure out how many digits appear before the point
827 we have to examine the size of the number. If fabs(val) < 1.0
828 then there will be only one digit before the point. If
829 fabs(val) >= 1.0, then there are at most
830
831 1+floor(log10(ceiling(fabs(val))))
832
833 digits before the point (where the 'ceiling' allows for the
834 possibility that the rounding rounds the integer part of val
835 up). A safe upper bound for the above quantity is
836 1+floor(exp/3), where exp is the unique integer such that 0.5
837 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
838 frexp.
839
840 So we allow room for precision+1 digits for all formats, plus an
841 extra floor(exp/3) digits for 'f' format.
842
843 */
844
845 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
846 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
847 bufsize = 5;
848 else {
849 bufsize = 25 + precision;
850 if (format_code == 'f' && fabs(val) >= 1.0) {
851 frexp(val, &exp);
852 bufsize += exp/3;
853 }
854 }
855
856 buf = PyMem_Malloc(bufsize);
857 if (buf == NULL) {
858 PyErr_NoMemory();
859 return NULL;
860 }
861
862 /* Handle nan and inf. */
863 if (Py_IS_NAN(val)) {
864 strcpy(buf, "nan");
865 t = Py_DTST_NAN;
866 } else if (Py_IS_INFINITY(val)) {
867 if (copysign(1., val) == 1.)
868 strcpy(buf, "inf");
869 else
870 strcpy(buf, "-inf");
871 t = Py_DTST_INFINITE;
872 } else {
873 t = Py_DTST_FINITE;
874 if (flags & Py_DTSF_ADD_DOT_0)
875 format_code = 'Z';
876
877 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
878 (flags & Py_DTSF_ALT ? "#" : ""), precision,
879 format_code);
880 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
881
882 if (flags & Py_DTSF_NO_NEG_0 && buf[0] == '-') {
883 char *buf2 = buf + 1;
884 while (*buf2 == '0' || *buf2 == '.') {
885 ++buf2;
886 }
887 if (*buf2 == 0 || *buf2 == 'e') {
888 size_t len = buf2 - buf + strlen(buf2);
889 assert(buf[len] == 0);
890 memmove(buf, buf+1, len);
891 }
892 }
893 }
894
895 /* Add sign when requested. It's convenient (esp. when formatting
896 complex numbers) to include a sign even for inf and nan. */
897 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
898 size_t len = strlen(buf);
899 /* the bufsize calculations above should ensure that we've got
900 space to add a sign */
901 assert((size_t)bufsize >= len+2);
902 memmove(buf+1, buf, len+1);
903 buf[0] = '+';
904 }
905 if (upper) {
906 /* Convert to upper case. */
907 char *p1;
908 for (p1 = buf; *p1; p1++)
909 *p1 = Py_TOUPPER(*p1);
910 }
911
912 if (type)
913 *type = t;
914 return buf;
915 }
916
917 #else // _PY_SHORT_FLOAT_REPR == 1
918
919 /* _Py_dg_dtoa is available. */
920
/* I'm using a lookup table here so that I don't have to invent a non-locale
   specific way to convert to uppercase */

/* Indices into lc_float_strings and uc_float_strings below. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */

/* Lower-case spellings, indexed by the OFS_* constants. */
static const char * const lc_float_strings[] = {
    "inf",
    "nan",
    "e",
};
/* Upper-case spellings, in the same order as lc_float_strings. */
static const char * const uc_float_strings[] = {
    "INF",
    "NAN",
    "E",
};
938
939
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
   memory containing the resulting string.
942
943 Arguments:
944 d is the double to be converted
945 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
946 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
947 mode is one of '0', '2' or '3', and is completely determined by
948 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
949 precision is the desired precision
950 always_add_sign is nonzero if a '+' sign should be included for positive
951 numbers
952 add_dot_0_if_integer is nonzero if integers in non-exponential form
953 should have ".0" added. Only applies to format codes 'r' and 'g'.
954 use_alt_formatting is nonzero if alternative formatting should be
955 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
956 at most one of use_alt_formatting and add_dot_0_if_integer should
957 be nonzero.
958 type, if non-NULL, will be set to one of these constants to identify
959 the type of the 'd' argument:
960 Py_DTST_FINITE
961 Py_DTST_INFINITE
962 Py_DTST_NAN
963
964 Returns a PyMem_Malloc'd block of memory containing the resulting string,
965 or NULL on error. If NULL is returned, the Python error has been set.
966 */
967
968 static char *
format_float_short(double d,char format_code,int mode,int precision,int always_add_sign,int add_dot_0_if_integer,int use_alt_formatting,int no_negative_zero,const char * const * float_strings,int * type)969 format_float_short(double d, char format_code,
970 int mode, int precision,
971 int always_add_sign, int add_dot_0_if_integer,
972 int use_alt_formatting, int no_negative_zero,
973 const char * const *float_strings, int *type)
974 {
975 char *buf = NULL;
976 char *p = NULL;
977 Py_ssize_t bufsize = 0;
978 char *digits, *digits_end;
979 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
980 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
981 _Py_SET_53BIT_PRECISION_HEADER;
982
983 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
984 Must be matched by a call to _Py_dg_freedtoa. */
985 _Py_SET_53BIT_PRECISION_START;
986 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
987 &digits_end);
988 _Py_SET_53BIT_PRECISION_END;
989
990 decpt = (Py_ssize_t)decpt_as_int;
991 if (digits == NULL) {
992 /* The only failure mode is no memory. */
993 PyErr_NoMemory();
994 goto exit;
995 }
996 assert(digits_end != NULL && digits_end >= digits);
997 digits_len = digits_end - digits;
998
999 if (no_negative_zero && sign == 1 &&
1000 (digits_len == 0 || (digits_len == 1 && digits[0] == '0'))) {
1001 sign = 0;
1002 }
1003
1004 if (digits_len && !Py_ISDIGIT(digits[0])) {
1005 /* Infinities and nans here; adapt Gay's output,
1006 so convert Infinity to inf and NaN to nan, and
1007 ignore sign of nan. Then return. */
1008
1009 /* ignore the actual sign of a nan */
1010 if (digits[0] == 'n' || digits[0] == 'N')
1011 sign = 0;
1012
1013 /* We only need 5 bytes to hold the result "+inf\0" . */
1014 bufsize = 5; /* Used later in an assert. */
1015 buf = (char *)PyMem_Malloc(bufsize);
1016 if (buf == NULL) {
1017 PyErr_NoMemory();
1018 goto exit;
1019 }
1020 p = buf;
1021
1022 if (sign == 1) {
1023 *p++ = '-';
1024 }
1025 else if (always_add_sign) {
1026 *p++ = '+';
1027 }
1028 if (digits[0] == 'i' || digits[0] == 'I') {
1029 strncpy(p, float_strings[OFS_INF], 3);
1030 p += 3;
1031
1032 if (type)
1033 *type = Py_DTST_INFINITE;
1034 }
1035 else if (digits[0] == 'n' || digits[0] == 'N') {
1036 strncpy(p, float_strings[OFS_NAN], 3);
1037 p += 3;
1038
1039 if (type)
1040 *type = Py_DTST_NAN;
1041 }
1042 else {
1043 /* shouldn't get here: Gay's code should always return
1044 something starting with a digit, an 'I', or 'N' */
1045 Py_UNREACHABLE();
1046 }
1047 goto exit;
1048 }
1049
1050 /* The result must be finite (not inf or nan). */
1051 if (type)
1052 *type = Py_DTST_FINITE;
1053
1054
1055 /* We got digits back, format them. We may need to pad 'digits'
1056 either on the left or right (or both) with extra zeros, so in
1057 general the resulting string has the form
1058
1059 [<sign>]<zeros><digits><zeros>[<exponent>]
1060
1061 where either of the <zeros> pieces could be empty, and there's a
1062 decimal point that could appear either in <digits> or in the
1063 leading or trailing <zeros>.
1064
1065 Imagine an infinite 'virtual' string vdigits, consisting of the
1066 string 'digits' (starting at index 0) padded on both the left and
1067 right with infinite strings of zeros. We want to output a slice
1068
1069 vdigits[vdigits_start : vdigits_end]
1070
1071 of this virtual string. Thus if vdigits_start < 0 then we'll end
1072 up producing some leading zeros; if vdigits_end > digits_len there
1073 will be trailing zeros in the output. The next section of code
1074 determines whether to use an exponent or not, figures out the
1075 position 'decpt' of the decimal point, and computes 'vdigits_start'
1076 and 'vdigits_end'. */
1077 vdigits_end = digits_len;
1078 switch (format_code) {
1079 case 'e':
1080 use_exp = 1;
1081 vdigits_end = precision;
1082 break;
1083 case 'f':
1084 vdigits_end = decpt + precision;
1085 break;
1086 case 'g':
1087 if (decpt <= -4 || decpt >
1088 (add_dot_0_if_integer ? precision-1 : precision))
1089 use_exp = 1;
1090 if (use_alt_formatting)
1091 vdigits_end = precision;
1092 break;
1093 case 'r':
1094 /* convert to exponential format at 1e16. We used to convert
1095 at 1e17, but that gives odd-looking results for some values
1096 when a 16-digit 'shortest' repr is padded with bogus zeros.
1097 For example, repr(2e16+8) would give 20000000000000010.0;
1098 the true value is 20000000000000008.0. */
1099 if (decpt <= -4 || decpt > 16)
1100 use_exp = 1;
1101 break;
1102 default:
1103 PyErr_BadInternalCall();
1104 goto exit;
1105 }
1106
1107 /* if using an exponent, reset decimal point position to 1 and adjust
1108 exponent accordingly.*/
1109 if (use_exp) {
1110 exp = (int)decpt - 1;
1111 decpt = 1;
1112 }
1113 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1114 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1115 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1116 if (!use_exp && add_dot_0_if_integer)
1117 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1118 else
1119 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1120
1121 /* double check inequalities */
1122 assert(vdigits_start <= 0 &&
1123 0 <= digits_len &&
1124 digits_len <= vdigits_end);
1125 /* decimal point should be in (vdigits_start, vdigits_end] */
1126 assert(vdigits_start < decpt && decpt <= vdigits_end);
1127
1128 /* Compute an upper bound how much memory we need. This might be a few
1129 chars too long, but no big deal. */
1130 bufsize =
1131 /* sign, decimal point and trailing 0 byte */
1132 3 +
1133
1134 /* total digit count (including zero padding on both sides) */
1135 (vdigits_end - vdigits_start) +
1136
1137 /* exponent "e+100", max 3 numerical digits */
1138 (use_exp ? 5 : 0);
1139
1140 /* Now allocate the memory and initialize p to point to the start of
1141 it. */
1142 buf = (char *)PyMem_Malloc(bufsize);
1143 if (buf == NULL) {
1144 PyErr_NoMemory();
1145 goto exit;
1146 }
1147 p = buf;
1148
1149 /* Add a negative sign if negative, and a plus sign if non-negative
1150 and always_add_sign is true. */
1151 if (sign == 1)
1152 *p++ = '-';
1153 else if (always_add_sign)
1154 *p++ = '+';
1155
1156 /* note that exactly one of the three 'if' conditions is true,
1157 so we include exactly one decimal point */
1158 /* Zero padding on left of digit string */
1159 if (decpt <= 0) {
1160 memset(p, '0', decpt-vdigits_start);
1161 p += decpt - vdigits_start;
1162 *p++ = '.';
1163 memset(p, '0', 0-decpt);
1164 p += 0-decpt;
1165 }
1166 else {
1167 memset(p, '0', 0-vdigits_start);
1168 p += 0 - vdigits_start;
1169 }
1170
1171 /* Digits, with included decimal point */
1172 if (0 < decpt && decpt <= digits_len) {
1173 strncpy(p, digits, decpt-0);
1174 p += decpt-0;
1175 *p++ = '.';
1176 strncpy(p, digits+decpt, digits_len-decpt);
1177 p += digits_len-decpt;
1178 }
1179 else {
1180 strncpy(p, digits, digits_len);
1181 p += digits_len;
1182 }
1183
1184 /* And zeros on the right */
1185 if (digits_len < decpt) {
1186 memset(p, '0', decpt-digits_len);
1187 p += decpt-digits_len;
1188 *p++ = '.';
1189 memset(p, '0', vdigits_end-decpt);
1190 p += vdigits_end-decpt;
1191 }
1192 else {
1193 memset(p, '0', vdigits_end-digits_len);
1194 p += vdigits_end-digits_len;
1195 }
1196
1197 /* Delete a trailing decimal pt unless using alternative formatting. */
1198 if (p[-1] == '.' && !use_alt_formatting)
1199 p--;
1200
1201 /* Now that we've done zero padding, add an exponent if needed. */
1202 if (use_exp) {
1203 *p++ = float_strings[OFS_E][0];
1204 exp_len = sprintf(p, "%+.02d", exp);
1205 p += exp_len;
1206 }
1207 exit:
1208 if (buf) {
1209 *p = '\0';
1210 /* It's too late if this fails, as we've already stepped on
1211 memory that isn't ours. But it's an okay debugging test. */
1212 assert(p-buf < bufsize);
1213 }
1214 if (digits)
1215 _Py_dg_freedtoa(digits);
1216
1217 return buf;
1218 }
1219
1220
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)1221 char * PyOS_double_to_string(double val,
1222 char format_code,
1223 int precision,
1224 int flags,
1225 int *type)
1226 {
1227 const char * const *float_strings = lc_float_strings;
1228 int mode;
1229
1230 /* Validate format_code, and map upper and lower case. Compute the
1231 mode and make any adjustments as needed. */
1232 switch (format_code) {
1233 /* exponent */
1234 case 'E':
1235 float_strings = uc_float_strings;
1236 format_code = 'e';
1237 /* Fall through. */
1238 case 'e':
1239 mode = 2;
1240 precision++;
1241 break;
1242
1243 /* fixed */
1244 case 'F':
1245 float_strings = uc_float_strings;
1246 format_code = 'f';
1247 /* Fall through. */
1248 case 'f':
1249 mode = 3;
1250 break;
1251
1252 /* general */
1253 case 'G':
1254 float_strings = uc_float_strings;
1255 format_code = 'g';
1256 /* Fall through. */
1257 case 'g':
1258 mode = 2;
1259 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1260 if (precision == 0)
1261 precision = 1;
1262 break;
1263
1264 /* repr format */
1265 case 'r':
1266 mode = 0;
1267 /* Supplied precision is unused, must be 0. */
1268 if (precision != 0) {
1269 PyErr_BadInternalCall();
1270 return NULL;
1271 }
1272 break;
1273
1274 default:
1275 PyErr_BadInternalCall();
1276 return NULL;
1277 }
1278
1279 return format_float_short(val, format_code, mode, precision,
1280 flags & Py_DTSF_SIGN,
1281 flags & Py_DTSF_ADD_DOT_0,
1282 flags & Py_DTSF_ALT,
1283 flags & Py_DTSF_NO_NEG_0,
1284 float_strings, type);
1285 }
1286 #endif // _PY_SHORT_FLOAT_REPR == 1
1287