1 /* -*- Mode: C; c-file-style: "python" -*- */
2
3 #include <Python.h>
4 #include <locale.h>
5
6 /* Case-insensitive string match used for nan and inf detection; t should be
7 lower-case. Returns 1 for a successful match, 0 otherwise. */
8
static int
case_insensitive_match(const char *s, const char *t)
{
    /* Walk both strings in lock step; t is the (already lower-case)
       pattern, so only s needs folding.  Any mismatch before t is
       exhausted is a failed match. */
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    /* Every character of t was matched. */
    return 1;
}
18
19 /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20 "infinity", with an optional leading sign of "+" or "-". On success,
21 return the NaN or Infinity as a double and set *endptr to point just beyond
22 the successfully parsed portion of the string. On failure, return -1.0 and
23 set *endptr to point to the start of the string. */
24
25 #ifndef PY_NO_SHORT_FLOAT_REPR
26
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = 0;
    double value;

    /* Consume an optional leading sign. */
    switch (*cursor) {
    case '-':
        is_negative = 1;
        cursor++;
        break;
    case '+':
        cursor++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cursor, "inf")) {
        /* Accept both the short form "inf" and the long form "infinity". */
        cursor += 3;
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cursor, "nan")) {
        cursor += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: signal failure by leaving *endptr at the
           start of the input and returning -1.0. */
        cursor = p;
        value = -1.0;
    }

    *endptr = (char *)cursor;
    return value;
}
59
60 #else
61
62 double
_Py_parse_inf_or_nan(const char * p,char ** endptr)63 _Py_parse_inf_or_nan(const char *p, char **endptr)
64 {
65 double retval;
66 const char *s;
67 int negate = 0;
68
69 s = p;
70 if (*s == '-') {
71 negate = 1;
72 s++;
73 }
74 else if (*s == '+') {
75 s++;
76 }
77 if (case_insensitive_match(s, "inf")) {
78 s += 3;
79 if (case_insensitive_match(s, "inity"))
80 s += 5;
81 retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82 }
83 #ifdef Py_NAN
84 else if (case_insensitive_match(s, "nan")) {
85 s += 3;
86 retval = negate ? -Py_NAN : Py_NAN;
87 }
88 #endif
89 else {
90 s = p;
91 retval = -1.0;
92 }
93 *endptr = (char *)s;
94 return retval;
95 }
96
97 #endif
98
99 /**
100 * _PyOS_ascii_strtod:
101 * @nptr: the string to convert to a numeric value.
102 * @endptr: if non-%NULL, it returns the character after
103 * the last character used in the conversion.
104 *
 * Converts a string to a double value.
106 * This function behaves like the standard strtod() function
107 * does in the C locale. It does this without actually
108 * changing the current locale, since that would not be
109 * thread-safe.
110 *
111 * This function is typically used when reading configuration
112 * files or other non-user input that should be locale independent.
113 * To handle input from the user you should normally use the
114 * locale-sensitive system strtod() function.
115 *
116 * If the correct value would cause overflow, plus or minus %HUGE_VAL
117 * is returned (according to the sign of the value), and %ERANGE is
118 * stored in %errno. If the correct value would cause underflow,
119 * zero is returned and %ERANGE is stored in %errno.
120 * If memory allocation fails, %ENOMEM is stored in %errno.
121 *
122 * This function resets %errno before calling strtod() so that
123 * you can reliably detect overflow and underflow.
124 *
 * Return value: the double value.
126 **/
127
128 #ifndef PY_NO_SHORT_FLOAT_REPR
129
130 static double
_PyOS_ascii_strtod(const char * nptr,char ** endptr)131 _PyOS_ascii_strtod(const char *nptr, char **endptr)
132 {
133 double result;
134 _Py_SET_53BIT_PRECISION_HEADER;
135
136 assert(nptr != NULL);
137 /* Set errno to zero, so that we can distinguish zero results
138 and underflows */
139 errno = 0;
140
141 _Py_SET_53BIT_PRECISION_START;
142 result = _Py_dg_strtod(nptr, endptr);
143 _Py_SET_53BIT_PRECISION_END;
144
145 if (*endptr == nptr)
146 /* string might represent an inf or nan */
147 result = _Py_parse_inf_or_nan(nptr, endptr);
148
149 return result;
150
151 }
152
153 #else
154
155 /*
156 Use system strtod; since strtod is locale aware, we may
157 have to first fix the decimal separator.
158
159 Note that unlike _Py_dg_strtod, the system strtod may not always give
160 correctly rounded results.
161 */
162
/* Locale-independent strtod() built on top of the C library's strtod().

   nptr   -- NUL-terminated string to parse; must not be NULL.
   endptr -- must not be NULL; receives a pointer just past the parsed
             text, or nptr itself if nothing could be parsed.

   Returns the parsed value.  For malformed input, returns -1.0 with
   *endptr == nptr and errno == EINVAL.  If the temporary copy of the
   input cannot be allocated, errno is set to ENOMEM and *endptr == nptr. */
static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            /* val still holds what _Py_parse_inf_or_nan returned when it
               failed to match. */
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into 'copy'; translate it back to the
               matching position in the caller's string.  Compare
               *offsets* rather than raw pointers: 'copy' and the input
               are distinct buffers, so comparing their addresses is
               meaningless.  If strtod stopped beyond the decimal point,
               compensate for the locale's decimal point being
               (decimal_point_len - 1) bytes longer than '.'. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309
310 #endif
311
312 /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313 as a string of ASCII characters) to a float. The string should not have
314 leading or trailing whitespace. The conversion is independent of the
315 current locale.
316
317 If endptr is NULL, try to convert the whole string. Raise ValueError and
318 return -1.0 if the string is not a valid representation of a floating-point
319 number.
320
321 If endptr is non-NULL, try to convert as much of the string as possible.
322 If no initial segment of the string is the valid representation of a
323 floating-point number then *endptr is set to point to the beginning of the
324 string, -1.0 is returned and again ValueError is raised.
325
326 On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327 if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328 exception is raised. Otherwise, overflow_exception should point to
329 a Python exception, this exception will be raised, -1.0 will be returned,
330 and *endptr will point just past the end of the converted value.
331
332 If any other failure occurs (for example lack of memory), -1.0 is returned
333 and the appropriate Python exception will have been set.
334 */
335
336 double
PyOS_string_to_double(const char * s,char ** endptr,PyObject * overflow_exception)337 PyOS_string_to_double(const char *s,
338 char **endptr,
339 PyObject *overflow_exception)
340 {
341 double x, result=-1.0;
342 char *fail_pos;
343
344 errno = 0;
345 PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346 x = _PyOS_ascii_strtod(s, &fail_pos);
347 PyFPE_END_PROTECT(x)
348
349 if (errno == ENOMEM) {
350 PyErr_NoMemory();
351 fail_pos = (char *)s;
352 }
353 else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354 PyErr_Format(PyExc_ValueError,
355 "could not convert string to float: "
356 "%.200s", s);
357 else if (fail_pos == s)
358 PyErr_Format(PyExc_ValueError,
359 "could not convert string to float: "
360 "%.200s", s);
361 else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362 PyErr_Format(overflow_exception,
363 "value too large to convert to float: "
364 "%.200s", s);
365 else
366 result = x;
367
368 if (endptr != NULL)
369 *endptr = fail_pos;
370 return result;
371 }
372
373 /* Remove underscores that follow the underscore placement rule from
374 the string and then call the `innerfunc` function on the result.
375 It should return a new object or NULL on exception.
376
377 `what` is used for the error message emitted when underscores are detected
378 that don't follow the rule. `arg` is an opaque pointer passed to the inner
379 function.
380
381 This is used to implement underscore-agnostic conversion for floats
382 and complex numbers.
383 */
384 PyObject *
_Py_string_to_number_with_underscores(const char * s,Py_ssize_t orig_len,const char * what,PyObject * obj,void * arg,PyObject * (* innerfunc)(const char *,Py_ssize_t,void *))385 _Py_string_to_number_with_underscores(
386 const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
387 PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
388 {
389 char prev;
390 const char *p, *last;
391 char *dup, *end;
392 PyObject *result;
393
394 if (strchr(s, '_') == NULL) {
395 return innerfunc(s, orig_len, arg);
396 }
397
398 dup = PyMem_Malloc(orig_len + 1);
399 end = dup;
400 prev = '\0';
401 last = s + orig_len;
402 for (p = s; *p; p++) {
403 if (*p == '_') {
404 /* Underscores are only allowed after digits. */
405 if (!(prev >= '0' && prev <= '9')) {
406 goto error;
407 }
408 }
409 else {
410 *end++ = *p;
411 /* Underscores are only allowed before digits. */
412 if (prev == '_' && !(*p >= '0' && *p <= '9')) {
413 goto error;
414 }
415 }
416 prev = *p;
417 }
418 /* Underscores are not allowed at the end. */
419 if (prev == '_') {
420 goto error;
421 }
422 /* No embedded NULs allowed. */
423 if (p != last) {
424 goto error;
425 }
426 *end = '\0';
427 result = innerfunc(dup, end - dup, arg);
428 PyMem_Free(dup);
429 return result;
430
431 error:
432 PyMem_Free(dup);
433 PyErr_Format(PyExc_ValueError,
434 "could not convert string to %s: "
435 "%R", what, obj);
436 return NULL;
437 }
438
439 #ifdef PY_NO_SHORT_FLOAT_REPR
440
441 /* Given a string that may have a decimal point in the current
442 locale, change it back to a dot. Since the string cannot get
443 longer, no need for a maximum buffer size parameter. */
444 Py_LOCAL_INLINE(void)
change_decimal_from_locale_to_dot(char * buffer)445 change_decimal_from_locale_to_dot(char* buffer)
446 {
447 struct lconv *locale_data = localeconv();
448 const char *decimal_point = locale_data->decimal_point;
449
450 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
451 size_t decimal_point_len = strlen(decimal_point);
452
453 if (*buffer == '+' || *buffer == '-')
454 buffer++;
455 while (Py_ISDIGIT(*buffer))
456 buffer++;
457 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
458 *buffer = '.';
459 buffer++;
460 if (decimal_point_len > 1) {
461 /* buffer needs to get smaller */
462 size_t rest_len = strlen(buffer +
463 (decimal_point_len - 1));
464 memmove(buffer,
465 buffer + (decimal_point_len - 1),
466 rest_len);
467 buffer[rest_len] = 0;
468 }
469 }
470 }
471 }
472
473
474 /* From the C99 standard, section 7.19.6:
475 The exponent always contains at least two digits, and only as many more digits
476 as necessary to represent the exponent.
477 */
478 #define MIN_EXPONENT_DIGITS 2
479
480 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
481 in length. */
482 Py_LOCAL_INLINE(void)
ensure_minimum_exponent_length(char * buffer,size_t buf_size)483 ensure_minimum_exponent_length(char* buffer, size_t buf_size)
484 {
485 char *p = strpbrk(buffer, "eE");
486 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
487 char *start = p + 2;
488 int exponent_digit_cnt = 0;
489 int leading_zero_cnt = 0;
490 int in_leading_zeros = 1;
491 int significant_digit_cnt;
492
493 /* Skip over the exponent and the sign. */
494 p += 2;
495
496 /* Find the end of the exponent, keeping track of leading
497 zeros. */
498 while (*p && Py_ISDIGIT(*p)) {
499 if (in_leading_zeros && *p == '0')
500 ++leading_zero_cnt;
501 if (*p != '0')
502 in_leading_zeros = 0;
503 ++p;
504 ++exponent_digit_cnt;
505 }
506
507 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
508 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
509 /* If there are 2 exactly digits, we're done,
510 regardless of what they contain */
511 }
512 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
513 int extra_zeros_cnt;
514
515 /* There are more than 2 digits in the exponent. See
516 if we can delete some of the leading zeros */
517 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
518 significant_digit_cnt = MIN_EXPONENT_DIGITS;
519 extra_zeros_cnt = exponent_digit_cnt -
520 significant_digit_cnt;
521
522 /* Delete extra_zeros_cnt worth of characters from the
523 front of the exponent */
524 assert(extra_zeros_cnt >= 0);
525
526 /* Add one to significant_digit_cnt to copy the
527 trailing 0 byte, thus setting the length */
528 memmove(start,
529 start + extra_zeros_cnt,
530 significant_digit_cnt + 1);
531 }
532 else {
533 /* If there are fewer than 2 digits, add zeros
534 until there are 2, if there's enough room */
535 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
536 if (start + zeros + exponent_digit_cnt + 1
537 < buffer + buf_size) {
538 memmove(start + zeros, start,
539 exponent_digit_cnt + 1);
540 memset(start, '0', zeros);
541 }
542 }
543 }
544 }
545
546 /* Remove trailing zeros after the decimal point from a numeric string; also
547 remove the decimal point if all digits following it are zero. The numeric
548 string must end in '\0', and should not have any leading or trailing
549 whitespace. Assumes that the decimal point is '.'. */
550 Py_LOCAL_INLINE(void)
remove_trailing_zeros(char * buffer)551 remove_trailing_zeros(char *buffer)
552 {
553 char *old_fraction_end, *new_fraction_end, *end, *p;
554
555 p = buffer;
556 if (*p == '-' || *p == '+')
557 /* Skip leading sign, if present */
558 ++p;
559 while (Py_ISDIGIT(*p))
560 ++p;
561
562 /* if there's no decimal point there's nothing to do */
563 if (*p++ != '.')
564 return;
565
566 /* scan any digits after the point */
567 while (Py_ISDIGIT(*p))
568 ++p;
569 old_fraction_end = p;
570
571 /* scan up to ending '\0' */
572 while (*p != '\0')
573 p++;
574 /* +1 to make sure that we move the null byte as well */
575 end = p+1;
576
577 /* scan back from fraction_end, looking for removable zeros */
578 p = old_fraction_end;
579 while (*(p-1) == '0')
580 --p;
581 /* and remove point if we've got that far */
582 if (*(p-1) == '.')
583 --p;
584 new_fraction_end = p;
585
586 memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
587 }
588
589 /* Ensure that buffer has a decimal point in it. The decimal point will not
590 be in the current locale, it will always be '.'. Don't add a decimal point
591 if an exponent is present. Also, convert to exponential notation where
592 adding a '.0' would produce too many significant digits (see issue 5864).
593
594 Returns a pointer to the fixed buffer, or NULL on failure.
595 */
596 Py_LOCAL_INLINE(char *)
ensure_decimal_point(char * buffer,size_t buf_size,int precision)597 ensure_decimal_point(char* buffer, size_t buf_size, int precision)
598 {
599 int digit_count, insert_count = 0, convert_to_exp = 0;
600 char *chars_to_insert, *digits_start;
601
602 /* search for the first non-digit character */
603 char *p = buffer;
604 if (*p == '-' || *p == '+')
605 /* Skip leading sign, if present. I think this could only
606 ever be '-', but it can't hurt to check for both. */
607 ++p;
608 digits_start = p;
609 while (*p && Py_ISDIGIT(*p))
610 ++p;
611 digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
612
613 if (*p == '.') {
614 if (Py_ISDIGIT(*(p+1))) {
615 /* Nothing to do, we already have a decimal
616 point and a digit after it */
617 }
618 else {
619 /* We have a decimal point, but no following
620 digit. Insert a zero after the decimal. */
621 /* can't ever get here via PyOS_double_to_string */
622 assert(precision == -1);
623 ++p;
624 chars_to_insert = "0";
625 insert_count = 1;
626 }
627 }
628 else if (!(*p == 'e' || *p == 'E')) {
629 /* Don't add ".0" if we have an exponent. */
630 if (digit_count == precision) {
631 /* issue 5864: don't add a trailing .0 in the case
632 where the '%g'-formatted result already has as many
633 significant digits as were requested. Switch to
634 exponential notation instead. */
635 convert_to_exp = 1;
636 /* no exponent, no point, and we shouldn't land here
637 for infs and nans, so we must be at the end of the
638 string. */
639 assert(*p == '\0');
640 }
641 else {
642 assert(precision == -1 || digit_count < precision);
643 chars_to_insert = ".0";
644 insert_count = 2;
645 }
646 }
647 if (insert_count) {
648 size_t buf_len = strlen(buffer);
649 if (buf_len + insert_count + 1 >= buf_size) {
650 /* If there is not enough room in the buffer
651 for the additional text, just skip it. It's
652 not worth generating an error over. */
653 }
654 else {
655 memmove(p + insert_count, p,
656 buffer + strlen(buffer) - p + 1);
657 memcpy(p, chars_to_insert, insert_count);
658 }
659 }
660 if (convert_to_exp) {
661 int written;
662 size_t buf_avail;
663 p = digits_start;
664 /* insert decimal point */
665 assert(digit_count >= 1);
666 memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
667 p[1] = '.';
668 p += digit_count+1;
669 assert(p <= buf_size+buffer);
670 buf_avail = buf_size+buffer-p;
671 if (buf_avail == 0)
672 return NULL;
673 /* Add exponent. It's okay to use lower case 'e': we only
674 arrive here as a result of using the empty format code or
675 repr/str builtins and those never want an upper case 'E' */
676 written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
677 if (!(0 <= written &&
678 written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
679 /* output truncated, or something else bad happened */
680 return NULL;
681 remove_trailing_zeros(buffer);
682 }
683 return buffer;
684 }
685
686 /* see FORMATBUFLEN in unicodeobject.c */
687 #define FLOAT_FORMATBUFLEN 120
688
689 /**
690 * _PyOS_ascii_formatd:
691 * @buffer: A buffer to place the resulting string in
692 * @buf_size: The length of the buffer.
693 * @format: The printf()-style format to use for the
694 * code to use for converting.
695 * @d: The #gdouble to convert
696 * @precision: The precision to use when formatting.
697 *
698 * Converts a #gdouble to a string, using the '.' as
699 * decimal point. To format the number you pass in
700 * a printf()-style format string. Allowed conversion
701 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
702 *
703 * 'Z' is the same as 'g', except it always has a decimal and
704 * at least one digit after the decimal.
705 *
706 * Return value: The pointer to the buffer with the converted string.
707 * On failure returns NULL but does not set any Python exception.
708 **/
709 static char *
_PyOS_ascii_formatd(char * buffer,size_t buf_size,const char * format,double d,int precision)710 _PyOS_ascii_formatd(char *buffer,
711 size_t buf_size,
712 const char *format,
713 double d,
714 int precision)
715 {
716 char format_char;
717 size_t format_len = strlen(format);
718
719 /* Issue 2264: code 'Z' requires copying the format. 'Z' is 'g', but
720 also with at least one character past the decimal. */
721 char tmp_format[FLOAT_FORMATBUFLEN];
722
723 /* The last character in the format string must be the format char */
724 format_char = format[format_len - 1];
725
726 if (format[0] != '%')
727 return NULL;
728
729 /* I'm not sure why this test is here. It's ensuring that the format
730 string after the first character doesn't have a single quote, a
731 lowercase l, or a percent. This is the reverse of the commented-out
732 test about 10 lines ago. */
733 if (strpbrk(format + 1, "'l%"))
734 return NULL;
735
736 /* Also curious about this function is that it accepts format strings
737 like "%xg", which are invalid for floats. In general, the
738 interface to this function is not very good, but changing it is
739 difficult because it's a public API. */
740
741 if (!(format_char == 'e' || format_char == 'E' ||
742 format_char == 'f' || format_char == 'F' ||
743 format_char == 'g' || format_char == 'G' ||
744 format_char == 'Z'))
745 return NULL;
746
747 /* Map 'Z' format_char to 'g', by copying the format string and
748 replacing the final char with a 'g' */
749 if (format_char == 'Z') {
750 if (format_len + 1 >= sizeof(tmp_format)) {
751 /* The format won't fit in our copy. Error out. In
752 practice, this will never happen and will be
753 detected by returning NULL */
754 return NULL;
755 }
756 strcpy(tmp_format, format);
757 tmp_format[format_len - 1] = 'g';
758 format = tmp_format;
759 }
760
761
762 /* Have PyOS_snprintf do the hard work */
763 PyOS_snprintf(buffer, buf_size, format, d);
764
765 /* Do various fixups on the return string */
766
767 /* Get the current locale, and find the decimal point string.
768 Convert that string back to a dot. */
769 change_decimal_from_locale_to_dot(buffer);
770
771 /* If an exponent exists, ensure that the exponent is at least
772 MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
773 for the extra zeros. Also, if there are more than
774 MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
775 back to MIN_EXPONENT_DIGITS */
776 ensure_minimum_exponent_length(buffer, buf_size);
777
778 /* If format_char is 'Z', make sure we have at least one character
779 after the decimal point (and make sure we have a decimal point);
780 also switch to exponential notation in some edge cases where the
781 extra character would produce more significant digits that we
782 really want. */
783 if (format_char == 'Z')
784 buffer = ensure_decimal_point(buffer, buf_size, precision);
785
786 return buffer;
787 }
788
789 /* The fallback code to use if _Py_dg_dtoa is not available. */
790
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)791 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
792 char format_code,
793 int precision,
794 int flags,
795 int *type)
796 {
797 char format[32];
798 Py_ssize_t bufsize;
799 char *buf;
800 int t, exp;
801 int upper = 0;
802
803 /* Validate format_code, and map upper and lower case */
804 switch (format_code) {
805 case 'e': /* exponent */
806 case 'f': /* fixed */
807 case 'g': /* general */
808 break;
809 case 'E':
810 upper = 1;
811 format_code = 'e';
812 break;
813 case 'F':
814 upper = 1;
815 format_code = 'f';
816 break;
817 case 'G':
818 upper = 1;
819 format_code = 'g';
820 break;
821 case 'r': /* repr format */
822 /* Supplied precision is unused, must be 0. */
823 if (precision != 0) {
824 PyErr_BadInternalCall();
825 return NULL;
826 }
827 /* The repr() precision (17 significant decimal digits) is the
828 minimal number that is guaranteed to have enough precision
829 so that if the number is read back in the exact same binary
830 value is recreated. This is true for IEEE floating point
831 by design, and also happens to work for all other modern
832 hardware. */
833 precision = 17;
834 format_code = 'g';
835 break;
836 default:
837 PyErr_BadInternalCall();
838 return NULL;
839 }
840
841 /* Here's a quick-and-dirty calculation to figure out how big a buffer
842 we need. In general, for a finite float we need:
843
844 1 byte for each digit of the decimal significand, and
845
846 1 for a possible sign
847 1 for a possible decimal point
848 2 for a possible [eE][+-]
849 1 for each digit of the exponent; if we allow 19 digits
850 total then we're safe up to exponents of 2**63.
851 1 for the trailing nul byte
852
853 This gives a total of 24 + the number of digits in the significand,
854 and the number of digits in the significand is:
855
856 for 'g' format: at most precision, except possibly
857 when precision == 0, when it's 1.
858 for 'e' format: precision+1
859 for 'f' format: precision digits after the point, at least 1
860 before. To figure out how many digits appear before the point
861 we have to examine the size of the number. If fabs(val) < 1.0
862 then there will be only one digit before the point. If
863 fabs(val) >= 1.0, then there are at most
864
865 1+floor(log10(ceiling(fabs(val))))
866
867 digits before the point (where the 'ceiling' allows for the
868 possibility that the rounding rounds the integer part of val
869 up). A safe upper bound for the above quantity is
870 1+floor(exp/3), where exp is the unique integer such that 0.5
871 <= fabs(val)/2**exp < 1.0. This exp can be obtained from
872 frexp.
873
874 So we allow room for precision+1 digits for all formats, plus an
875 extra floor(exp/3) digits for 'f' format.
876
877 */
878
879 if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
880 /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
881 bufsize = 5;
882 else {
883 bufsize = 25 + precision;
884 if (format_code == 'f' && fabs(val) >= 1.0) {
885 frexp(val, &exp);
886 bufsize += exp/3;
887 }
888 }
889
890 buf = PyMem_Malloc(bufsize);
891 if (buf == NULL) {
892 PyErr_NoMemory();
893 return NULL;
894 }
895
896 /* Handle nan and inf. */
897 if (Py_IS_NAN(val)) {
898 strcpy(buf, "nan");
899 t = Py_DTST_NAN;
900 } else if (Py_IS_INFINITY(val)) {
901 if (copysign(1., val) == 1.)
902 strcpy(buf, "inf");
903 else
904 strcpy(buf, "-inf");
905 t = Py_DTST_INFINITE;
906 } else {
907 t = Py_DTST_FINITE;
908 if (flags & Py_DTSF_ADD_DOT_0)
909 format_code = 'Z';
910
911 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
912 (flags & Py_DTSF_ALT ? "#" : ""), precision,
913 format_code);
914 _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
915 }
916
917 /* Add sign when requested. It's convenient (esp. when formatting
918 complex numbers) to include a sign even for inf and nan. */
919 if (flags & Py_DTSF_SIGN && buf[0] != '-') {
920 size_t len = strlen(buf);
921 /* the bufsize calculations above should ensure that we've got
922 space to add a sign */
923 assert((size_t)bufsize >= len+2);
924 memmove(buf+1, buf, len+1);
925 buf[0] = '+';
926 }
927 if (upper) {
928 /* Convert to upper case. */
929 char *p1;
930 for (p1 = buf; *p1; p1++)
931 *p1 = Py_TOUPPER(*p1);
932 }
933
934 if (type)
935 *type = t;
936 return buf;
937 }
938
939 #else
940
941 /* _Py_dg_dtoa is available. */
942
943 /* I'm using a lookup table here so that I don't have to invent a non-locale
944 specific way to convert to uppercase */
/* Indices into the two string tables below. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them.
   Each entry is tied explicitly to its OFS_* index. */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E]   = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E]   = "E",
};
960
961
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
   memory containing the resulting string.
964
965 Arguments:
966 d is the double to be converted
967 format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
968 correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
969 mode is one of '0', '2' or '3', and is completely determined by
970 format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
971 precision is the desired precision
972 always_add_sign is nonzero if a '+' sign should be included for positive
973 numbers
974 add_dot_0_if_integer is nonzero if integers in non-exponential form
975 should have ".0" added. Only applies to format codes 'r' and 'g'.
976 use_alt_formatting is nonzero if alternative formatting should be
977 used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
978 at most one of use_alt_formatting and add_dot_0_if_integer should
979 be nonzero.
980 type, if non-NULL, will be set to one of these constants to identify
981 the type of the 'd' argument:
982 Py_DTST_FINITE
983 Py_DTST_INFINITE
984 Py_DTST_NAN
985
986 Returns a PyMem_Malloc'd block of memory containing the resulting string,
987 or NULL on error. If NULL is returned, the Python error has been set.
988 */
989
990 static char *
format_float_short(double d,char format_code,int mode,int precision,int always_add_sign,int add_dot_0_if_integer,int use_alt_formatting,const char * const * float_strings,int * type)991 format_float_short(double d, char format_code,
992 int mode, int precision,
993 int always_add_sign, int add_dot_0_if_integer,
994 int use_alt_formatting, const char * const *float_strings,
995 int *type)
996 {
997 char *buf = NULL;
998 char *p = NULL;
999 Py_ssize_t bufsize = 0;
1000 char *digits, *digits_end;
1001 int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1002 Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1003 _Py_SET_53BIT_PRECISION_HEADER;
1004
1005 /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1006 Must be matched by a call to _Py_dg_freedtoa. */
1007 _Py_SET_53BIT_PRECISION_START;
1008 digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1009 &digits_end);
1010 _Py_SET_53BIT_PRECISION_END;
1011
1012 decpt = (Py_ssize_t)decpt_as_int;
1013 if (digits == NULL) {
1014 /* The only failure mode is no memory. */
1015 PyErr_NoMemory();
1016 goto exit;
1017 }
1018 assert(digits_end != NULL && digits_end >= digits);
1019 digits_len = digits_end - digits;
1020
1021 if (digits_len && !Py_ISDIGIT(digits[0])) {
1022 /* Infinities and nans here; adapt Gay's output,
1023 so convert Infinity to inf and NaN to nan, and
1024 ignore sign of nan. Then return. */
1025
1026 /* ignore the actual sign of a nan */
1027 if (digits[0] == 'n' || digits[0] == 'N')
1028 sign = 0;
1029
1030 /* We only need 5 bytes to hold the result "+inf\0" . */
1031 bufsize = 5; /* Used later in an assert. */
1032 buf = (char *)PyMem_Malloc(bufsize);
1033 if (buf == NULL) {
1034 PyErr_NoMemory();
1035 goto exit;
1036 }
1037 p = buf;
1038
1039 if (sign == 1) {
1040 *p++ = '-';
1041 }
1042 else if (always_add_sign) {
1043 *p++ = '+';
1044 }
1045 if (digits[0] == 'i' || digits[0] == 'I') {
1046 strncpy(p, float_strings[OFS_INF], 3);
1047 p += 3;
1048
1049 if (type)
1050 *type = Py_DTST_INFINITE;
1051 }
1052 else if (digits[0] == 'n' || digits[0] == 'N') {
1053 strncpy(p, float_strings[OFS_NAN], 3);
1054 p += 3;
1055
1056 if (type)
1057 *type = Py_DTST_NAN;
1058 }
1059 else {
1060 /* shouldn't get here: Gay's code should always return
1061 something starting with a digit, an 'I', or 'N' */
1062 strncpy(p, "ERR", 3);
1063 /* p += 3; */
1064 assert(0);
1065 }
1066 goto exit;
1067 }
1068
1069 /* The result must be finite (not inf or nan). */
1070 if (type)
1071 *type = Py_DTST_FINITE;
1072
1073
1074 /* We got digits back, format them. We may need to pad 'digits'
1075 either on the left or right (or both) with extra zeros, so in
1076 general the resulting string has the form
1077
1078 [<sign>]<zeros><digits><zeros>[<exponent>]
1079
1080 where either of the <zeros> pieces could be empty, and there's a
1081 decimal point that could appear either in <digits> or in the
1082 leading or trailing <zeros>.
1083
1084 Imagine an infinite 'virtual' string vdigits, consisting of the
1085 string 'digits' (starting at index 0) padded on both the left and
1086 right with infinite strings of zeros. We want to output a slice
1087
1088 vdigits[vdigits_start : vdigits_end]
1089
1090 of this virtual string. Thus if vdigits_start < 0 then we'll end
1091 up producing some leading zeros; if vdigits_end > digits_len there
1092 will be trailing zeros in the output. The next section of code
1093 determines whether to use an exponent or not, figures out the
1094 position 'decpt' of the decimal point, and computes 'vdigits_start'
1095 and 'vdigits_end'. */
1096 vdigits_end = digits_len;
1097 switch (format_code) {
1098 case 'e':
1099 use_exp = 1;
1100 vdigits_end = precision;
1101 break;
1102 case 'f':
1103 vdigits_end = decpt + precision;
1104 break;
1105 case 'g':
1106 if (decpt <= -4 || decpt >
1107 (add_dot_0_if_integer ? precision-1 : precision))
1108 use_exp = 1;
1109 if (use_alt_formatting)
1110 vdigits_end = precision;
1111 break;
1112 case 'r':
1113 /* convert to exponential format at 1e16. We used to convert
1114 at 1e17, but that gives odd-looking results for some values
1115 when a 16-digit 'shortest' repr is padded with bogus zeros.
1116 For example, repr(2e16+8) would give 20000000000000010.0;
1117 the true value is 20000000000000008.0. */
1118 if (decpt <= -4 || decpt > 16)
1119 use_exp = 1;
1120 break;
1121 default:
1122 PyErr_BadInternalCall();
1123 goto exit;
1124 }
1125
1126 /* if using an exponent, reset decimal point position to 1 and adjust
1127 exponent accordingly.*/
1128 if (use_exp) {
1129 exp = (int)decpt - 1;
1130 decpt = 1;
1131 }
1132 /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1133 decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1134 vdigits_start = decpt <= 0 ? decpt-1 : 0;
1135 if (!use_exp && add_dot_0_if_integer)
1136 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1137 else
1138 vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1139
1140 /* double check inequalities */
1141 assert(vdigits_start <= 0 &&
1142 0 <= digits_len &&
1143 digits_len <= vdigits_end);
1144 /* decimal point should be in (vdigits_start, vdigits_end] */
1145 assert(vdigits_start < decpt && decpt <= vdigits_end);
1146
1147 /* Compute an upper bound how much memory we need. This might be a few
1148 chars too long, but no big deal. */
1149 bufsize =
1150 /* sign, decimal point and trailing 0 byte */
1151 3 +
1152
1153 /* total digit count (including zero padding on both sides) */
1154 (vdigits_end - vdigits_start) +
1155
1156 /* exponent "e+100", max 3 numerical digits */
1157 (use_exp ? 5 : 0);
1158
1159 /* Now allocate the memory and initialize p to point to the start of
1160 it. */
1161 buf = (char *)PyMem_Malloc(bufsize);
1162 if (buf == NULL) {
1163 PyErr_NoMemory();
1164 goto exit;
1165 }
1166 p = buf;
1167
1168 /* Add a negative sign if negative, and a plus sign if non-negative
1169 and always_add_sign is true. */
1170 if (sign == 1)
1171 *p++ = '-';
1172 else if (always_add_sign)
1173 *p++ = '+';
1174
1175 /* note that exactly one of the three 'if' conditions is true,
1176 so we include exactly one decimal point */
1177 /* Zero padding on left of digit string */
1178 if (decpt <= 0) {
1179 memset(p, '0', decpt-vdigits_start);
1180 p += decpt - vdigits_start;
1181 *p++ = '.';
1182 memset(p, '0', 0-decpt);
1183 p += 0-decpt;
1184 }
1185 else {
1186 memset(p, '0', 0-vdigits_start);
1187 p += 0 - vdigits_start;
1188 }
1189
1190 /* Digits, with included decimal point */
1191 if (0 < decpt && decpt <= digits_len) {
1192 strncpy(p, digits, decpt-0);
1193 p += decpt-0;
1194 *p++ = '.';
1195 strncpy(p, digits+decpt, digits_len-decpt);
1196 p += digits_len-decpt;
1197 }
1198 else {
1199 strncpy(p, digits, digits_len);
1200 p += digits_len;
1201 }
1202
1203 /* And zeros on the right */
1204 if (digits_len < decpt) {
1205 memset(p, '0', decpt-digits_len);
1206 p += decpt-digits_len;
1207 *p++ = '.';
1208 memset(p, '0', vdigits_end-decpt);
1209 p += vdigits_end-decpt;
1210 }
1211 else {
1212 memset(p, '0', vdigits_end-digits_len);
1213 p += vdigits_end-digits_len;
1214 }
1215
1216 /* Delete a trailing decimal pt unless using alternative formatting. */
1217 if (p[-1] == '.' && !use_alt_formatting)
1218 p--;
1219
1220 /* Now that we've done zero padding, add an exponent if needed. */
1221 if (use_exp) {
1222 *p++ = float_strings[OFS_E][0];
1223 exp_len = sprintf(p, "%+.02d", exp);
1224 p += exp_len;
1225 }
1226 exit:
1227 if (buf) {
1228 *p = '\0';
1229 /* It's too late if this fails, as we've already stepped on
1230 memory that isn't ours. But it's an okay debugging test. */
1231 assert(p-buf < bufsize);
1232 }
1233 if (digits)
1234 _Py_dg_freedtoa(digits);
1235
1236 return buf;
1237 }
1238
1239
PyOS_double_to_string(double val,char format_code,int precision,int flags,int * type)1240 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1241 char format_code,
1242 int precision,
1243 int flags,
1244 int *type)
1245 {
1246 const char * const *float_strings = lc_float_strings;
1247 int mode;
1248
1249 /* Validate format_code, and map upper and lower case. Compute the
1250 mode and make any adjustments as needed. */
1251 switch (format_code) {
1252 /* exponent */
1253 case 'E':
1254 float_strings = uc_float_strings;
1255 format_code = 'e';
1256 /* Fall through. */
1257 case 'e':
1258 mode = 2;
1259 precision++;
1260 break;
1261
1262 /* fixed */
1263 case 'F':
1264 float_strings = uc_float_strings;
1265 format_code = 'f';
1266 /* Fall through. */
1267 case 'f':
1268 mode = 3;
1269 break;
1270
1271 /* general */
1272 case 'G':
1273 float_strings = uc_float_strings;
1274 format_code = 'g';
1275 /* Fall through. */
1276 case 'g':
1277 mode = 2;
1278 /* precision 0 makes no sense for 'g' format; interpret as 1 */
1279 if (precision == 0)
1280 precision = 1;
1281 break;
1282
1283 /* repr format */
1284 case 'r':
1285 mode = 0;
1286 /* Supplied precision is unused, must be 0. */
1287 if (precision != 0) {
1288 PyErr_BadInternalCall();
1289 return NULL;
1290 }
1291 break;
1292
1293 default:
1294 PyErr_BadInternalCall();
1295 return NULL;
1296 }
1297
1298 return format_float_short(val, format_code, mode, precision,
1299 flags & Py_DTSF_SIGN,
1300 flags & Py_DTSF_ADD_DOT_0,
1301 flags & Py_DTSF_ALT,
1302 float_strings, type);
1303 }
1304 #endif /* ifdef PY_NO_SHORT_FLOAT_REPR */
1305