/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5 #include "Python.h"
6 #include "pycore_fileutils.h"
7 #include <locale.h>
8
9 /* Raises an exception about an unknown presentation type for this
10 * type. */
11
12 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)13 unknown_presentation_type(Py_UCS4 presentation_type,
14 const char* type_name)
15 {
16 /* %c might be out-of-range, hence the two cases. */
17 if (presentation_type > 32 && presentation_type < 128)
18 PyErr_Format(PyExc_ValueError,
19 "Unknown format code '%c' "
20 "for object of type '%.200s'",
21 (char)presentation_type,
22 type_name);
23 else
24 PyErr_Format(PyExc_ValueError,
25 "Unknown format code '\\x%x' "
26 "for object of type '%.200s'",
27 (unsigned int)presentation_type,
28 type_name);
29 }
30
31 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)32 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
33 {
34 assert(specifier == ',' || specifier == '_');
35 if (presentation_type > 32 && presentation_type < 128)
36 PyErr_Format(PyExc_ValueError,
37 "Cannot specify '%c' with '%c'.",
38 specifier, (char)presentation_type);
39 else
40 PyErr_Format(PyExc_ValueError,
41 "Cannot specify '%c' with '\\x%x'.",
42 specifier, (unsigned int)presentation_type);
43 }
44
45 static void
invalid_comma_and_underscore(void)46 invalid_comma_and_underscore(void)
47 {
48 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49 }
50
51 /*
52 get_integer consumes 0 or more decimal digit characters from an
53 input string, updates *result with the corresponding positive
54 integer, and returns the number of digits consumed.
55
56 returns -1 on error.
57 */
58 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)59 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
60 Py_ssize_t *result)
61 {
62 Py_ssize_t accumulator, digitval, pos = *ppos;
63 int numdigits;
64 int kind = PyUnicode_KIND(str);
65 const void *data = PyUnicode_DATA(str);
66
67 accumulator = numdigits = 0;
68 for (; pos < end; pos++, numdigits++) {
69 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
70 if (digitval < 0)
71 break;
72 /*
73 Detect possible overflow before it happens:
74
75 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
77 */
78 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
79 PyErr_Format(PyExc_ValueError,
80 "Too many decimal digits in format string");
81 *ppos = pos;
82 return -1;
83 }
84 accumulator = accumulator * 10 + digitval;
85 }
86 *ppos = pos;
87 *result = accumulator;
88 return numdigits;
89 }
90
91 /************************************************************************/
92 /*********** standard format specifier parsing **************************/
93 /************************************************************************/
94
95 /* returns true if this character is a specifier alignment token */
96 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)97 is_alignment_token(Py_UCS4 c)
98 {
99 switch (c) {
100 case '<': case '>': case '=': case '^':
101 return 1;
102 default:
103 return 0;
104 }
105 }
106
107 /* returns true if this character is a sign element */
108 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)109 is_sign_element(Py_UCS4 c)
110 {
111 switch (c) {
112 case ' ': case '+': case '-':
113 return 1;
114 default:
115 return 0;
116 }
117 }
118
/* Locale type codes.  LT_NO_LOCALE must be zero, because the parsed
   value is also tested for truth. */
enum LocaleType {
    /* No thousands separator was requested. */
    LT_NO_LOCALE = 0,

    /* ',' was given in the format specifier. */
    LT_DEFAULT_LOCALE = ',',

    /* '_' was given in the format specifier. */
    LT_UNDERSCORE_LOCALE = '_',

    /* '_' combined with a 'b', 'o', 'x' or 'X' type: group by four. */
    LT_UNDER_FOUR_LOCALE,

    /* Take the settings from localeconv(). */
    LT_CURRENT_LOCALE
};
127
128 typedef struct {
129 Py_UCS4 fill_char;
130 Py_UCS4 align;
131 int alternate;
132 Py_UCS4 sign;
133 Py_ssize_t width;
134 enum LocaleType thousands_separators;
135 Py_ssize_t precision;
136 Py_UCS4 type;
137 } InternalFormatSpec;
138
#if 0
/* Debugging aid: dump every field of a parsed format spec to stdout.
   Compiled out by default. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
156
157
158 /*
159 ptr points to the start of the format_spec, end points just past its end.
160 fills in format with the parsed information.
161 returns 1 on success, 0 on failure.
162 if failure, sets the exception
163 */
164 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)165 parse_internal_render_format_spec(PyObject *format_spec,
166 Py_ssize_t start, Py_ssize_t end,
167 InternalFormatSpec *format,
168 char default_type,
169 char default_align)
170 {
171 Py_ssize_t pos = start;
172 int kind = PyUnicode_KIND(format_spec);
173 const void *data = PyUnicode_DATA(format_spec);
174 /* end-pos is used throughout this code to specify the length of
175 the input string */
176 #define READ_spec(index) PyUnicode_READ(kind, data, index)
177
178 Py_ssize_t consumed;
179 int align_specified = 0;
180 int fill_char_specified = 0;
181
182 format->fill_char = ' ';
183 format->align = default_align;
184 format->alternate = 0;
185 format->sign = '\0';
186 format->width = -1;
187 format->thousands_separators = LT_NO_LOCALE;
188 format->precision = -1;
189 format->type = default_type;
190
191 /* If the second char is an alignment token,
192 then parse the fill char */
193 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194 format->align = READ_spec(pos+1);
195 format->fill_char = READ_spec(pos);
196 fill_char_specified = 1;
197 align_specified = 1;
198 pos += 2;
199 }
200 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201 format->align = READ_spec(pos);
202 align_specified = 1;
203 ++pos;
204 }
205
206 /* Parse the various sign options */
207 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208 format->sign = READ_spec(pos);
209 ++pos;
210 }
211
212 /* If the next character is #, we're in alternate mode. This only
213 applies to integers. */
214 if (end-pos >= 1 && READ_spec(pos) == '#') {
215 format->alternate = 1;
216 ++pos;
217 }
218
219 /* The special case for 0-padding (backwards compat) */
220 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
221 format->fill_char = '0';
222 if (!align_specified) {
223 format->align = '=';
224 }
225 ++pos;
226 }
227
228 consumed = get_integer(format_spec, &pos, end, &format->width);
229 if (consumed == -1)
230 /* Overflow error. Exception already set. */
231 return 0;
232
233 /* If consumed is 0, we didn't consume any characters for the
234 width. In that case, reset the width to -1, because
235 get_integer() will have set it to zero. -1 is how we record
236 that the width wasn't specified. */
237 if (consumed == 0)
238 format->width = -1;
239
240 /* Comma signifies add thousands separators */
241 if (end-pos && READ_spec(pos) == ',') {
242 format->thousands_separators = LT_DEFAULT_LOCALE;
243 ++pos;
244 }
245 /* Underscore signifies add thousands separators */
246 if (end-pos && READ_spec(pos) == '_') {
247 if (format->thousands_separators != LT_NO_LOCALE) {
248 invalid_comma_and_underscore();
249 return 0;
250 }
251 format->thousands_separators = LT_UNDERSCORE_LOCALE;
252 ++pos;
253 }
254 if (end-pos && READ_spec(pos) == ',') {
255 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
256 invalid_comma_and_underscore();
257 return 0;
258 }
259 }
260
261 /* Parse field precision */
262 if (end-pos && READ_spec(pos) == '.') {
263 ++pos;
264
265 consumed = get_integer(format_spec, &pos, end, &format->precision);
266 if (consumed == -1)
267 /* Overflow error. Exception already set. */
268 return 0;
269
270 /* Not having a precision after a dot is an error. */
271 if (consumed == 0) {
272 PyErr_Format(PyExc_ValueError,
273 "Format specifier missing precision");
274 return 0;
275 }
276
277 }
278
279 /* Finally, parse the type field. */
280
281 if (end-pos > 1) {
282 /* More than one char remain, invalid format specifier. */
283 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
284 return 0;
285 }
286
287 if (end-pos == 1) {
288 format->type = READ_spec(pos);
289 ++pos;
290 }
291
292 /* Do as much validating as we can, just by looking at the format
293 specifier. Do not take into account what type of formatting
294 we're doing (int, float, string). */
295
296 if (format->thousands_separators) {
297 switch (format->type) {
298 case 'd':
299 case 'e':
300 case 'f':
301 case 'g':
302 case 'E':
303 case 'G':
304 case '%':
305 case 'F':
306 case '\0':
307 /* These are allowed. See PEP 378.*/
308 break;
309 case 'b':
310 case 'o':
311 case 'x':
312 case 'X':
313 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
314 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
315 /* Every four digits, not every three, in bin/oct/hex. */
316 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
317 break;
318 }
319 /* fall through */
320 default:
321 invalid_thousands_separator_type(format->thousands_separators, format->type);
322 return 0;
323 }
324 }
325
326 assert (format->align <= 127);
327 assert (format->sign <= 127);
328 return 1;
329 }
330
331 /* Calculate the padding needed. */
332 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)333 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
334 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
335 Py_ssize_t *n_total)
336 {
337 if (width >= 0) {
338 if (nchars > width)
339 *n_total = nchars;
340 else
341 *n_total = width;
342 }
343 else {
344 /* not specified, use all of the chars and no more */
345 *n_total = nchars;
346 }
347
348 /* Figure out how much leading space we need, based on the
349 aligning */
350 if (align == '>')
351 *n_lpadding = *n_total - nchars;
352 else if (align == '^')
353 *n_lpadding = (*n_total - nchars) / 2;
354 else if (align == '<' || align == '=')
355 *n_lpadding = 0;
356 else {
357 /* We should never have an unspecified alignment. */
358 Py_UNREACHABLE();
359 }
360
361 *n_rpadding = *n_total - nchars - *n_lpadding;
362 }
363
364 /* Do the padding, and return a pointer to where the caller-supplied
365 content goes. */
366 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)367 fill_padding(_PyUnicodeWriter *writer,
368 Py_ssize_t nchars,
369 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
370 Py_ssize_t n_rpadding)
371 {
372 Py_ssize_t pos;
373
374 /* Pad on left. */
375 if (n_lpadding) {
376 pos = writer->pos;
377 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
378 }
379
380 /* Pad on right. */
381 if (n_rpadding) {
382 pos = writer->pos + nchars + n_lpadding;
383 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
384 }
385
386 /* Pointer to the user content. */
387 writer->pos += n_lpadding;
388 return 0;
389 }
390
391 /************************************************************************/
392 /*********** common routines for numeric formatting *********************/
393 /************************************************************************/
394
395 /* Locale info needed for formatting integers and the part of floats
396 before and including the decimal. Note that locales only support
397 8-bit chars, not unicode. */
398 typedef struct {
399 PyObject *decimal_point;
400 PyObject *thousands_sep;
401 const char *grouping;
402 char *grouping_buffer;
403 } LocaleInfo;
404
405 #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
406
407 /* describes the layout for an integer, see the comment in
408 calc_number_widths() for details */
409 typedef struct {
410 Py_ssize_t n_lpadding;
411 Py_ssize_t n_prefix;
412 Py_ssize_t n_spadding;
413 Py_ssize_t n_rpadding;
414 char sign;
415 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
416 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
417 any grouping chars. */
418 Py_ssize_t n_decimal; /* 0 if only an integer */
419 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
420 excluding the decimal itself, if
421 present. */
422
423 /* These 2 are not the widths of fields, but are needed by
424 STRINGLIB_GROUPING. */
425 Py_ssize_t n_digits; /* The number of digits before a decimal
426 or exponent. */
427 Py_ssize_t n_min_width; /* The min_width we used when we computed
428 the n_grouped_digits width. */
429 } NumberFieldWidths;
430
431
432 /* Given a number of the form:
433 digits[remainder]
434 where ptr points to the start and end points to the end, find where
435 the integer part ends. This could be a decimal, an exponent, both,
436 or neither.
437 If a decimal point is present, set *has_decimal and increment
438 remainder beyond it.
439 Results are undefined (but shouldn't crash) for improperly
440 formatted strings.
441 */
442 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)443 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
444 Py_ssize_t *n_remainder, int *has_decimal)
445 {
446 Py_ssize_t remainder;
447 int kind = PyUnicode_KIND(s);
448 const void *data = PyUnicode_DATA(s);
449
450 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
451 ++pos;
452 remainder = pos;
453
454 /* Does remainder start with a decimal point? */
455 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
456
457 /* Skip the decimal point. */
458 if (*has_decimal)
459 remainder++;
460
461 *n_remainder = end - remainder;
462 }
463
464 /* not all fields of format are used. for example, precision is
465 unused. should this take discrete params in order to be more clear
466 about what it does? or is passing a single format parameter easier
467 and more efficient enough to justify a little obfuscation?
468 Return -1 on error. */
469 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)470 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
471 Py_UCS4 sign_char, Py_ssize_t n_start,
472 Py_ssize_t n_end, Py_ssize_t n_remainder,
473 int has_decimal, const LocaleInfo *locale,
474 const InternalFormatSpec *format, Py_UCS4 *maxchar)
475 {
476 Py_ssize_t n_non_digit_non_padding;
477 Py_ssize_t n_padding;
478
479 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
480 spec->n_lpadding = 0;
481 spec->n_prefix = n_prefix;
482 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
483 spec->n_remainder = n_remainder;
484 spec->n_spadding = 0;
485 spec->n_rpadding = 0;
486 spec->sign = '\0';
487 spec->n_sign = 0;
488
489 /* the output will look like:
490 | |
491 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
492 | |
493
494 sign is computed from format->sign and the actual
495 sign of the number
496
497 prefix is given (it's for the '0x' prefix)
498
499 digits is already known
500
501 the total width is either given, or computed from the
502 actual digits
503
504 only one of lpadding, spadding, and rpadding can be non-zero,
505 and it's calculated from the width and other fields
506 */
507
508 /* compute the various parts we're going to write */
509 switch (format->sign) {
510 case '+':
511 /* always put a + or - */
512 spec->n_sign = 1;
513 spec->sign = (sign_char == '-' ? '-' : '+');
514 break;
515 case ' ':
516 spec->n_sign = 1;
517 spec->sign = (sign_char == '-' ? '-' : ' ');
518 break;
519 default:
520 /* Not specified, or the default (-) */
521 if (sign_char == '-') {
522 spec->n_sign = 1;
523 spec->sign = '-';
524 }
525 }
526
527 /* The number of chars used for non-digits and non-padding. */
528 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
529 spec->n_remainder;
530
531 /* min_width can go negative, that's okay. format->width == -1 means
532 we don't care. */
533 if (format->fill_char == '0' && format->align == '=')
534 spec->n_min_width = format->width - n_non_digit_non_padding;
535 else
536 spec->n_min_width = 0;
537
538 if (spec->n_digits == 0)
539 /* This case only occurs when using 'c' formatting, we need
540 to special case it because the grouping code always wants
541 to have at least one character. */
542 spec->n_grouped_digits = 0;
543 else {
544 Py_UCS4 grouping_maxchar;
545 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
546 NULL, 0,
547 NULL, 0, spec->n_digits,
548 spec->n_min_width,
549 locale->grouping, locale->thousands_sep, &grouping_maxchar);
550 if (spec->n_grouped_digits == -1) {
551 return -1;
552 }
553 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
554 }
555
556 /* Given the desired width and the total of digit and non-digit
557 space we consume, see if we need any padding. format->width can
558 be negative (meaning no padding), but this code still works in
559 that case. */
560 n_padding = format->width -
561 (n_non_digit_non_padding + spec->n_grouped_digits);
562 if (n_padding > 0) {
563 /* Some padding is needed. Determine if it's left, space, or right. */
564 switch (format->align) {
565 case '<':
566 spec->n_rpadding = n_padding;
567 break;
568 case '^':
569 spec->n_lpadding = n_padding / 2;
570 spec->n_rpadding = n_padding - spec->n_lpadding;
571 break;
572 case '=':
573 spec->n_spadding = n_padding;
574 break;
575 case '>':
576 spec->n_lpadding = n_padding;
577 break;
578 default:
579 /* Shouldn't get here */
580 Py_UNREACHABLE();
581 }
582 }
583
584 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
585 *maxchar = Py_MAX(*maxchar, format->fill_char);
586
587 if (spec->n_decimal)
588 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
589
590 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
591 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
592 spec->n_remainder + spec->n_rpadding;
593 }
594
595 /* Fill in the digit parts of a number's string representation,
596 as determined in calc_number_widths().
597 Return -1 on error, or 0 on success. */
598 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)599 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
600 PyObject *digits, Py_ssize_t d_start,
601 PyObject *prefix, Py_ssize_t p_start,
602 Py_UCS4 fill_char,
603 LocaleInfo *locale, int toupper)
604 {
605 /* Used to keep track of digits, decimal, and remainder. */
606 Py_ssize_t d_pos = d_start;
607 const unsigned int kind = writer->kind;
608 const void *data = writer->data;
609 Py_ssize_t r;
610
611 if (spec->n_lpadding) {
612 _PyUnicode_FastFill(writer->buffer,
613 writer->pos, spec->n_lpadding, fill_char);
614 writer->pos += spec->n_lpadding;
615 }
616 if (spec->n_sign == 1) {
617 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
618 writer->pos++;
619 }
620 if (spec->n_prefix) {
621 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
622 prefix, p_start,
623 spec->n_prefix);
624 if (toupper) {
625 Py_ssize_t t;
626 for (t = 0; t < spec->n_prefix; t++) {
627 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
628 c = Py_TOUPPER(c);
629 assert (c <= 127);
630 PyUnicode_WRITE(kind, data, writer->pos + t, c);
631 }
632 }
633 writer->pos += spec->n_prefix;
634 }
635 if (spec->n_spadding) {
636 _PyUnicode_FastFill(writer->buffer,
637 writer->pos, spec->n_spadding, fill_char);
638 writer->pos += spec->n_spadding;
639 }
640
641 /* Only for type 'c' special case, it has no digits. */
642 if (spec->n_digits != 0) {
643 /* Fill the digits with InsertThousandsGrouping. */
644 r = _PyUnicode_InsertThousandsGrouping(
645 writer, spec->n_grouped_digits,
646 digits, d_pos, spec->n_digits,
647 spec->n_min_width,
648 locale->grouping, locale->thousands_sep, NULL);
649 if (r == -1)
650 return -1;
651 assert(r == spec->n_grouped_digits);
652 d_pos += spec->n_digits;
653 }
654 if (toupper) {
655 Py_ssize_t t;
656 for (t = 0; t < spec->n_grouped_digits; t++) {
657 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
658 c = Py_TOUPPER(c);
659 if (c > 127) {
660 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
661 return -1;
662 }
663 PyUnicode_WRITE(kind, data, writer->pos + t, c);
664 }
665 }
666 writer->pos += spec->n_grouped_digits;
667
668 if (spec->n_decimal) {
669 _PyUnicode_FastCopyCharacters(
670 writer->buffer, writer->pos,
671 locale->decimal_point, 0, spec->n_decimal);
672 writer->pos += spec->n_decimal;
673 d_pos += 1;
674 }
675
676 if (spec->n_remainder) {
677 _PyUnicode_FastCopyCharacters(
678 writer->buffer, writer->pos,
679 digits, d_pos, spec->n_remainder);
680 writer->pos += spec->n_remainder;
681 /* d_pos += spec->n_remainder; */
682 }
683
684 if (spec->n_rpadding) {
685 _PyUnicode_FastFill(writer->buffer,
686 writer->pos, spec->n_rpadding,
687 fill_char);
688 writer->pos += spec->n_rpadding;
689 }
690 return 0;
691 }
692
693 static const char no_grouping[1] = {CHAR_MAX};
694
695 /* Find the decimal point character(s?), thousands_separator(s?), and
696 grouping description, either for the current locale if type is
697 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
698 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
699 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)700 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
701 {
702 switch (type) {
703 case LT_CURRENT_LOCALE: {
704 struct lconv *lc = localeconv();
705 if (_Py_GetLocaleconvNumeric(lc,
706 &locale_info->decimal_point,
707 &locale_info->thousands_sep) < 0) {
708 return -1;
709 }
710
711 /* localeconv() grouping can become a dangling pointer or point
712 to a different string if another thread calls localeconv() during
713 the string formatting. Copy the string to avoid this risk. */
714 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
715 if (locale_info->grouping_buffer == NULL) {
716 PyErr_NoMemory();
717 return -1;
718 }
719 locale_info->grouping = locale_info->grouping_buffer;
720 break;
721 }
722 case LT_DEFAULT_LOCALE:
723 case LT_UNDERSCORE_LOCALE:
724 case LT_UNDER_FOUR_LOCALE:
725 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
726 locale_info->thousands_sep = PyUnicode_FromOrdinal(
727 type == LT_DEFAULT_LOCALE ? ',' : '_');
728 if (!locale_info->decimal_point || !locale_info->thousands_sep)
729 return -1;
730 if (type != LT_UNDER_FOUR_LOCALE)
731 locale_info->grouping = "\3"; /* Group every 3 characters. The
732 (implicit) trailing 0 means repeat
733 infinitely. */
734 else
735 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
736 break;
737 case LT_NO_LOCALE:
738 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
739 locale_info->thousands_sep = PyUnicode_New(0, 0);
740 if (!locale_info->decimal_point || !locale_info->thousands_sep)
741 return -1;
742 locale_info->grouping = no_grouping;
743 break;
744 }
745 return 0;
746 }
747
748 static void
free_locale_info(LocaleInfo * locale_info)749 free_locale_info(LocaleInfo *locale_info)
750 {
751 Py_XDECREF(locale_info->decimal_point);
752 Py_XDECREF(locale_info->thousands_sep);
753 PyMem_Free(locale_info->grouping_buffer);
754 }
755
756 /************************************************************************/
757 /*********** string formatting ******************************************/
758 /************************************************************************/
759
760 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)761 format_string_internal(PyObject *value, const InternalFormatSpec *format,
762 _PyUnicodeWriter *writer)
763 {
764 Py_ssize_t lpad;
765 Py_ssize_t rpad;
766 Py_ssize_t total;
767 Py_ssize_t len;
768 int result = -1;
769 Py_UCS4 maxchar;
770
771 assert(PyUnicode_IS_READY(value));
772 len = PyUnicode_GET_LENGTH(value);
773
774 /* sign is not allowed on strings */
775 if (format->sign != '\0') {
776 PyErr_SetString(PyExc_ValueError,
777 "Sign not allowed in string format specifier");
778 goto done;
779 }
780
781 /* alternate is not allowed on strings */
782 if (format->alternate) {
783 PyErr_SetString(PyExc_ValueError,
784 "Alternate form (#) not allowed in string format "
785 "specifier");
786 goto done;
787 }
788
789 /* '=' alignment not allowed on strings */
790 if (format->align == '=') {
791 PyErr_SetString(PyExc_ValueError,
792 "'=' alignment not allowed "
793 "in string format specifier");
794 goto done;
795 }
796
797 if ((format->width == -1 || format->width <= len)
798 && (format->precision == -1 || format->precision >= len)) {
799 /* Fast path */
800 return _PyUnicodeWriter_WriteStr(writer, value);
801 }
802
803 /* if precision is specified, output no more that format.precision
804 characters */
805 if (format->precision >= 0 && len >= format->precision) {
806 len = format->precision;
807 }
808
809 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
810
811 maxchar = writer->maxchar;
812 if (lpad != 0 || rpad != 0)
813 maxchar = Py_MAX(maxchar, format->fill_char);
814 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
815 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
816 maxchar = Py_MAX(maxchar, valmaxchar);
817 }
818
819 /* allocate the resulting string */
820 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
821 goto done;
822
823 /* Write into that space. First the padding. */
824 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
825 if (result == -1)
826 goto done;
827
828 /* Then the source string. */
829 if (len) {
830 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
831 value, 0, len);
832 }
833 writer->pos += (len + rpad);
834 result = 0;
835
836 done:
837 return result;
838 }
839
840
841 /************************************************************************/
842 /*********** long formatting ********************************************/
843 /************************************************************************/
844
845 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)846 format_long_internal(PyObject *value, const InternalFormatSpec *format,
847 _PyUnicodeWriter *writer)
848 {
849 int result = -1;
850 Py_UCS4 maxchar = 127;
851 PyObject *tmp = NULL;
852 Py_ssize_t inumeric_chars;
853 Py_UCS4 sign_char = '\0';
854 Py_ssize_t n_digits; /* count of digits need from the computed
855 string */
856 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
857 produces non-digits */
858 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
859 Py_ssize_t n_total;
860 Py_ssize_t prefix = 0;
861 NumberFieldWidths spec;
862 long x;
863
864 /* Locale settings, either from the actual locale or
865 from a hard-code pseudo-locale */
866 LocaleInfo locale = LocaleInfo_STATIC_INIT;
867
868 /* no precision allowed on integers */
869 if (format->precision != -1) {
870 PyErr_SetString(PyExc_ValueError,
871 "Precision not allowed in integer format specifier");
872 goto done;
873 }
874
875 /* special case for character formatting */
876 if (format->type == 'c') {
877 /* error to specify a sign */
878 if (format->sign != '\0') {
879 PyErr_SetString(PyExc_ValueError,
880 "Sign not allowed with integer"
881 " format specifier 'c'");
882 goto done;
883 }
884 /* error to request alternate format */
885 if (format->alternate) {
886 PyErr_SetString(PyExc_ValueError,
887 "Alternate form (#) not allowed with integer"
888 " format specifier 'c'");
889 goto done;
890 }
891
892 /* taken from unicodeobject.c formatchar() */
893 /* Integer input truncated to a character */
894 x = PyLong_AsLong(value);
895 if (x == -1 && PyErr_Occurred())
896 goto done;
897 if (x < 0 || x > 0x10ffff) {
898 PyErr_SetString(PyExc_OverflowError,
899 "%c arg not in range(0x110000)");
900 goto done;
901 }
902 tmp = PyUnicode_FromOrdinal(x);
903 inumeric_chars = 0;
904 n_digits = 1;
905 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
906
907 /* As a sort-of hack, we tell calc_number_widths that we only
908 have "remainder" characters. calc_number_widths thinks
909 these are characters that don't get formatted, only copied
910 into the output string. We do this for 'c' formatting,
911 because the characters are likely to be non-digits. */
912 n_remainder = 1;
913 }
914 else {
915 int base;
916 int leading_chars_to_skip = 0; /* Number of characters added by
917 PyNumber_ToBase that we want to
918 skip over. */
919
920 /* Compute the base and how many characters will be added by
921 PyNumber_ToBase */
922 switch (format->type) {
923 case 'b':
924 base = 2;
925 leading_chars_to_skip = 2; /* 0b */
926 break;
927 case 'o':
928 base = 8;
929 leading_chars_to_skip = 2; /* 0o */
930 break;
931 case 'x':
932 case 'X':
933 base = 16;
934 leading_chars_to_skip = 2; /* 0x */
935 break;
936 default: /* shouldn't be needed, but stops a compiler warning */
937 case 'd':
938 case 'n':
939 base = 10;
940 break;
941 }
942
943 if (format->sign != '+' && format->sign != ' '
944 && format->width == -1
945 && format->type != 'X' && format->type != 'n'
946 && !format->thousands_separators
947 && PyLong_CheckExact(value))
948 {
949 /* Fast path */
950 return _PyLong_FormatWriter(writer, value, base, format->alternate);
951 }
952
953 /* The number of prefix chars is the same as the leading
954 chars to skip */
955 if (format->alternate)
956 n_prefix = leading_chars_to_skip;
957
958 /* Do the hard part, converting to a string in a given base */
959 tmp = _PyLong_Format(value, base);
960 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
961 goto done;
962
963 inumeric_chars = 0;
964 n_digits = PyUnicode_GET_LENGTH(tmp);
965
966 prefix = inumeric_chars;
967
968 /* Is a sign character present in the output? If so, remember it
969 and skip it */
970 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
971 sign_char = '-';
972 ++prefix;
973 ++leading_chars_to_skip;
974 }
975
976 /* Skip over the leading chars (0x, 0b, etc.) */
977 n_digits -= leading_chars_to_skip;
978 inumeric_chars += leading_chars_to_skip;
979 }
980
981 /* Determine the grouping, separator, and decimal point, if any. */
982 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
983 format->thousands_separators,
984 &locale) == -1)
985 goto done;
986
987 /* Calculate how much memory we'll need. */
988 n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
989 inumeric_chars + n_digits, n_remainder, 0,
990 &locale, format, &maxchar);
991 if (n_total == -1) {
992 goto done;
993 }
994
995 /* Allocate the memory. */
996 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
997 goto done;
998
999 /* Populate the memory. */
1000 result = fill_number(writer, &spec,
1001 tmp, inumeric_chars,
1002 tmp, prefix, format->fill_char,
1003 &locale, format->type == 'X');
1004
1005 done:
1006 Py_XDECREF(tmp);
1007 free_locale_info(&locale);
1008 return result;
1009 }
1010
1011 /************************************************************************/
1012 /*********** float formatting *******************************************/
1013 /************************************************************************/
1014
1015 /* much of this is taken from unicodeobject.c */
1016 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1017 format_float_internal(PyObject *value,
1018 const InternalFormatSpec *format,
1019 _PyUnicodeWriter *writer)
1020 {
1021 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1022 Py_ssize_t n_digits;
1023 Py_ssize_t n_remainder;
1024 Py_ssize_t n_total;
1025 int has_decimal;
1026 double val;
1027 int precision, default_precision = 6;
1028 Py_UCS4 type = format->type;
1029 int add_pct = 0;
1030 Py_ssize_t index;
1031 NumberFieldWidths spec;
1032 int flags = 0;
1033 int result = -1;
1034 Py_UCS4 maxchar = 127;
1035 Py_UCS4 sign_char = '\0';
1036 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1037 PyObject *unicode_tmp = NULL;
1038
1039 /* Locale settings, either from the actual locale or
1040 from a hard-code pseudo-locale */
1041 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1042
1043 if (format->precision > INT_MAX) {
1044 PyErr_SetString(PyExc_ValueError, "precision too big");
1045 goto done;
1046 }
1047 precision = (int)format->precision;
1048
1049 if (format->alternate)
1050 flags |= Py_DTSF_ALT;
1051
1052 if (type == '\0') {
1053 /* Omitted type specifier. Behaves in the same way as repr(x)
1054 and str(x) if no precision is given, else like 'g', but with
1055 at least one digit after the decimal point. */
1056 flags |= Py_DTSF_ADD_DOT_0;
1057 type = 'r';
1058 default_precision = 0;
1059 }
1060
1061 if (type == 'n')
1062 /* 'n' is the same as 'g', except for the locale used to
1063 format the result. We take care of that later. */
1064 type = 'g';
1065
1066 val = PyFloat_AsDouble(value);
1067 if (val == -1.0 && PyErr_Occurred())
1068 goto done;
1069
1070 if (type == '%') {
1071 type = 'f';
1072 val *= 100;
1073 add_pct = 1;
1074 }
1075
1076 if (precision < 0)
1077 precision = default_precision;
1078 else if (type == 'r')
1079 type = 'g';
1080
1081 /* Cast "type", because if we're in unicode we need to pass an
1082 8-bit char. This is safe, because we've restricted what "type"
1083 can be. */
1084 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1085 &float_type);
1086 if (buf == NULL)
1087 goto done;
1088 n_digits = strlen(buf);
1089
1090 if (add_pct) {
1091 /* We know that buf has a trailing zero (since we just called
1092 strlen() on it), and we don't use that fact any more. So we
1093 can just write over the trailing zero. */
1094 buf[n_digits] = '%';
1095 n_digits += 1;
1096 }
1097
1098 if (format->sign != '+' && format->sign != ' '
1099 && format->width == -1
1100 && format->type != 'n'
1101 && !format->thousands_separators)
1102 {
1103 /* Fast path */
1104 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1105 PyMem_Free(buf);
1106 return result;
1107 }
1108
1109 /* Since there is no unicode version of PyOS_double_to_string,
1110 just use the 8 bit version and then convert to unicode. */
1111 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1112 PyMem_Free(buf);
1113 if (unicode_tmp == NULL)
1114 goto done;
1115
1116 /* Is a sign character present in the output? If so, remember it
1117 and skip it */
1118 index = 0;
1119 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1120 sign_char = '-';
1121 ++index;
1122 --n_digits;
1123 }
1124
1125 /* Determine if we have any "remainder" (after the digits, might include
1126 decimal or exponent or both (or neither)) */
1127 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1128
1129 /* Determine the grouping, separator, and decimal point, if any. */
1130 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1131 format->thousands_separators,
1132 &locale) == -1)
1133 goto done;
1134
1135 /* Calculate how much memory we'll need. */
1136 n_total = calc_number_widths(&spec, 0, sign_char, index,
1137 index + n_digits, n_remainder, has_decimal,
1138 &locale, format, &maxchar);
1139 if (n_total == -1) {
1140 goto done;
1141 }
1142
1143 /* Allocate the memory. */
1144 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1145 goto done;
1146
1147 /* Populate the memory. */
1148 result = fill_number(writer, &spec,
1149 unicode_tmp, index,
1150 NULL, 0, format->fill_char,
1151 &locale, 0);
1152
1153 done:
1154 Py_XDECREF(unicode_tmp);
1155 free_locale_info(&locale);
1156 return result;
1157 }
1158
1159 /************************************************************************/
1160 /*********** complex formatting *****************************************/
1161 /************************************************************************/
1162
1163 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1164 format_complex_internal(PyObject *value,
1165 const InternalFormatSpec *format,
1166 _PyUnicodeWriter *writer)
1167 {
1168 double re;
1169 double im;
1170 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1171 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1172
1173 InternalFormatSpec tmp_format = *format;
1174 Py_ssize_t n_re_digits;
1175 Py_ssize_t n_im_digits;
1176 Py_ssize_t n_re_remainder;
1177 Py_ssize_t n_im_remainder;
1178 Py_ssize_t n_re_total;
1179 Py_ssize_t n_im_total;
1180 int re_has_decimal;
1181 int im_has_decimal;
1182 int precision, default_precision = 6;
1183 Py_UCS4 type = format->type;
1184 Py_ssize_t i_re;
1185 Py_ssize_t i_im;
1186 NumberFieldWidths re_spec;
1187 NumberFieldWidths im_spec;
1188 int flags = 0;
1189 int result = -1;
1190 Py_UCS4 maxchar = 127;
1191 enum PyUnicode_Kind rkind;
1192 void *rdata;
1193 Py_UCS4 re_sign_char = '\0';
1194 Py_UCS4 im_sign_char = '\0';
1195 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1196 int im_float_type;
1197 int add_parens = 0;
1198 int skip_re = 0;
1199 Py_ssize_t lpad;
1200 Py_ssize_t rpad;
1201 Py_ssize_t total;
1202 PyObject *re_unicode_tmp = NULL;
1203 PyObject *im_unicode_tmp = NULL;
1204
1205 /* Locale settings, either from the actual locale or
1206 from a hard-code pseudo-locale */
1207 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1208
1209 if (format->precision > INT_MAX) {
1210 PyErr_SetString(PyExc_ValueError, "precision too big");
1211 goto done;
1212 }
1213 precision = (int)format->precision;
1214
1215 /* Zero padding is not allowed. */
1216 if (format->fill_char == '0') {
1217 PyErr_SetString(PyExc_ValueError,
1218 "Zero padding is not allowed in complex format "
1219 "specifier");
1220 goto done;
1221 }
1222
1223 /* Neither is '=' alignment . */
1224 if (format->align == '=') {
1225 PyErr_SetString(PyExc_ValueError,
1226 "'=' alignment flag is not allowed in complex format "
1227 "specifier");
1228 goto done;
1229 }
1230
1231 re = PyComplex_RealAsDouble(value);
1232 if (re == -1.0 && PyErr_Occurred())
1233 goto done;
1234 im = PyComplex_ImagAsDouble(value);
1235 if (im == -1.0 && PyErr_Occurred())
1236 goto done;
1237
1238 if (format->alternate)
1239 flags |= Py_DTSF_ALT;
1240
1241 if (type == '\0') {
1242 /* Omitted type specifier. Should be like str(self). */
1243 type = 'r';
1244 default_precision = 0;
1245 if (re == 0.0 && copysign(1.0, re) == 1.0)
1246 skip_re = 1;
1247 else
1248 add_parens = 1;
1249 }
1250
1251 if (type == 'n')
1252 /* 'n' is the same as 'g', except for the locale used to
1253 format the result. We take care of that later. */
1254 type = 'g';
1255
1256 if (precision < 0)
1257 precision = default_precision;
1258 else if (type == 'r')
1259 type = 'g';
1260
1261 /* Cast "type", because if we're in unicode we need to pass an
1262 8-bit char. This is safe, because we've restricted what "type"
1263 can be. */
1264 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1265 &re_float_type);
1266 if (re_buf == NULL)
1267 goto done;
1268 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1269 &im_float_type);
1270 if (im_buf == NULL)
1271 goto done;
1272
1273 n_re_digits = strlen(re_buf);
1274 n_im_digits = strlen(im_buf);
1275
1276 /* Since there is no unicode version of PyOS_double_to_string,
1277 just use the 8 bit version and then convert to unicode. */
1278 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1279 if (re_unicode_tmp == NULL)
1280 goto done;
1281 i_re = 0;
1282
1283 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1284 if (im_unicode_tmp == NULL)
1285 goto done;
1286 i_im = 0;
1287
1288 /* Is a sign character present in the output? If so, remember it
1289 and skip it */
1290 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1291 re_sign_char = '-';
1292 ++i_re;
1293 --n_re_digits;
1294 }
1295 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1296 im_sign_char = '-';
1297 ++i_im;
1298 --n_im_digits;
1299 }
1300
1301 /* Determine if we have any "remainder" (after the digits, might include
1302 decimal or exponent or both (or neither)) */
1303 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1304 &n_re_remainder, &re_has_decimal);
1305 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1306 &n_im_remainder, &im_has_decimal);
1307
1308 /* Determine the grouping, separator, and decimal point, if any. */
1309 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1310 format->thousands_separators,
1311 &locale) == -1)
1312 goto done;
1313
1314 /* Turn off any padding. We'll do it later after we've composed
1315 the numbers without padding. */
1316 tmp_format.fill_char = '\0';
1317 tmp_format.align = '<';
1318 tmp_format.width = -1;
1319
1320 /* Calculate how much memory we'll need. */
1321 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1322 i_re, i_re + n_re_digits, n_re_remainder,
1323 re_has_decimal, &locale, &tmp_format,
1324 &maxchar);
1325 if (n_re_total == -1) {
1326 goto done;
1327 }
1328
1329 /* Same formatting, but always include a sign, unless the real part is
1330 * going to be omitted, in which case we use whatever sign convention was
1331 * requested by the original format. */
1332 if (!skip_re)
1333 tmp_format.sign = '+';
1334 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1335 i_im, i_im + n_im_digits, n_im_remainder,
1336 im_has_decimal, &locale, &tmp_format,
1337 &maxchar);
1338 if (n_im_total == -1) {
1339 goto done;
1340 }
1341
1342 if (skip_re)
1343 n_re_total = 0;
1344
1345 /* Add 1 for the 'j', and optionally 2 for parens. */
1346 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1347 format->width, format->align, &lpad, &rpad, &total);
1348
1349 if (lpad || rpad)
1350 maxchar = Py_MAX(maxchar, format->fill_char);
1351
1352 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1353 goto done;
1354 rkind = writer->kind;
1355 rdata = writer->data;
1356
1357 /* Populate the memory. First, the padding. */
1358 result = fill_padding(writer,
1359 n_re_total + n_im_total + 1 + add_parens * 2,
1360 format->fill_char, lpad, rpad);
1361 if (result == -1)
1362 goto done;
1363
1364 if (add_parens) {
1365 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1366 writer->pos++;
1367 }
1368
1369 if (!skip_re) {
1370 result = fill_number(writer, &re_spec,
1371 re_unicode_tmp, i_re,
1372 NULL, 0,
1373 0,
1374 &locale, 0);
1375 if (result == -1)
1376 goto done;
1377 }
1378 result = fill_number(writer, &im_spec,
1379 im_unicode_tmp, i_im,
1380 NULL, 0,
1381 0,
1382 &locale, 0);
1383 if (result == -1)
1384 goto done;
1385 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1386 writer->pos++;
1387
1388 if (add_parens) {
1389 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1390 writer->pos++;
1391 }
1392
1393 writer->pos += rpad;
1394
1395 done:
1396 PyMem_Free(re_buf);
1397 PyMem_Free(im_buf);
1398 Py_XDECREF(re_unicode_tmp);
1399 Py_XDECREF(im_unicode_tmp);
1400 free_locale_info(&locale);
1401 return result;
1402 }
1403
1404 /************************************************************************/
1405 /*********** built in formatters ****************************************/
1406 /************************************************************************/
1407 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1408 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1409 {
1410 PyObject *str;
1411 int err;
1412
1413 str = PyObject_Str(obj);
1414 if (str == NULL)
1415 return -1;
1416 err = _PyUnicodeWriter_WriteStr(writer, str);
1417 Py_DECREF(str);
1418 return err;
1419 }
1420
1421 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1422 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1423 PyObject *obj,
1424 PyObject *format_spec,
1425 Py_ssize_t start, Py_ssize_t end)
1426 {
1427 InternalFormatSpec format;
1428
1429 assert(PyUnicode_Check(obj));
1430
1431 /* check for the special case of zero length format spec, make
1432 it equivalent to str(obj) */
1433 if (start == end) {
1434 if (PyUnicode_CheckExact(obj))
1435 return _PyUnicodeWriter_WriteStr(writer, obj);
1436 else
1437 return format_obj(obj, writer);
1438 }
1439
1440 /* parse the format_spec */
1441 if (!parse_internal_render_format_spec(format_spec, start, end,
1442 &format, 's', '<'))
1443 return -1;
1444
1445 /* type conversion? */
1446 switch (format.type) {
1447 case 's':
1448 /* no type conversion needed, already a string. do the formatting */
1449 return format_string_internal(obj, &format, writer);
1450 default:
1451 /* unknown */
1452 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1453 return -1;
1454 }
1455 }
1456
1457 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1458 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1459 PyObject *obj,
1460 PyObject *format_spec,
1461 Py_ssize_t start, Py_ssize_t end)
1462 {
1463 PyObject *tmp = NULL;
1464 InternalFormatSpec format;
1465 int result = -1;
1466
1467 /* check for the special case of zero length format spec, make
1468 it equivalent to str(obj) */
1469 if (start == end) {
1470 if (PyLong_CheckExact(obj))
1471 return _PyLong_FormatWriter(writer, obj, 10, 0);
1472 else
1473 return format_obj(obj, writer);
1474 }
1475
1476 /* parse the format_spec */
1477 if (!parse_internal_render_format_spec(format_spec, start, end,
1478 &format, 'd', '>'))
1479 goto done;
1480
1481 /* type conversion? */
1482 switch (format.type) {
1483 case 'b':
1484 case 'c':
1485 case 'd':
1486 case 'o':
1487 case 'x':
1488 case 'X':
1489 case 'n':
1490 /* no type conversion needed, already an int. do the formatting */
1491 result = format_long_internal(obj, &format, writer);
1492 break;
1493
1494 case 'e':
1495 case 'E':
1496 case 'f':
1497 case 'F':
1498 case 'g':
1499 case 'G':
1500 case '%':
1501 /* convert to float */
1502 tmp = PyNumber_Float(obj);
1503 if (tmp == NULL)
1504 goto done;
1505 result = format_float_internal(tmp, &format, writer);
1506 break;
1507
1508 default:
1509 /* unknown */
1510 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1511 goto done;
1512 }
1513
1514 done:
1515 Py_XDECREF(tmp);
1516 return result;
1517 }
1518
1519 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1520 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1521 PyObject *obj,
1522 PyObject *format_spec,
1523 Py_ssize_t start, Py_ssize_t end)
1524 {
1525 InternalFormatSpec format;
1526
1527 /* check for the special case of zero length format spec, make
1528 it equivalent to str(obj) */
1529 if (start == end)
1530 return format_obj(obj, writer);
1531
1532 /* parse the format_spec */
1533 if (!parse_internal_render_format_spec(format_spec, start, end,
1534 &format, '\0', '>'))
1535 return -1;
1536
1537 /* type conversion? */
1538 switch (format.type) {
1539 case '\0': /* No format code: like 'g', but with at least one decimal. */
1540 case 'e':
1541 case 'E':
1542 case 'f':
1543 case 'F':
1544 case 'g':
1545 case 'G':
1546 case 'n':
1547 case '%':
1548 /* no conversion, already a float. do the formatting */
1549 return format_float_internal(obj, &format, writer);
1550
1551 default:
1552 /* unknown */
1553 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1554 return -1;
1555 }
1556 }
1557
1558 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1559 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1560 PyObject *obj,
1561 PyObject *format_spec,
1562 Py_ssize_t start, Py_ssize_t end)
1563 {
1564 InternalFormatSpec format;
1565
1566 /* check for the special case of zero length format spec, make
1567 it equivalent to str(obj) */
1568 if (start == end)
1569 return format_obj(obj, writer);
1570
1571 /* parse the format_spec */
1572 if (!parse_internal_render_format_spec(format_spec, start, end,
1573 &format, '\0', '>'))
1574 return -1;
1575
1576 /* type conversion? */
1577 switch (format.type) {
1578 case '\0': /* No format code: like 'g', but with at least one decimal. */
1579 case 'e':
1580 case 'E':
1581 case 'f':
1582 case 'F':
1583 case 'g':
1584 case 'G':
1585 case 'n':
1586 /* no conversion, already a complex. do the formatting */
1587 return format_complex_internal(obj, &format, writer);
1588
1589 default:
1590 /* unknown */
1591 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1592 return -1;
1593 }
1594 }
1595