/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5 #include "Python.h"
6 #include "pycore_fileutils.h"
7 #include <locale.h>
8
9 /* Raises an exception about an unknown presentation type for this
10 * type. */
11
12 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)13 unknown_presentation_type(Py_UCS4 presentation_type,
14 const char* type_name)
15 {
16 /* %c might be out-of-range, hence the two cases. */
17 if (presentation_type > 32 && presentation_type < 128)
18 PyErr_Format(PyExc_ValueError,
19 "Unknown format code '%c' "
20 "for object of type '%.200s'",
21 (char)presentation_type,
22 type_name);
23 else
24 PyErr_Format(PyExc_ValueError,
25 "Unknown format code '\\x%x' "
26 "for object of type '%.200s'",
27 (unsigned int)presentation_type,
28 type_name);
29 }
30
31 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)32 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
33 {
34 assert(specifier == ',' || specifier == '_');
35 if (presentation_type > 32 && presentation_type < 128)
36 PyErr_Format(PyExc_ValueError,
37 "Cannot specify '%c' with '%c'.",
38 specifier, (char)presentation_type);
39 else
40 PyErr_Format(PyExc_ValueError,
41 "Cannot specify '%c' with '\\x%x'.",
42 specifier, (unsigned int)presentation_type);
43 }
44
45 static void
invalid_comma_and_underscore(void)46 invalid_comma_and_underscore(void)
47 {
48 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49 }
50
51 /*
52 get_integer consumes 0 or more decimal digit characters from an
53 input string, updates *result with the corresponding positive
54 integer, and returns the number of digits consumed.
55
56 returns -1 on error.
57 */
58 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)59 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
60 Py_ssize_t *result)
61 {
62 Py_ssize_t accumulator, digitval, pos = *ppos;
63 int numdigits;
64 int kind = PyUnicode_KIND(str);
65 void *data = PyUnicode_DATA(str);
66
67 accumulator = numdigits = 0;
68 for (; pos < end; pos++, numdigits++) {
69 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
70 if (digitval < 0)
71 break;
72 /*
73 Detect possible overflow before it happens:
74
75 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
77 */
78 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
79 PyErr_Format(PyExc_ValueError,
80 "Too many decimal digits in format string");
81 *ppos = pos;
82 return -1;
83 }
84 accumulator = accumulator * 10 + digitval;
85 }
86 *ppos = pos;
87 *result = accumulator;
88 return numdigits;
89 }
90
91 /************************************************************************/
92 /*********** standard format specifier parsing **************************/
93 /************************************************************************/
94
95 /* returns true if this character is a specifier alignment token */
96 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)97 is_alignment_token(Py_UCS4 c)
98 {
99 switch (c) {
100 case '<': case '>': case '=': case '^':
101 return 1;
102 default:
103 return 0;
104 }
105 }
106
107 /* returns true if this character is a sign element */
108 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)109 is_sign_element(Py_UCS4 c)
110 {
111 switch (c) {
112 case ' ': case '+': case '-':
113 return 1;
114 default:
115 return 0;
116 }
117 }
118
/* Locale type codes, selecting where the decimal point, thousands
   separator and grouping come from (see get_locale_info()).

   LT_NO_LOCALE must be zero: the field holding this value is tested for
   truth to decide whether any separator was requested.  The two
   character-valued codes are passed on as the separator character when
   reporting an invalid combination. */
enum LocaleType {
    /* No thousands separator at all. */
    LT_NO_LOCALE = 0,
    /* ',' separator, groups of three. */
    LT_DEFAULT_LOCALE = ',',
    /* '_' separator, groups of three. */
    LT_UNDERSCORE_LOCALE = '_',
    /* '_' separator, groups of four (used for bin/oct/hex). */
    LT_UNDER_FOUR_LOCALE,
    /* Separators and grouping taken from localeconv(). */
    LT_CURRENT_LOCALE
};
127
128 typedef struct {
129 Py_UCS4 fill_char;
130 Py_UCS4 align;
131 int alternate;
132 Py_UCS4 sign;
133 Py_ssize_t width;
134 enum LocaleType thousands_separators;
135 Py_ssize_t precision;
136 Py_UCS4 type;
137 } InternalFormatSpec;
138
#if 0
/* Debugging aid: print every field of a parsed format spec to stdout.
   Compiled out; flip the #if when it is needed. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    const InternalFormatSpec *f = format;

    printf("internal format spec: fill_char %d\n", f->fill_char);
    printf("internal format spec: align %d\n", f->align);
    printf("internal format spec: alternate %d\n", f->alternate);
    printf("internal format spec: sign %d\n", f->sign);
    printf("internal format spec: width %zd\n", f->width);
    printf("internal format spec: thousands_separators %d\n",
           f->thousands_separators);
    printf("internal format spec: precision %zd\n", f->precision);
    printf("internal format spec: type %c\n", f->type);
    printf("\n");
}
#endif
156
157
158 /*
159 ptr points to the start of the format_spec, end points just past its end.
160 fills in format with the parsed information.
161 returns 1 on success, 0 on failure.
162 if failure, sets the exception
163 */
164 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)165 parse_internal_render_format_spec(PyObject *format_spec,
166 Py_ssize_t start, Py_ssize_t end,
167 InternalFormatSpec *format,
168 char default_type,
169 char default_align)
170 {
171 Py_ssize_t pos = start;
172 int kind = PyUnicode_KIND(format_spec);
173 void *data = PyUnicode_DATA(format_spec);
174 /* end-pos is used throughout this code to specify the length of
175 the input string */
176 #define READ_spec(index) PyUnicode_READ(kind, data, index)
177
178 Py_ssize_t consumed;
179 int align_specified = 0;
180 int fill_char_specified = 0;
181
182 format->fill_char = ' ';
183 format->align = default_align;
184 format->alternate = 0;
185 format->sign = '\0';
186 format->width = -1;
187 format->thousands_separators = LT_NO_LOCALE;
188 format->precision = -1;
189 format->type = default_type;
190
191 /* If the second char is an alignment token,
192 then parse the fill char */
193 if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194 format->align = READ_spec(pos+1);
195 format->fill_char = READ_spec(pos);
196 fill_char_specified = 1;
197 align_specified = 1;
198 pos += 2;
199 }
200 else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201 format->align = READ_spec(pos);
202 align_specified = 1;
203 ++pos;
204 }
205
206 /* Parse the various sign options */
207 if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208 format->sign = READ_spec(pos);
209 ++pos;
210 }
211
212 /* If the next character is #, we're in alternate mode. This only
213 applies to integers. */
214 if (end-pos >= 1 && READ_spec(pos) == '#') {
215 format->alternate = 1;
216 ++pos;
217 }
218
219 /* The special case for 0-padding (backwards compat) */
220 if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
221 format->fill_char = '0';
222 if (!align_specified) {
223 format->align = '=';
224 }
225 ++pos;
226 }
227
228 consumed = get_integer(format_spec, &pos, end, &format->width);
229 if (consumed == -1)
230 /* Overflow error. Exception already set. */
231 return 0;
232
233 /* If consumed is 0, we didn't consume any characters for the
234 width. In that case, reset the width to -1, because
235 get_integer() will have set it to zero. -1 is how we record
236 that the width wasn't specified. */
237 if (consumed == 0)
238 format->width = -1;
239
240 /* Comma signifies add thousands separators */
241 if (end-pos && READ_spec(pos) == ',') {
242 format->thousands_separators = LT_DEFAULT_LOCALE;
243 ++pos;
244 }
245 /* Underscore signifies add thousands separators */
246 if (end-pos && READ_spec(pos) == '_') {
247 if (format->thousands_separators != LT_NO_LOCALE) {
248 invalid_comma_and_underscore();
249 return 0;
250 }
251 format->thousands_separators = LT_UNDERSCORE_LOCALE;
252 ++pos;
253 }
254 if (end-pos && READ_spec(pos) == ',') {
255 invalid_comma_and_underscore();
256 return 0;
257 }
258
259 /* Parse field precision */
260 if (end-pos && READ_spec(pos) == '.') {
261 ++pos;
262
263 consumed = get_integer(format_spec, &pos, end, &format->precision);
264 if (consumed == -1)
265 /* Overflow error. Exception already set. */
266 return 0;
267
268 /* Not having a precision after a dot is an error. */
269 if (consumed == 0) {
270 PyErr_Format(PyExc_ValueError,
271 "Format specifier missing precision");
272 return 0;
273 }
274
275 }
276
277 /* Finally, parse the type field. */
278
279 if (end-pos > 1) {
280 /* More than one char remain, invalid format specifier. */
281 PyErr_Format(PyExc_ValueError, "Invalid format specifier");
282 return 0;
283 }
284
285 if (end-pos == 1) {
286 format->type = READ_spec(pos);
287 ++pos;
288 }
289
290 /* Do as much validating as we can, just by looking at the format
291 specifier. Do not take into account what type of formatting
292 we're doing (int, float, string). */
293
294 if (format->thousands_separators) {
295 switch (format->type) {
296 case 'd':
297 case 'e':
298 case 'f':
299 case 'g':
300 case 'E':
301 case 'G':
302 case '%':
303 case 'F':
304 case '\0':
305 /* These are allowed. See PEP 378.*/
306 break;
307 case 'b':
308 case 'o':
309 case 'x':
310 case 'X':
311 /* Underscores are allowed in bin/oct/hex. See PEP 515. */
312 if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
313 /* Every four digits, not every three, in bin/oct/hex. */
314 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
315 break;
316 }
317 /* fall through */
318 default:
319 invalid_thousands_separator_type(format->thousands_separators, format->type);
320 return 0;
321 }
322 }
323
324 assert (format->align <= 127);
325 assert (format->sign <= 127);
326 return 1;
327 }
328
329 /* Calculate the padding needed. */
330 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)331 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
332 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
333 Py_ssize_t *n_total)
334 {
335 if (width >= 0) {
336 if (nchars > width)
337 *n_total = nchars;
338 else
339 *n_total = width;
340 }
341 else {
342 /* not specified, use all of the chars and no more */
343 *n_total = nchars;
344 }
345
346 /* Figure out how much leading space we need, based on the
347 aligning */
348 if (align == '>')
349 *n_lpadding = *n_total - nchars;
350 else if (align == '^')
351 *n_lpadding = (*n_total - nchars) / 2;
352 else if (align == '<' || align == '=')
353 *n_lpadding = 0;
354 else {
355 /* We should never have an unspecified alignment. */
356 Py_UNREACHABLE();
357 }
358
359 *n_rpadding = *n_total - nchars - *n_lpadding;
360 }
361
362 /* Do the padding, and return a pointer to where the caller-supplied
363 content goes. */
364 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)365 fill_padding(_PyUnicodeWriter *writer,
366 Py_ssize_t nchars,
367 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
368 Py_ssize_t n_rpadding)
369 {
370 Py_ssize_t pos;
371
372 /* Pad on left. */
373 if (n_lpadding) {
374 pos = writer->pos;
375 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
376 }
377
378 /* Pad on right. */
379 if (n_rpadding) {
380 pos = writer->pos + nchars + n_lpadding;
381 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
382 }
383
384 /* Pointer to the user content. */
385 writer->pos += n_lpadding;
386 return 0;
387 }
388
389 /************************************************************************/
390 /*********** common routines for numeric formatting *********************/
391 /************************************************************************/
392
393 /* Locale info needed for formatting integers and the part of floats
394 before and including the decimal. Note that locales only support
395 8-bit chars, not unicode. */
396 typedef struct {
397 PyObject *decimal_point;
398 PyObject *thousands_sep;
399 const char *grouping;
400 char *grouping_buffer;
401 } LocaleInfo;
402
403 #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
404
405 /* describes the layout for an integer, see the comment in
406 calc_number_widths() for details */
407 typedef struct {
408 Py_ssize_t n_lpadding;
409 Py_ssize_t n_prefix;
410 Py_ssize_t n_spadding;
411 Py_ssize_t n_rpadding;
412 char sign;
413 Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
414 Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
415 any grouping chars. */
416 Py_ssize_t n_decimal; /* 0 if only an integer */
417 Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
418 excluding the decimal itself, if
419 present. */
420
421 /* These 2 are not the widths of fields, but are needed by
422 STRINGLIB_GROUPING. */
423 Py_ssize_t n_digits; /* The number of digits before a decimal
424 or exponent. */
425 Py_ssize_t n_min_width; /* The min_width we used when we computed
426 the n_grouped_digits width. */
427 } NumberFieldWidths;
428
429
430 /* Given a number of the form:
431 digits[remainder]
432 where ptr points to the start and end points to the end, find where
433 the integer part ends. This could be a decimal, an exponent, both,
434 or neither.
435 If a decimal point is present, set *has_decimal and increment
436 remainder beyond it.
437 Results are undefined (but shouldn't crash) for improperly
438 formatted strings.
439 */
440 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)441 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
442 Py_ssize_t *n_remainder, int *has_decimal)
443 {
444 Py_ssize_t remainder;
445 int kind = PyUnicode_KIND(s);
446 void *data = PyUnicode_DATA(s);
447
448 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
449 ++pos;
450 remainder = pos;
451
452 /* Does remainder start with a decimal point? */
453 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
454
455 /* Skip the decimal point. */
456 if (*has_decimal)
457 remainder++;
458
459 *n_remainder = end - remainder;
460 }
461
462 /* not all fields of format are used. for example, precision is
463 unused. should this take discrete params in order to be more clear
464 about what it does? or is passing a single format parameter easier
465 and more efficient enough to justify a little obfuscation?
466 Return -1 on error. */
467 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,PyObject * number,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)468 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
469 Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
470 Py_ssize_t n_end, Py_ssize_t n_remainder,
471 int has_decimal, const LocaleInfo *locale,
472 const InternalFormatSpec *format, Py_UCS4 *maxchar)
473 {
474 Py_ssize_t n_non_digit_non_padding;
475 Py_ssize_t n_padding;
476
477 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
478 spec->n_lpadding = 0;
479 spec->n_prefix = n_prefix;
480 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
481 spec->n_remainder = n_remainder;
482 spec->n_spadding = 0;
483 spec->n_rpadding = 0;
484 spec->sign = '\0';
485 spec->n_sign = 0;
486
487 /* the output will look like:
488 | |
489 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
490 | |
491
492 sign is computed from format->sign and the actual
493 sign of the number
494
495 prefix is given (it's for the '0x' prefix)
496
497 digits is already known
498
499 the total width is either given, or computed from the
500 actual digits
501
502 only one of lpadding, spadding, and rpadding can be non-zero,
503 and it's calculated from the width and other fields
504 */
505
506 /* compute the various parts we're going to write */
507 switch (format->sign) {
508 case '+':
509 /* always put a + or - */
510 spec->n_sign = 1;
511 spec->sign = (sign_char == '-' ? '-' : '+');
512 break;
513 case ' ':
514 spec->n_sign = 1;
515 spec->sign = (sign_char == '-' ? '-' : ' ');
516 break;
517 default:
518 /* Not specified, or the default (-) */
519 if (sign_char == '-') {
520 spec->n_sign = 1;
521 spec->sign = '-';
522 }
523 }
524
525 /* The number of chars used for non-digits and non-padding. */
526 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
527 spec->n_remainder;
528
529 /* min_width can go negative, that's okay. format->width == -1 means
530 we don't care. */
531 if (format->fill_char == '0' && format->align == '=')
532 spec->n_min_width = format->width - n_non_digit_non_padding;
533 else
534 spec->n_min_width = 0;
535
536 if (spec->n_digits == 0)
537 /* This case only occurs when using 'c' formatting, we need
538 to special case it because the grouping code always wants
539 to have at least one character. */
540 spec->n_grouped_digits = 0;
541 else {
542 Py_UCS4 grouping_maxchar;
543 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
544 NULL, 0,
545 NULL, 0, spec->n_digits,
546 spec->n_min_width,
547 locale->grouping, locale->thousands_sep, &grouping_maxchar);
548 if (spec->n_grouped_digits == -1) {
549 return -1;
550 }
551 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
552 }
553
554 /* Given the desired width and the total of digit and non-digit
555 space we consume, see if we need any padding. format->width can
556 be negative (meaning no padding), but this code still works in
557 that case. */
558 n_padding = format->width -
559 (n_non_digit_non_padding + spec->n_grouped_digits);
560 if (n_padding > 0) {
561 /* Some padding is needed. Determine if it's left, space, or right. */
562 switch (format->align) {
563 case '<':
564 spec->n_rpadding = n_padding;
565 break;
566 case '^':
567 spec->n_lpadding = n_padding / 2;
568 spec->n_rpadding = n_padding - spec->n_lpadding;
569 break;
570 case '=':
571 spec->n_spadding = n_padding;
572 break;
573 case '>':
574 spec->n_lpadding = n_padding;
575 break;
576 default:
577 /* Shouldn't get here, but treat it as '>' */
578 Py_UNREACHABLE();
579 }
580 }
581
582 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
583 *maxchar = Py_MAX(*maxchar, format->fill_char);
584
585 if (spec->n_decimal)
586 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
587
588 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
589 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
590 spec->n_remainder + spec->n_rpadding;
591 }
592
593 /* Fill in the digit parts of a number's string representation,
594 as determined in calc_number_widths().
595 Return -1 on error, or 0 on success. */
596 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,Py_ssize_t d_end,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)597 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
598 PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end,
599 PyObject *prefix, Py_ssize_t p_start,
600 Py_UCS4 fill_char,
601 LocaleInfo *locale, int toupper)
602 {
603 /* Used to keep track of digits, decimal, and remainder. */
604 Py_ssize_t d_pos = d_start;
605 const unsigned int kind = writer->kind;
606 const void *data = writer->data;
607 Py_ssize_t r;
608
609 if (spec->n_lpadding) {
610 _PyUnicode_FastFill(writer->buffer,
611 writer->pos, spec->n_lpadding, fill_char);
612 writer->pos += spec->n_lpadding;
613 }
614 if (spec->n_sign == 1) {
615 PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
616 writer->pos++;
617 }
618 if (spec->n_prefix) {
619 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
620 prefix, p_start,
621 spec->n_prefix);
622 if (toupper) {
623 Py_ssize_t t;
624 for (t = 0; t < spec->n_prefix; t++) {
625 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
626 c = Py_TOUPPER(c);
627 assert (c <= 127);
628 PyUnicode_WRITE(kind, data, writer->pos + t, c);
629 }
630 }
631 writer->pos += spec->n_prefix;
632 }
633 if (spec->n_spadding) {
634 _PyUnicode_FastFill(writer->buffer,
635 writer->pos, spec->n_spadding, fill_char);
636 writer->pos += spec->n_spadding;
637 }
638
639 /* Only for type 'c' special case, it has no digits. */
640 if (spec->n_digits != 0) {
641 /* Fill the digits with InsertThousandsGrouping. */
642 r = _PyUnicode_InsertThousandsGrouping(
643 writer, spec->n_grouped_digits,
644 digits, d_pos, spec->n_digits,
645 spec->n_min_width,
646 locale->grouping, locale->thousands_sep, NULL);
647 if (r == -1)
648 return -1;
649 assert(r == spec->n_grouped_digits);
650 d_pos += spec->n_digits;
651 }
652 if (toupper) {
653 Py_ssize_t t;
654 for (t = 0; t < spec->n_grouped_digits; t++) {
655 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
656 c = Py_TOUPPER(c);
657 if (c > 127) {
658 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
659 return -1;
660 }
661 PyUnicode_WRITE(kind, data, writer->pos + t, c);
662 }
663 }
664 writer->pos += spec->n_grouped_digits;
665
666 if (spec->n_decimal) {
667 _PyUnicode_FastCopyCharacters(
668 writer->buffer, writer->pos,
669 locale->decimal_point, 0, spec->n_decimal);
670 writer->pos += spec->n_decimal;
671 d_pos += 1;
672 }
673
674 if (spec->n_remainder) {
675 _PyUnicode_FastCopyCharacters(
676 writer->buffer, writer->pos,
677 digits, d_pos, spec->n_remainder);
678 writer->pos += spec->n_remainder;
679 /* d_pos += spec->n_remainder; */
680 }
681
682 if (spec->n_rpadding) {
683 _PyUnicode_FastFill(writer->buffer,
684 writer->pos, spec->n_rpadding,
685 fill_char);
686 writer->pos += spec->n_rpadding;
687 }
688 return 0;
689 }
690
691 static const char no_grouping[1] = {CHAR_MAX};
692
693 /* Find the decimal point character(s?), thousands_separator(s?), and
694 grouping description, either for the current locale if type is
695 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
696 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
697 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)698 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
699 {
700 switch (type) {
701 case LT_CURRENT_LOCALE: {
702 struct lconv *lc = localeconv();
703 if (_Py_GetLocaleconvNumeric(lc,
704 &locale_info->decimal_point,
705 &locale_info->thousands_sep) < 0) {
706 return -1;
707 }
708
709 /* localeconv() grouping can become a dangling pointer or point
710 to a different string if another thread calls localeconv() during
711 the string formatting. Copy the string to avoid this risk. */
712 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
713 if (locale_info->grouping_buffer == NULL) {
714 PyErr_NoMemory();
715 return -1;
716 }
717 locale_info->grouping = locale_info->grouping_buffer;
718 break;
719 }
720 case LT_DEFAULT_LOCALE:
721 case LT_UNDERSCORE_LOCALE:
722 case LT_UNDER_FOUR_LOCALE:
723 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
724 locale_info->thousands_sep = PyUnicode_FromOrdinal(
725 type == LT_DEFAULT_LOCALE ? ',' : '_');
726 if (!locale_info->decimal_point || !locale_info->thousands_sep)
727 return -1;
728 if (type != LT_UNDER_FOUR_LOCALE)
729 locale_info->grouping = "\3"; /* Group every 3 characters. The
730 (implicit) trailing 0 means repeat
731 infinitely. */
732 else
733 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
734 break;
735 case LT_NO_LOCALE:
736 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
737 locale_info->thousands_sep = PyUnicode_New(0, 0);
738 if (!locale_info->decimal_point || !locale_info->thousands_sep)
739 return -1;
740 locale_info->grouping = no_grouping;
741 break;
742 }
743 return 0;
744 }
745
746 static void
free_locale_info(LocaleInfo * locale_info)747 free_locale_info(LocaleInfo *locale_info)
748 {
749 Py_XDECREF(locale_info->decimal_point);
750 Py_XDECREF(locale_info->thousands_sep);
751 PyMem_Free(locale_info->grouping_buffer);
752 }
753
754 /************************************************************************/
755 /*********** string formatting ******************************************/
756 /************************************************************************/
757
758 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)759 format_string_internal(PyObject *value, const InternalFormatSpec *format,
760 _PyUnicodeWriter *writer)
761 {
762 Py_ssize_t lpad;
763 Py_ssize_t rpad;
764 Py_ssize_t total;
765 Py_ssize_t len;
766 int result = -1;
767 Py_UCS4 maxchar;
768
769 assert(PyUnicode_IS_READY(value));
770 len = PyUnicode_GET_LENGTH(value);
771
772 /* sign is not allowed on strings */
773 if (format->sign != '\0') {
774 PyErr_SetString(PyExc_ValueError,
775 "Sign not allowed in string format specifier");
776 goto done;
777 }
778
779 /* alternate is not allowed on strings */
780 if (format->alternate) {
781 PyErr_SetString(PyExc_ValueError,
782 "Alternate form (#) not allowed in string format "
783 "specifier");
784 goto done;
785 }
786
787 /* '=' alignment not allowed on strings */
788 if (format->align == '=') {
789 PyErr_SetString(PyExc_ValueError,
790 "'=' alignment not allowed "
791 "in string format specifier");
792 goto done;
793 }
794
795 if ((format->width == -1 || format->width <= len)
796 && (format->precision == -1 || format->precision >= len)) {
797 /* Fast path */
798 return _PyUnicodeWriter_WriteStr(writer, value);
799 }
800
801 /* if precision is specified, output no more that format.precision
802 characters */
803 if (format->precision >= 0 && len >= format->precision) {
804 len = format->precision;
805 }
806
807 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
808
809 maxchar = writer->maxchar;
810 if (lpad != 0 || rpad != 0)
811 maxchar = Py_MAX(maxchar, format->fill_char);
812 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
813 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
814 maxchar = Py_MAX(maxchar, valmaxchar);
815 }
816
817 /* allocate the resulting string */
818 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
819 goto done;
820
821 /* Write into that space. First the padding. */
822 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
823 if (result == -1)
824 goto done;
825
826 /* Then the source string. */
827 if (len) {
828 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
829 value, 0, len);
830 }
831 writer->pos += (len + rpad);
832 result = 0;
833
834 done:
835 return result;
836 }
837
838
839 /************************************************************************/
840 /*********** long formatting ********************************************/
841 /************************************************************************/
842
843 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)844 format_long_internal(PyObject *value, const InternalFormatSpec *format,
845 _PyUnicodeWriter *writer)
846 {
847 int result = -1;
848 Py_UCS4 maxchar = 127;
849 PyObject *tmp = NULL;
850 Py_ssize_t inumeric_chars;
851 Py_UCS4 sign_char = '\0';
852 Py_ssize_t n_digits; /* count of digits need from the computed
853 string */
854 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
855 produces non-digits */
856 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
857 Py_ssize_t n_total;
858 Py_ssize_t prefix = 0;
859 NumberFieldWidths spec;
860 long x;
861
862 /* Locale settings, either from the actual locale or
863 from a hard-code pseudo-locale */
864 LocaleInfo locale = LocaleInfo_STATIC_INIT;
865
866 /* no precision allowed on integers */
867 if (format->precision != -1) {
868 PyErr_SetString(PyExc_ValueError,
869 "Precision not allowed in integer format specifier");
870 goto done;
871 }
872
873 /* special case for character formatting */
874 if (format->type == 'c') {
875 /* error to specify a sign */
876 if (format->sign != '\0') {
877 PyErr_SetString(PyExc_ValueError,
878 "Sign not allowed with integer"
879 " format specifier 'c'");
880 goto done;
881 }
882 /* error to request alternate format */
883 if (format->alternate) {
884 PyErr_SetString(PyExc_ValueError,
885 "Alternate form (#) not allowed with integer"
886 " format specifier 'c'");
887 goto done;
888 }
889
890 /* taken from unicodeobject.c formatchar() */
891 /* Integer input truncated to a character */
892 x = PyLong_AsLong(value);
893 if (x == -1 && PyErr_Occurred())
894 goto done;
895 if (x < 0 || x > 0x10ffff) {
896 PyErr_SetString(PyExc_OverflowError,
897 "%c arg not in range(0x110000)");
898 goto done;
899 }
900 tmp = PyUnicode_FromOrdinal(x);
901 inumeric_chars = 0;
902 n_digits = 1;
903 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
904
905 /* As a sort-of hack, we tell calc_number_widths that we only
906 have "remainder" characters. calc_number_widths thinks
907 these are characters that don't get formatted, only copied
908 into the output string. We do this for 'c' formatting,
909 because the characters are likely to be non-digits. */
910 n_remainder = 1;
911 }
912 else {
913 int base;
914 int leading_chars_to_skip = 0; /* Number of characters added by
915 PyNumber_ToBase that we want to
916 skip over. */
917
918 /* Compute the base and how many characters will be added by
919 PyNumber_ToBase */
920 switch (format->type) {
921 case 'b':
922 base = 2;
923 leading_chars_to_skip = 2; /* 0b */
924 break;
925 case 'o':
926 base = 8;
927 leading_chars_to_skip = 2; /* 0o */
928 break;
929 case 'x':
930 case 'X':
931 base = 16;
932 leading_chars_to_skip = 2; /* 0x */
933 break;
934 default: /* shouldn't be needed, but stops a compiler warning */
935 case 'd':
936 case 'n':
937 base = 10;
938 break;
939 }
940
941 if (format->sign != '+' && format->sign != ' '
942 && format->width == -1
943 && format->type != 'X' && format->type != 'n'
944 && !format->thousands_separators
945 && PyLong_CheckExact(value))
946 {
947 /* Fast path */
948 return _PyLong_FormatWriter(writer, value, base, format->alternate);
949 }
950
951 /* The number of prefix chars is the same as the leading
952 chars to skip */
953 if (format->alternate)
954 n_prefix = leading_chars_to_skip;
955
956 /* Do the hard part, converting to a string in a given base */
957 tmp = _PyLong_Format(value, base);
958 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
959 goto done;
960
961 inumeric_chars = 0;
962 n_digits = PyUnicode_GET_LENGTH(tmp);
963
964 prefix = inumeric_chars;
965
966 /* Is a sign character present in the output? If so, remember it
967 and skip it */
968 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
969 sign_char = '-';
970 ++prefix;
971 ++leading_chars_to_skip;
972 }
973
974 /* Skip over the leading chars (0x, 0b, etc.) */
975 n_digits -= leading_chars_to_skip;
976 inumeric_chars += leading_chars_to_skip;
977 }
978
979 /* Determine the grouping, separator, and decimal point, if any. */
980 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
981 format->thousands_separators,
982 &locale) == -1)
983 goto done;
984
985 /* Calculate how much memory we'll need. */
986 n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
987 inumeric_chars + n_digits, n_remainder, 0,
988 &locale, format, &maxchar);
989 if (n_total == -1) {
990 goto done;
991 }
992
993 /* Allocate the memory. */
994 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
995 goto done;
996
997 /* Populate the memory. */
998 result = fill_number(writer, &spec,
999 tmp, inumeric_chars, inumeric_chars + n_digits,
1000 tmp, prefix, format->fill_char,
1001 &locale, format->type == 'X');
1002
1003 done:
1004 Py_XDECREF(tmp);
1005 free_locale_info(&locale);
1006 return result;
1007 }
1008
1009 /************************************************************************/
1010 /*********** float formatting *******************************************/
1011 /************************************************************************/
1012
1013 /* much of this is taken from unicodeobject.c */
1014 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1015 format_float_internal(PyObject *value,
1016 const InternalFormatSpec *format,
1017 _PyUnicodeWriter *writer)
1018 {
1019 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1020 Py_ssize_t n_digits;
1021 Py_ssize_t n_remainder;
1022 Py_ssize_t n_total;
1023 int has_decimal;
1024 double val;
1025 int precision, default_precision = 6;
1026 Py_UCS4 type = format->type;
1027 int add_pct = 0;
1028 Py_ssize_t index;
1029 NumberFieldWidths spec;
1030 int flags = 0;
1031 int result = -1;
1032 Py_UCS4 maxchar = 127;
1033 Py_UCS4 sign_char = '\0';
1034 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1035 PyObject *unicode_tmp = NULL;
1036
1037 /* Locale settings, either from the actual locale or
1038 from a hard-code pseudo-locale */
1039 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1040
1041 if (format->precision > INT_MAX) {
1042 PyErr_SetString(PyExc_ValueError, "precision too big");
1043 goto done;
1044 }
1045 precision = (int)format->precision;
1046
1047 if (format->alternate)
1048 flags |= Py_DTSF_ALT;
1049
1050 if (type == '\0') {
1051 /* Omitted type specifier. Behaves in the same way as repr(x)
1052 and str(x) if no precision is given, else like 'g', but with
1053 at least one digit after the decimal point. */
1054 flags |= Py_DTSF_ADD_DOT_0;
1055 type = 'r';
1056 default_precision = 0;
1057 }
1058
1059 if (type == 'n')
1060 /* 'n' is the same as 'g', except for the locale used to
1061 format the result. We take care of that later. */
1062 type = 'g';
1063
1064 val = PyFloat_AsDouble(value);
1065 if (val == -1.0 && PyErr_Occurred())
1066 goto done;
1067
1068 if (type == '%') {
1069 type = 'f';
1070 val *= 100;
1071 add_pct = 1;
1072 }
1073
1074 if (precision < 0)
1075 precision = default_precision;
1076 else if (type == 'r')
1077 type = 'g';
1078
1079 /* Cast "type", because if we're in unicode we need to pass an
1080 8-bit char. This is safe, because we've restricted what "type"
1081 can be. */
1082 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1083 &float_type);
1084 if (buf == NULL)
1085 goto done;
1086 n_digits = strlen(buf);
1087
1088 if (add_pct) {
1089 /* We know that buf has a trailing zero (since we just called
1090 strlen() on it), and we don't use that fact any more. So we
1091 can just write over the trailing zero. */
1092 buf[n_digits] = '%';
1093 n_digits += 1;
1094 }
1095
1096 if (format->sign != '+' && format->sign != ' '
1097 && format->width == -1
1098 && format->type != 'n'
1099 && !format->thousands_separators)
1100 {
1101 /* Fast path */
1102 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1103 PyMem_Free(buf);
1104 return result;
1105 }
1106
1107 /* Since there is no unicode version of PyOS_double_to_string,
1108 just use the 8 bit version and then convert to unicode. */
1109 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1110 PyMem_Free(buf);
1111 if (unicode_tmp == NULL)
1112 goto done;
1113
1114 /* Is a sign character present in the output? If so, remember it
1115 and skip it */
1116 index = 0;
1117 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1118 sign_char = '-';
1119 ++index;
1120 --n_digits;
1121 }
1122
1123 /* Determine if we have any "remainder" (after the digits, might include
1124 decimal or exponent or both (or neither)) */
1125 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1126
1127 /* Determine the grouping, separator, and decimal point, if any. */
1128 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1129 format->thousands_separators,
1130 &locale) == -1)
1131 goto done;
1132
1133 /* Calculate how much memory we'll need. */
1134 n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
1135 index + n_digits, n_remainder, has_decimal,
1136 &locale, format, &maxchar);
1137 if (n_total == -1) {
1138 goto done;
1139 }
1140
1141 /* Allocate the memory. */
1142 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1143 goto done;
1144
1145 /* Populate the memory. */
1146 result = fill_number(writer, &spec,
1147 unicode_tmp, index, index + n_digits,
1148 NULL, 0, format->fill_char,
1149 &locale, 0);
1150
1151 done:
1152 Py_XDECREF(unicode_tmp);
1153 free_locale_info(&locale);
1154 return result;
1155 }
1156
1157 /************************************************************************/
1158 /*********** complex formatting *****************************************/
1159 /************************************************************************/
1160
1161 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1162 format_complex_internal(PyObject *value,
1163 const InternalFormatSpec *format,
1164 _PyUnicodeWriter *writer)
1165 {
1166 double re;
1167 double im;
1168 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1169 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1170
1171 InternalFormatSpec tmp_format = *format;
1172 Py_ssize_t n_re_digits;
1173 Py_ssize_t n_im_digits;
1174 Py_ssize_t n_re_remainder;
1175 Py_ssize_t n_im_remainder;
1176 Py_ssize_t n_re_total;
1177 Py_ssize_t n_im_total;
1178 int re_has_decimal;
1179 int im_has_decimal;
1180 int precision, default_precision = 6;
1181 Py_UCS4 type = format->type;
1182 Py_ssize_t i_re;
1183 Py_ssize_t i_im;
1184 NumberFieldWidths re_spec;
1185 NumberFieldWidths im_spec;
1186 int flags = 0;
1187 int result = -1;
1188 Py_UCS4 maxchar = 127;
1189 enum PyUnicode_Kind rkind;
1190 void *rdata;
1191 Py_UCS4 re_sign_char = '\0';
1192 Py_UCS4 im_sign_char = '\0';
1193 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1194 int im_float_type;
1195 int add_parens = 0;
1196 int skip_re = 0;
1197 Py_ssize_t lpad;
1198 Py_ssize_t rpad;
1199 Py_ssize_t total;
1200 PyObject *re_unicode_tmp = NULL;
1201 PyObject *im_unicode_tmp = NULL;
1202
1203 /* Locale settings, either from the actual locale or
1204 from a hard-code pseudo-locale */
1205 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1206
1207 if (format->precision > INT_MAX) {
1208 PyErr_SetString(PyExc_ValueError, "precision too big");
1209 goto done;
1210 }
1211 precision = (int)format->precision;
1212
1213 /* Zero padding is not allowed. */
1214 if (format->fill_char == '0') {
1215 PyErr_SetString(PyExc_ValueError,
1216 "Zero padding is not allowed in complex format "
1217 "specifier");
1218 goto done;
1219 }
1220
1221 /* Neither is '=' alignment . */
1222 if (format->align == '=') {
1223 PyErr_SetString(PyExc_ValueError,
1224 "'=' alignment flag is not allowed in complex format "
1225 "specifier");
1226 goto done;
1227 }
1228
1229 re = PyComplex_RealAsDouble(value);
1230 if (re == -1.0 && PyErr_Occurred())
1231 goto done;
1232 im = PyComplex_ImagAsDouble(value);
1233 if (im == -1.0 && PyErr_Occurred())
1234 goto done;
1235
1236 if (format->alternate)
1237 flags |= Py_DTSF_ALT;
1238
1239 if (type == '\0') {
1240 /* Omitted type specifier. Should be like str(self). */
1241 type = 'r';
1242 default_precision = 0;
1243 if (re == 0.0 && copysign(1.0, re) == 1.0)
1244 skip_re = 1;
1245 else
1246 add_parens = 1;
1247 }
1248
1249 if (type == 'n')
1250 /* 'n' is the same as 'g', except for the locale used to
1251 format the result. We take care of that later. */
1252 type = 'g';
1253
1254 if (precision < 0)
1255 precision = default_precision;
1256 else if (type == 'r')
1257 type = 'g';
1258
1259 /* Cast "type", because if we're in unicode we need to pass an
1260 8-bit char. This is safe, because we've restricted what "type"
1261 can be. */
1262 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1263 &re_float_type);
1264 if (re_buf == NULL)
1265 goto done;
1266 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1267 &im_float_type);
1268 if (im_buf == NULL)
1269 goto done;
1270
1271 n_re_digits = strlen(re_buf);
1272 n_im_digits = strlen(im_buf);
1273
1274 /* Since there is no unicode version of PyOS_double_to_string,
1275 just use the 8 bit version and then convert to unicode. */
1276 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1277 if (re_unicode_tmp == NULL)
1278 goto done;
1279 i_re = 0;
1280
1281 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1282 if (im_unicode_tmp == NULL)
1283 goto done;
1284 i_im = 0;
1285
1286 /* Is a sign character present in the output? If so, remember it
1287 and skip it */
1288 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1289 re_sign_char = '-';
1290 ++i_re;
1291 --n_re_digits;
1292 }
1293 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1294 im_sign_char = '-';
1295 ++i_im;
1296 --n_im_digits;
1297 }
1298
1299 /* Determine if we have any "remainder" (after the digits, might include
1300 decimal or exponent or both (or neither)) */
1301 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1302 &n_re_remainder, &re_has_decimal);
1303 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1304 &n_im_remainder, &im_has_decimal);
1305
1306 /* Determine the grouping, separator, and decimal point, if any. */
1307 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1308 format->thousands_separators,
1309 &locale) == -1)
1310 goto done;
1311
1312 /* Turn off any padding. We'll do it later after we've composed
1313 the numbers without padding. */
1314 tmp_format.fill_char = '\0';
1315 tmp_format.align = '<';
1316 tmp_format.width = -1;
1317
1318 /* Calculate how much memory we'll need. */
1319 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp,
1320 i_re, i_re + n_re_digits, n_re_remainder,
1321 re_has_decimal, &locale, &tmp_format,
1322 &maxchar);
1323 if (n_re_total == -1) {
1324 goto done;
1325 }
1326
1327 /* Same formatting, but always include a sign, unless the real part is
1328 * going to be omitted, in which case we use whatever sign convention was
1329 * requested by the original format. */
1330 if (!skip_re)
1331 tmp_format.sign = '+';
1332 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp,
1333 i_im, i_im + n_im_digits, n_im_remainder,
1334 im_has_decimal, &locale, &tmp_format,
1335 &maxchar);
1336 if (n_im_total == -1) {
1337 goto done;
1338 }
1339
1340 if (skip_re)
1341 n_re_total = 0;
1342
1343 /* Add 1 for the 'j', and optionally 2 for parens. */
1344 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1345 format->width, format->align, &lpad, &rpad, &total);
1346
1347 if (lpad || rpad)
1348 maxchar = Py_MAX(maxchar, format->fill_char);
1349
1350 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1351 goto done;
1352 rkind = writer->kind;
1353 rdata = writer->data;
1354
1355 /* Populate the memory. First, the padding. */
1356 result = fill_padding(writer,
1357 n_re_total + n_im_total + 1 + add_parens * 2,
1358 format->fill_char, lpad, rpad);
1359 if (result == -1)
1360 goto done;
1361
1362 if (add_parens) {
1363 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1364 writer->pos++;
1365 }
1366
1367 if (!skip_re) {
1368 result = fill_number(writer, &re_spec,
1369 re_unicode_tmp, i_re, i_re + n_re_digits,
1370 NULL, 0,
1371 0,
1372 &locale, 0);
1373 if (result == -1)
1374 goto done;
1375 }
1376 result = fill_number(writer, &im_spec,
1377 im_unicode_tmp, i_im, i_im + n_im_digits,
1378 NULL, 0,
1379 0,
1380 &locale, 0);
1381 if (result == -1)
1382 goto done;
1383 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1384 writer->pos++;
1385
1386 if (add_parens) {
1387 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1388 writer->pos++;
1389 }
1390
1391 writer->pos += rpad;
1392
1393 done:
1394 PyMem_Free(re_buf);
1395 PyMem_Free(im_buf);
1396 Py_XDECREF(re_unicode_tmp);
1397 Py_XDECREF(im_unicode_tmp);
1398 free_locale_info(&locale);
1399 return result;
1400 }
1401
1402 /************************************************************************/
1403 /*********** built in formatters ****************************************/
1404 /************************************************************************/
1405 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1406 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1407 {
1408 PyObject *str;
1409 int err;
1410
1411 str = PyObject_Str(obj);
1412 if (str == NULL)
1413 return -1;
1414 err = _PyUnicodeWriter_WriteStr(writer, str);
1415 Py_DECREF(str);
1416 return err;
1417 }
1418
1419 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1420 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1421 PyObject *obj,
1422 PyObject *format_spec,
1423 Py_ssize_t start, Py_ssize_t end)
1424 {
1425 InternalFormatSpec format;
1426
1427 assert(PyUnicode_Check(obj));
1428
1429 /* check for the special case of zero length format spec, make
1430 it equivalent to str(obj) */
1431 if (start == end) {
1432 if (PyUnicode_CheckExact(obj))
1433 return _PyUnicodeWriter_WriteStr(writer, obj);
1434 else
1435 return format_obj(obj, writer);
1436 }
1437
1438 /* parse the format_spec */
1439 if (!parse_internal_render_format_spec(format_spec, start, end,
1440 &format, 's', '<'))
1441 return -1;
1442
1443 /* type conversion? */
1444 switch (format.type) {
1445 case 's':
1446 /* no type conversion needed, already a string. do the formatting */
1447 return format_string_internal(obj, &format, writer);
1448 default:
1449 /* unknown */
1450 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1451 return -1;
1452 }
1453 }
1454
1455 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1456 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1457 PyObject *obj,
1458 PyObject *format_spec,
1459 Py_ssize_t start, Py_ssize_t end)
1460 {
1461 PyObject *tmp = NULL, *str = NULL;
1462 InternalFormatSpec format;
1463 int result = -1;
1464
1465 /* check for the special case of zero length format spec, make
1466 it equivalent to str(obj) */
1467 if (start == end) {
1468 if (PyLong_CheckExact(obj))
1469 return _PyLong_FormatWriter(writer, obj, 10, 0);
1470 else
1471 return format_obj(obj, writer);
1472 }
1473
1474 /* parse the format_spec */
1475 if (!parse_internal_render_format_spec(format_spec, start, end,
1476 &format, 'd', '>'))
1477 goto done;
1478
1479 /* type conversion? */
1480 switch (format.type) {
1481 case 'b':
1482 case 'c':
1483 case 'd':
1484 case 'o':
1485 case 'x':
1486 case 'X':
1487 case 'n':
1488 /* no type conversion needed, already an int. do the formatting */
1489 result = format_long_internal(obj, &format, writer);
1490 break;
1491
1492 case 'e':
1493 case 'E':
1494 case 'f':
1495 case 'F':
1496 case 'g':
1497 case 'G':
1498 case '%':
1499 /* convert to float */
1500 tmp = PyNumber_Float(obj);
1501 if (tmp == NULL)
1502 goto done;
1503 result = format_float_internal(tmp, &format, writer);
1504 break;
1505
1506 default:
1507 /* unknown */
1508 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1509 goto done;
1510 }
1511
1512 done:
1513 Py_XDECREF(tmp);
1514 Py_XDECREF(str);
1515 return result;
1516 }
1517
1518 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1519 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1520 PyObject *obj,
1521 PyObject *format_spec,
1522 Py_ssize_t start, Py_ssize_t end)
1523 {
1524 InternalFormatSpec format;
1525
1526 /* check for the special case of zero length format spec, make
1527 it equivalent to str(obj) */
1528 if (start == end)
1529 return format_obj(obj, writer);
1530
1531 /* parse the format_spec */
1532 if (!parse_internal_render_format_spec(format_spec, start, end,
1533 &format, '\0', '>'))
1534 return -1;
1535
1536 /* type conversion? */
1537 switch (format.type) {
1538 case '\0': /* No format code: like 'g', but with at least one decimal. */
1539 case 'e':
1540 case 'E':
1541 case 'f':
1542 case 'F':
1543 case 'g':
1544 case 'G':
1545 case 'n':
1546 case '%':
1547 /* no conversion, already a float. do the formatting */
1548 return format_float_internal(obj, &format, writer);
1549
1550 default:
1551 /* unknown */
1552 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1553 return -1;
1554 }
1555 }
1556
1557 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1558 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1559 PyObject *obj,
1560 PyObject *format_spec,
1561 Py_ssize_t start, Py_ssize_t end)
1562 {
1563 InternalFormatSpec format;
1564
1565 /* check for the special case of zero length format spec, make
1566 it equivalent to str(obj) */
1567 if (start == end)
1568 return format_obj(obj, writer);
1569
1570 /* parse the format_spec */
1571 if (!parse_internal_render_format_spec(format_spec, start, end,
1572 &format, '\0', '>'))
1573 return -1;
1574
1575 /* type conversion? */
1576 switch (format.type) {
1577 case '\0': /* No format code: like 'g', but with at least one decimal. */
1578 case 'e':
1579 case 'E':
1580 case 'f':
1581 case 'F':
1582 case 'g':
1583 case 'G':
1584 case 'n':
1585 /* no conversion, already a complex. do the formatting */
1586 return format_complex_internal(obj, &format, writer);
1587
1588 default:
1589 /* unknown */
1590 unknown_presentation_type(format.type, obj->ob_type->tp_name);
1591 return -1;
1592 }
1593 }
1594