/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4
5 #include "Python.h"
6 #include "pycore_fileutils.h"
7 #include <locale.h>
8
9 /* Raises an exception about an unknown presentation type for this
10 * type. */
11
12 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)13 unknown_presentation_type(Py_UCS4 presentation_type,
14 const char* type_name)
15 {
16 /* %c might be out-of-range, hence the two cases. */
17 if (presentation_type > 32 && presentation_type < 128)
18 PyErr_Format(PyExc_ValueError,
19 "Unknown format code '%c' "
20 "for object of type '%.200s'",
21 (char)presentation_type,
22 type_name);
23 else
24 PyErr_Format(PyExc_ValueError,
25 "Unknown format code '\\x%x' "
26 "for object of type '%.200s'",
27 (unsigned int)presentation_type,
28 type_name);
29 }
30
31 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)32 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
33 {
34 assert(specifier == ',' || specifier == '_');
35 if (presentation_type > 32 && presentation_type < 128)
36 PyErr_Format(PyExc_ValueError,
37 "Cannot specify '%c' with '%c'.",
38 specifier, (char)presentation_type);
39 else
40 PyErr_Format(PyExc_ValueError,
41 "Cannot specify '%c' with '\\x%x'.",
42 specifier, (unsigned int)presentation_type);
43 }
44
45 static void
invalid_comma_and_underscore(void)46 invalid_comma_and_underscore(void)
47 {
48 PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49 }
50
51 /*
52 get_integer consumes 0 or more decimal digit characters from an
53 input string, updates *result with the corresponding positive
54 integer, and returns the number of digits consumed.
55
56 returns -1 on error.
57 */
58 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)59 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
60 Py_ssize_t *result)
61 {
62 Py_ssize_t accumulator, digitval, pos = *ppos;
63 int numdigits;
64 int kind = PyUnicode_KIND(str);
65 const void *data = PyUnicode_DATA(str);
66
67 accumulator = numdigits = 0;
68 for (; pos < end; pos++, numdigits++) {
69 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
70 if (digitval < 0)
71 break;
72 /*
73 Detect possible overflow before it happens:
74
75 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
77 */
78 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
79 PyErr_Format(PyExc_ValueError,
80 "Too many decimal digits in format string");
81 *ppos = pos;
82 return -1;
83 }
84 accumulator = accumulator * 10 + digitval;
85 }
86 *ppos = pos;
87 *result = accumulator;
88 return numdigits;
89 }
90
91 /************************************************************************/
92 /*********** standard format specifier parsing **************************/
93 /************************************************************************/
94
95 /* returns true if this character is a specifier alignment token */
96 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)97 is_alignment_token(Py_UCS4 c)
98 {
99 switch (c) {
100 case '<': case '>': case '=': case '^':
101 return 1;
102 default:
103 return 0;
104 }
105 }
106
107 /* returns true if this character is a sign element */
108 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)109 is_sign_element(Py_UCS4 c)
110 {
111 switch (c) {
112 case ' ': case '+': case '-':
113 return 1;
114 default:
115 return 0;
116 }
117 }
118
/* Locale type codes. LT_NO_LOCALE must be zero, because
   parse_internal_render_format_spec() and format_long_internal() test
   the thousands_separators field as a boolean.

   LT_DEFAULT_LOCALE and LT_UNDERSCORE_LOCALE deliberately have the
   value of the separator character they stand for: the parser passes
   the field to invalid_thousands_separator_type() as that character. */
enum LocaleType {
    LT_NO_LOCALE = 0,            /* no grouping, empty separator */
    LT_DEFAULT_LOCALE = ',',     /* ',' separator, groups of three */
    LT_UNDERSCORE_LOCALE = '_',  /* '_' separator, groups of three */
    LT_UNDER_FOUR_LOCALE,        /* '_' separator, groups of four
                                    (used for b/o/x/X) */
    LT_CURRENT_LOCALE            /* take the settings from localeconv() */
};
127
/* Parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    Py_UCS4 fill_char;      /* padding character; ' ' unless given, or
                               '0' for the zero-padding shorthand */
    Py_UCS4 align;          /* one of '<', '>', '=', '^'; the caller's
                               default when the spec has none */
    int alternate;          /* 1 if '#' was present, else 0 */
    Py_UCS4 sign;           /* ' ', '+', '-' or '\0' if not given */
    Py_ssize_t width;       /* minimum field width; -1 if not given */
    enum LocaleType thousands_separators;   /* LT_NO_LOCALE if none
                                               was requested */
    Py_ssize_t precision;   /* -1 if not given */
    Py_UCS4 type;           /* presentation type; the caller's default
                               when the spec has none */
} InternalFormatSpec;
138
#if 0
/* Occasionally useful for debugging.  Should normally be commented out
   (it is excluded from compilation by the surrounding #if 0).
   Writes every field of *format to stdout, one per line; the character
   fields other than type are printed as their numeric code. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", format->fill_char);
    printf("internal format spec: align %d\n", format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", format->type);
    printf("\n");
}
#endif
156
157
/*
  Parse the standard format specifier held in format_spec between index
  start (inclusive) and index end (exclusive), and fill in *format with
  the parsed information.

  The fields are consumed strictly in this order, each one optional:
      [[fill]align] [sign] [#] [0] [width] [, or _] [.precision] [type]

  default_type and default_align supply the values of format->type and
  format->align when the specifier does not give them.

  returns 1 on success, 0 on failure.
  if failure, sets the exception
*/
static int
parse_internal_render_format_spec(PyObject *format_spec,
                                  Py_ssize_t start, Py_ssize_t end,
                                  InternalFormatSpec *format,
                                  char default_type,
                                  char default_align)
{
    Py_ssize_t pos = start;
    int kind = PyUnicode_KIND(format_spec);
    const void *data = PyUnicode_DATA(format_spec);
    /* end-pos is used throughout this code to specify the length of
       the input string */
#define READ_spec(index) PyUnicode_READ(kind, data, index)

    Py_ssize_t consumed;
    int align_specified = 0;
    int fill_char_specified = 0;

    /* Start from the defaults; each field below overrides its entry
       only if it is present in the specifier. */
    format->fill_char = ' ';
    format->align = default_align;
    format->alternate = 0;
    format->sign = '\0';
    format->width = -1;
    format->thousands_separators = LT_NO_LOCALE;
    format->precision = -1;
    format->type = default_type;

    /* If the second char is an alignment token,
       then parse the fill char */
    if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
        format->align = READ_spec(pos+1);
        format->fill_char = READ_spec(pos);
        fill_char_specified = 1;
        align_specified = 1;
        pos += 2;
    }
    /* Otherwise the first char may be an alignment token on its own. */
    else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
        format->align = READ_spec(pos);
        align_specified = 1;
        ++pos;
    }

    /* Parse the various sign options */
    if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
        format->sign = READ_spec(pos);
        ++pos;
    }

    /* If the next character is #, we're in alternate mode.  This only
       applies to integers. */
    if (end-pos >= 1 && READ_spec(pos) == '#') {
        format->alternate = 1;
        ++pos;
    }

    /* The special case for 0-padding (backwards compat) */
    if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
        format->fill_char = '0';
        /* Switch to '=' alignment only when the caller's default is
           '>' and the spec gave no explicit alignment. */
        if (!align_specified && default_align == '>') {
            format->align = '=';
        }
        ++pos;
    }

    consumed = get_integer(format_spec, &pos, end, &format->width);
    if (consumed == -1)
        /* Overflow error. Exception already set. */
        return 0;

    /* If consumed is 0, we didn't consume any characters for the
       width. In that case, reset the width to -1, because
       get_integer() will have set it to zero. -1 is how we record
       that the width wasn't specified. */
    if (consumed == 0)
        format->width = -1;

    /* Comma signifies add thousands separators */
    if (end-pos && READ_spec(pos) == ',') {
        format->thousands_separators = LT_DEFAULT_LOCALE;
        ++pos;
    }
    /* Underscore signifies add thousands separators */
    if (end-pos && READ_spec(pos) == '_') {
        /* A comma was already consumed just above: ",_" is an error. */
        if (format->thousands_separators != LT_NO_LOCALE) {
            invalid_comma_and_underscore();
            return 0;
        }
        format->thousands_separators = LT_UNDERSCORE_LOCALE;
        ++pos;
    }
    /* A comma directly after an underscore ("_,") is the same error in
       the other order. */
    if (end-pos && READ_spec(pos) == ',') {
        if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
            invalid_comma_and_underscore();
            return 0;
        }
    }

    /* Parse field precision */
    if (end-pos && READ_spec(pos) == '.') {
        ++pos;

        consumed = get_integer(format_spec, &pos, end, &format->precision);
        if (consumed == -1)
            /* Overflow error. Exception already set. */
            return 0;

        /* Not having a precision after a dot is an error. */
        if (consumed == 0) {
            PyErr_Format(PyExc_ValueError,
                         "Format specifier missing precision");
            return 0;
        }

    }

    /* Finally, parse the type field. */

    if (end-pos > 1) {
        /* More than one char remain, invalid format specifier. */
        PyErr_Format(PyExc_ValueError, "Invalid format specifier");
        return 0;
    }

    if (end-pos == 1) {
        format->type = READ_spec(pos);
        ++pos;
    }

    /* Do as much validating as we can, just by looking at the format
       specifier.  Do not take into account what type of formatting
       we're doing (int, float, string). */

    if (format->thousands_separators) {
        switch (format->type) {
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'E':
        case 'G':
        case '%':
        case 'F':
        case '\0':
            /* These are allowed. See PEP 378.*/
            break;
        case 'b':
        case 'o':
        case 'x':
        case 'X':
            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
                /* Every four digits, not every three, in bin/oct/hex. */
                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
                break;
            }
            /* fall through */
        default:
            /* At this point thousands_separators is still ',' or '_',
               i.e. the separator character itself. */
            invalid_thousands_separator_type(format->thousands_separators, format->type);
            return 0;
        }
    }

    assert (format->align <= 127);
    assert (format->sign <= 127);
    return 1;
}
330
331 /* Calculate the padding needed. */
332 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)333 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
334 Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
335 Py_ssize_t *n_total)
336 {
337 if (width >= 0) {
338 if (nchars > width)
339 *n_total = nchars;
340 else
341 *n_total = width;
342 }
343 else {
344 /* not specified, use all of the chars and no more */
345 *n_total = nchars;
346 }
347
348 /* Figure out how much leading space we need, based on the
349 aligning */
350 if (align == '>')
351 *n_lpadding = *n_total - nchars;
352 else if (align == '^')
353 *n_lpadding = (*n_total - nchars) / 2;
354 else if (align == '<' || align == '=')
355 *n_lpadding = 0;
356 else {
357 /* We should never have an unspecified alignment. */
358 Py_UNREACHABLE();
359 }
360
361 *n_rpadding = *n_total - nchars - *n_lpadding;
362 }
363
364 /* Do the padding, and return a pointer to where the caller-supplied
365 content goes. */
366 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)367 fill_padding(_PyUnicodeWriter *writer,
368 Py_ssize_t nchars,
369 Py_UCS4 fill_char, Py_ssize_t n_lpadding,
370 Py_ssize_t n_rpadding)
371 {
372 Py_ssize_t pos;
373
374 /* Pad on left. */
375 if (n_lpadding) {
376 pos = writer->pos;
377 _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
378 }
379
380 /* Pad on right. */
381 if (n_rpadding) {
382 pos = writer->pos + nchars + n_lpadding;
383 _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
384 }
385
386 /* Pointer to the user content. */
387 writer->pos += n_lpadding;
388 return 0;
389 }
390
391 /************************************************************************/
392 /*********** common routines for numeric formatting *********************/
393 /************************************************************************/
394
/* Locale info needed for formatting integers and the part of floats
   before and including the decimal.  Filled in by get_locale_info()
   and released by free_locale_info().

   NOTE(review): the original remark here said that locales only
   support 8-bit chars, not unicode; in this file the decimal point and
   the separator are held as Python objects -- confirm whether that
   remark is still accurate. */
typedef struct {
    PyObject *decimal_point;    /* released with Py_XDECREF */
    PyObject *thousands_sep;    /* released with Py_XDECREF */
    const char *grouping;       /* grouping description; points either at
                                   grouping_buffer or at a static string */
    char *grouping_buffer;      /* private copy of the localeconv()
                                   grouping string, or NULL; released
                                   with PyMem_Free */
} LocaleInfo;

/* Positional initializer: all four members start out as NULL, which
   makes free_locale_info() safe on a structure that was never filled. */
#define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
406
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  Filled in by calc_number_widths()
   and consumed by fill_number(). */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill characters before the sign */
    Py_ssize_t n_prefix;    /* characters in the prefix (e.g. '0x') */
    Py_ssize_t n_spadding;  /* fill characters between the prefix and
                               the digits (used for '=' alignment) */
    Py_ssize_t n_rpadding;  /* fill characters after the number */
    char sign;              /* sign character to write, or '\0' */
    Py_ssize_t n_sign;      /* number of characters needed for sign (0/1) */
    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
                                    any grouping chars. */
    Py_ssize_t n_decimal;   /* 0 if only an integer */
    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
                               excluding the decimal itself, if
                               present. */

    /* These 2 are not the widths of fields, but are needed by
       STRINGLIB_GROUPING. */
    Py_ssize_t n_digits;    /* The number of digits before a decimal
                               or exponent. */
    Py_ssize_t n_min_width; /* The min_width we used when we computed
                               the n_grouped_digits width. */
} NumberFieldWidths;
430
431
432 /* Given a number of the form:
433 digits[remainder]
434 where ptr points to the start and end points to the end, find where
435 the integer part ends. This could be a decimal, an exponent, both,
436 or neither.
437 If a decimal point is present, set *has_decimal and increment
438 remainder beyond it.
439 Results are undefined (but shouldn't crash) for improperly
440 formatted strings.
441 */
442 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)443 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
444 Py_ssize_t *n_remainder, int *has_decimal)
445 {
446 Py_ssize_t remainder;
447 int kind = PyUnicode_KIND(s);
448 const void *data = PyUnicode_DATA(s);
449
450 while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
451 ++pos;
452 remainder = pos;
453
454 /* Does remainder start with a decimal point? */
455 *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
456
457 /* Skip the decimal point. */
458 if (*has_decimal)
459 remainder++;
460
461 *n_remainder = end - remainder;
462 }
463
464 /* not all fields of format are used. for example, precision is
465 unused. should this take discrete params in order to be more clear
466 about what it does? or is passing a single format parameter easier
467 and more efficient enough to justify a little obfuscation?
468 Return -1 on error. */
469 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)470 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
471 Py_UCS4 sign_char, Py_ssize_t n_start,
472 Py_ssize_t n_end, Py_ssize_t n_remainder,
473 int has_decimal, const LocaleInfo *locale,
474 const InternalFormatSpec *format, Py_UCS4 *maxchar)
475 {
476 Py_ssize_t n_non_digit_non_padding;
477 Py_ssize_t n_padding;
478
479 spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
480 spec->n_lpadding = 0;
481 spec->n_prefix = n_prefix;
482 spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
483 spec->n_remainder = n_remainder;
484 spec->n_spadding = 0;
485 spec->n_rpadding = 0;
486 spec->sign = '\0';
487 spec->n_sign = 0;
488
489 /* the output will look like:
490 | |
491 | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
492 | |
493
494 sign is computed from format->sign and the actual
495 sign of the number
496
497 prefix is given (it's for the '0x' prefix)
498
499 digits is already known
500
501 the total width is either given, or computed from the
502 actual digits
503
504 only one of lpadding, spadding, and rpadding can be non-zero,
505 and it's calculated from the width and other fields
506 */
507
508 /* compute the various parts we're going to write */
509 switch (format->sign) {
510 case '+':
511 /* always put a + or - */
512 spec->n_sign = 1;
513 spec->sign = (sign_char == '-' ? '-' : '+');
514 break;
515 case ' ':
516 spec->n_sign = 1;
517 spec->sign = (sign_char == '-' ? '-' : ' ');
518 break;
519 default:
520 /* Not specified, or the default (-) */
521 if (sign_char == '-') {
522 spec->n_sign = 1;
523 spec->sign = '-';
524 }
525 }
526
527 /* The number of chars used for non-digits and non-padding. */
528 n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
529 spec->n_remainder;
530
531 /* min_width can go negative, that's okay. format->width == -1 means
532 we don't care. */
533 if (format->fill_char == '0' && format->align == '=')
534 spec->n_min_width = format->width - n_non_digit_non_padding;
535 else
536 spec->n_min_width = 0;
537
538 if (spec->n_digits == 0)
539 /* This case only occurs when using 'c' formatting, we need
540 to special case it because the grouping code always wants
541 to have at least one character. */
542 spec->n_grouped_digits = 0;
543 else {
544 Py_UCS4 grouping_maxchar;
545 spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
546 NULL, 0,
547 NULL, 0, spec->n_digits,
548 spec->n_min_width,
549 locale->grouping, locale->thousands_sep, &grouping_maxchar);
550 if (spec->n_grouped_digits == -1) {
551 return -1;
552 }
553 *maxchar = Py_MAX(*maxchar, grouping_maxchar);
554 }
555
556 /* Given the desired width and the total of digit and non-digit
557 space we consume, see if we need any padding. format->width can
558 be negative (meaning no padding), but this code still works in
559 that case. */
560 n_padding = format->width -
561 (n_non_digit_non_padding + spec->n_grouped_digits);
562 if (n_padding > 0) {
563 /* Some padding is needed. Determine if it's left, space, or right. */
564 switch (format->align) {
565 case '<':
566 spec->n_rpadding = n_padding;
567 break;
568 case '^':
569 spec->n_lpadding = n_padding / 2;
570 spec->n_rpadding = n_padding - spec->n_lpadding;
571 break;
572 case '=':
573 spec->n_spadding = n_padding;
574 break;
575 case '>':
576 spec->n_lpadding = n_padding;
577 break;
578 default:
579 /* Shouldn't get here */
580 Py_UNREACHABLE();
581 }
582 }
583
584 if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
585 *maxchar = Py_MAX(*maxchar, format->fill_char);
586
587 if (spec->n_decimal)
588 *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
589
590 return spec->n_lpadding + spec->n_sign + spec->n_prefix +
591 spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
592 spec->n_remainder + spec->n_rpadding;
593 }
594
/* Fill in the digit parts of a number's string representation,
   as determined in calc_number_widths().

   The pieces are written into the writer buffer at writer->pos in
   layout order (left padding, sign, prefix, sign padding, grouped
   digits, decimal point, remainder, right padding), advancing
   writer->pos after each one.  The buffer must already have been
   prepared for the total size.

   digits/d_start locate the source digits and prefix/p_start the
   source prefix; toupper requests upper-casing of the prefix and of
   the grouped digits.

   Return -1 on error, or 0 on success. */
static int
fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
            PyObject *digits, Py_ssize_t d_start,
            PyObject *prefix, Py_ssize_t p_start,
            Py_UCS4 fill_char,
            LocaleInfo *locale, int toupper)
{
    /* Used to keep track of digits, decimal, and remainder. */
    Py_ssize_t d_pos = d_start;
    const unsigned int kind = writer->kind;
    const void *data = writer->data;
    Py_ssize_t r;

    if (spec->n_lpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_lpadding, fill_char);
        writer->pos += spec->n_lpadding;
    }
    if (spec->n_sign == 1) {
        PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
        writer->pos++;
    }
    if (spec->n_prefix) {
        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
                                      prefix, p_start,
                                      spec->n_prefix);
        /* Upper-case the copied prefix in place. */
        if (toupper) {
            Py_ssize_t t;
            for (t = 0; t < spec->n_prefix; t++) {
                Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
                c = Py_TOUPPER(c);
                assert (c <= 127);
                PyUnicode_WRITE(kind, data, writer->pos + t, c);
            }
        }
        writer->pos += spec->n_prefix;
    }
    if (spec->n_spadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_spadding, fill_char);
        writer->pos += spec->n_spadding;
    }

    /* Only for type 'c' special case, it has no digits. */
    if (spec->n_digits != 0) {
        /* Fill the digits with InsertThousandsGrouping. */
        r = _PyUnicode_InsertThousandsGrouping(
                writer, spec->n_grouped_digits,
                digits, d_pos, spec->n_digits,
                spec->n_min_width,
                locale->grouping, locale->thousands_sep, NULL);
        if (r == -1)
            return -1;
        assert(r == spec->n_grouped_digits);
        d_pos += spec->n_digits;
    }
    /* Upper-case the grouped digits in place; writer->pos still
       indexes their first character because it is only advanced
       after this loop. */
    if (toupper) {
        Py_ssize_t t;
        for (t = 0; t < spec->n_grouped_digits; t++) {
            Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
            c = Py_TOUPPER(c);
            if (c > 127) {
                PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
                return -1;
            }
            PyUnicode_WRITE(kind, data, writer->pos + t, c);
        }
    }
    writer->pos += spec->n_grouped_digits;

    if (spec->n_decimal) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            locale->decimal_point, 0, spec->n_decimal);
        writer->pos += spec->n_decimal;
        /* The source has a single decimal-point character, whatever
           the length of the locale's replacement. */
        d_pos += 1;
    }

    if (spec->n_remainder) {
        _PyUnicode_FastCopyCharacters(
            writer->buffer, writer->pos,
            digits, d_pos, spec->n_remainder);
        writer->pos += spec->n_remainder;
        /* d_pos += spec->n_remainder; */
    }

    if (spec->n_rpadding) {
        _PyUnicode_FastFill(writer->buffer,
                            writer->pos, spec->n_rpadding,
                            fill_char);
        writer->pos += spec->n_rpadding;
    }
    return 0;
}
692
/* Grouping description used by get_locale_info() for LT_NO_LOCALE.
   NOTE(review): this relies on the localeconv() convention that a
   CHAR_MAX element means no (further) grouping is performed. */
static const char no_grouping[1] = {CHAR_MAX};
694
695 /* Find the decimal point character(s?), thousands_separator(s?), and
696 grouping description, either for the current locale if type is
697 LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
698 LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
699 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)700 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
701 {
702 switch (type) {
703 case LT_CURRENT_LOCALE: {
704 struct lconv *lc = localeconv();
705 if (_Py_GetLocaleconvNumeric(lc,
706 &locale_info->decimal_point,
707 &locale_info->thousands_sep) < 0) {
708 return -1;
709 }
710
711 /* localeconv() grouping can become a dangling pointer or point
712 to a different string if another thread calls localeconv() during
713 the string formatting. Copy the string to avoid this risk. */
714 locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
715 if (locale_info->grouping_buffer == NULL) {
716 PyErr_NoMemory();
717 return -1;
718 }
719 locale_info->grouping = locale_info->grouping_buffer;
720 break;
721 }
722 case LT_DEFAULT_LOCALE:
723 case LT_UNDERSCORE_LOCALE:
724 case LT_UNDER_FOUR_LOCALE:
725 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
726 locale_info->thousands_sep = PyUnicode_FromOrdinal(
727 type == LT_DEFAULT_LOCALE ? ',' : '_');
728 if (!locale_info->decimal_point || !locale_info->thousands_sep)
729 return -1;
730 if (type != LT_UNDER_FOUR_LOCALE)
731 locale_info->grouping = "\3"; /* Group every 3 characters. The
732 (implicit) trailing 0 means repeat
733 infinitely. */
734 else
735 locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
736 break;
737 case LT_NO_LOCALE:
738 locale_info->decimal_point = PyUnicode_FromOrdinal('.');
739 locale_info->thousands_sep = PyUnicode_New(0, 0);
740 if (!locale_info->decimal_point || !locale_info->thousands_sep)
741 return -1;
742 locale_info->grouping = no_grouping;
743 break;
744 }
745 return 0;
746 }
747
748 static void
free_locale_info(LocaleInfo * locale_info)749 free_locale_info(LocaleInfo *locale_info)
750 {
751 Py_XDECREF(locale_info->decimal_point);
752 Py_XDECREF(locale_info->thousands_sep);
753 PyMem_Free(locale_info->grouping_buffer);
754 }
755
756 /************************************************************************/
757 /*********** string formatting ******************************************/
758 /************************************************************************/
759
760 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)761 format_string_internal(PyObject *value, const InternalFormatSpec *format,
762 _PyUnicodeWriter *writer)
763 {
764 Py_ssize_t lpad;
765 Py_ssize_t rpad;
766 Py_ssize_t total;
767 Py_ssize_t len;
768 int result = -1;
769 Py_UCS4 maxchar;
770
771 assert(PyUnicode_IS_READY(value));
772 len = PyUnicode_GET_LENGTH(value);
773
774 /* sign is not allowed on strings */
775 if (format->sign != '\0') {
776 if (format->sign == ' ') {
777 PyErr_SetString(PyExc_ValueError,
778 "Space not allowed in string format specifier");
779 }
780 else {
781 PyErr_SetString(PyExc_ValueError,
782 "Sign not allowed in string format specifier");
783 }
784 goto done;
785 }
786
787 /* alternate is not allowed on strings */
788 if (format->alternate) {
789 PyErr_SetString(PyExc_ValueError,
790 "Alternate form (#) not allowed in string format "
791 "specifier");
792 goto done;
793 }
794
795 /* '=' alignment not allowed on strings */
796 if (format->align == '=') {
797 PyErr_SetString(PyExc_ValueError,
798 "'=' alignment not allowed "
799 "in string format specifier");
800 goto done;
801 }
802
803 if ((format->width == -1 || format->width <= len)
804 && (format->precision == -1 || format->precision >= len)) {
805 /* Fast path */
806 return _PyUnicodeWriter_WriteStr(writer, value);
807 }
808
809 /* if precision is specified, output no more that format.precision
810 characters */
811 if (format->precision >= 0 && len >= format->precision) {
812 len = format->precision;
813 }
814
815 calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
816
817 maxchar = writer->maxchar;
818 if (lpad != 0 || rpad != 0)
819 maxchar = Py_MAX(maxchar, format->fill_char);
820 if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
821 Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
822 maxchar = Py_MAX(maxchar, valmaxchar);
823 }
824
825 /* allocate the resulting string */
826 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
827 goto done;
828
829 /* Write into that space. First the padding. */
830 result = fill_padding(writer, len, format->fill_char, lpad, rpad);
831 if (result == -1)
832 goto done;
833
834 /* Then the source string. */
835 if (len) {
836 _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
837 value, 0, len);
838 }
839 writer->pos += (len + rpad);
840 result = 0;
841
842 done:
843 return result;
844 }
845
846
847 /************************************************************************/
848 /*********** long formatting ********************************************/
849 /************************************************************************/
850
851 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)852 format_long_internal(PyObject *value, const InternalFormatSpec *format,
853 _PyUnicodeWriter *writer)
854 {
855 int result = -1;
856 Py_UCS4 maxchar = 127;
857 PyObject *tmp = NULL;
858 Py_ssize_t inumeric_chars;
859 Py_UCS4 sign_char = '\0';
860 Py_ssize_t n_digits; /* count of digits need from the computed
861 string */
862 Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
863 produces non-digits */
864 Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
865 Py_ssize_t n_total;
866 Py_ssize_t prefix = 0;
867 NumberFieldWidths spec;
868 long x;
869
870 /* Locale settings, either from the actual locale or
871 from a hard-code pseudo-locale */
872 LocaleInfo locale = LocaleInfo_STATIC_INIT;
873
874 /* no precision allowed on integers */
875 if (format->precision != -1) {
876 PyErr_SetString(PyExc_ValueError,
877 "Precision not allowed in integer format specifier");
878 goto done;
879 }
880
881 /* special case for character formatting */
882 if (format->type == 'c') {
883 /* error to specify a sign */
884 if (format->sign != '\0') {
885 PyErr_SetString(PyExc_ValueError,
886 "Sign not allowed with integer"
887 " format specifier 'c'");
888 goto done;
889 }
890 /* error to request alternate format */
891 if (format->alternate) {
892 PyErr_SetString(PyExc_ValueError,
893 "Alternate form (#) not allowed with integer"
894 " format specifier 'c'");
895 goto done;
896 }
897
898 /* taken from unicodeobject.c formatchar() */
899 /* Integer input truncated to a character */
900 x = PyLong_AsLong(value);
901 if (x == -1 && PyErr_Occurred())
902 goto done;
903 if (x < 0 || x > 0x10ffff) {
904 PyErr_SetString(PyExc_OverflowError,
905 "%c arg not in range(0x110000)");
906 goto done;
907 }
908 tmp = PyUnicode_FromOrdinal(x);
909 inumeric_chars = 0;
910 n_digits = 1;
911 maxchar = Py_MAX(maxchar, (Py_UCS4)x);
912
913 /* As a sort-of hack, we tell calc_number_widths that we only
914 have "remainder" characters. calc_number_widths thinks
915 these are characters that don't get formatted, only copied
916 into the output string. We do this for 'c' formatting,
917 because the characters are likely to be non-digits. */
918 n_remainder = 1;
919 }
920 else {
921 int base;
922 int leading_chars_to_skip = 0; /* Number of characters added by
923 PyNumber_ToBase that we want to
924 skip over. */
925
926 /* Compute the base and how many characters will be added by
927 PyNumber_ToBase */
928 switch (format->type) {
929 case 'b':
930 base = 2;
931 leading_chars_to_skip = 2; /* 0b */
932 break;
933 case 'o':
934 base = 8;
935 leading_chars_to_skip = 2; /* 0o */
936 break;
937 case 'x':
938 case 'X':
939 base = 16;
940 leading_chars_to_skip = 2; /* 0x */
941 break;
942 default: /* shouldn't be needed, but stops a compiler warning */
943 case 'd':
944 case 'n':
945 base = 10;
946 break;
947 }
948
949 if (format->sign != '+' && format->sign != ' '
950 && format->width == -1
951 && format->type != 'X' && format->type != 'n'
952 && !format->thousands_separators
953 && PyLong_CheckExact(value))
954 {
955 /* Fast path */
956 return _PyLong_FormatWriter(writer, value, base, format->alternate);
957 }
958
959 /* The number of prefix chars is the same as the leading
960 chars to skip */
961 if (format->alternate)
962 n_prefix = leading_chars_to_skip;
963
964 /* Do the hard part, converting to a string in a given base */
965 tmp = _PyLong_Format(value, base);
966 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
967 goto done;
968
969 inumeric_chars = 0;
970 n_digits = PyUnicode_GET_LENGTH(tmp);
971
972 prefix = inumeric_chars;
973
974 /* Is a sign character present in the output? If so, remember it
975 and skip it */
976 if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
977 sign_char = '-';
978 ++prefix;
979 ++leading_chars_to_skip;
980 }
981
982 /* Skip over the leading chars (0x, 0b, etc.) */
983 n_digits -= leading_chars_to_skip;
984 inumeric_chars += leading_chars_to_skip;
985 }
986
987 /* Determine the grouping, separator, and decimal point, if any. */
988 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
989 format->thousands_separators,
990 &locale) == -1)
991 goto done;
992
993 /* Calculate how much memory we'll need. */
994 n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
995 inumeric_chars + n_digits, n_remainder, 0,
996 &locale, format, &maxchar);
997 if (n_total == -1) {
998 goto done;
999 }
1000
1001 /* Allocate the memory. */
1002 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1003 goto done;
1004
1005 /* Populate the memory. */
1006 result = fill_number(writer, &spec,
1007 tmp, inumeric_chars,
1008 tmp, prefix, format->fill_char,
1009 &locale, format->type == 'X');
1010
1011 done:
1012 Py_XDECREF(tmp);
1013 free_locale_info(&locale);
1014 return result;
1015 }
1016
1017 /************************************************************************/
1018 /*********** float formatting *******************************************/
1019 /************************************************************************/
1020
1021 /* much of this is taken from unicodeobject.c */
1022 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1023 format_float_internal(PyObject *value,
1024 const InternalFormatSpec *format,
1025 _PyUnicodeWriter *writer)
1026 {
1027 char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1028 Py_ssize_t n_digits;
1029 Py_ssize_t n_remainder;
1030 Py_ssize_t n_total;
1031 int has_decimal;
1032 double val;
1033 int precision, default_precision = 6;
1034 Py_UCS4 type = format->type;
1035 int add_pct = 0;
1036 Py_ssize_t index;
1037 NumberFieldWidths spec;
1038 int flags = 0;
1039 int result = -1;
1040 Py_UCS4 maxchar = 127;
1041 Py_UCS4 sign_char = '\0';
1042 int float_type; /* Used to see if we have a nan, inf, or regular float. */
1043 PyObject *unicode_tmp = NULL;
1044
1045 /* Locale settings, either from the actual locale or
1046 from a hard-code pseudo-locale */
1047 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1048
1049 if (format->precision > INT_MAX) {
1050 PyErr_SetString(PyExc_ValueError, "precision too big");
1051 goto done;
1052 }
1053 precision = (int)format->precision;
1054
1055 if (format->alternate)
1056 flags |= Py_DTSF_ALT;
1057
1058 if (type == '\0') {
1059 /* Omitted type specifier. Behaves in the same way as repr(x)
1060 and str(x) if no precision is given, else like 'g', but with
1061 at least one digit after the decimal point. */
1062 flags |= Py_DTSF_ADD_DOT_0;
1063 type = 'r';
1064 default_precision = 0;
1065 }
1066
1067 if (type == 'n')
1068 /* 'n' is the same as 'g', except for the locale used to
1069 format the result. We take care of that later. */
1070 type = 'g';
1071
1072 val = PyFloat_AsDouble(value);
1073 if (val == -1.0 && PyErr_Occurred())
1074 goto done;
1075
1076 if (type == '%') {
1077 type = 'f';
1078 val *= 100;
1079 add_pct = 1;
1080 }
1081
1082 if (precision < 0)
1083 precision = default_precision;
1084 else if (type == 'r')
1085 type = 'g';
1086
1087 /* Cast "type", because if we're in unicode we need to pass an
1088 8-bit char. This is safe, because we've restricted what "type"
1089 can be. */
1090 buf = PyOS_double_to_string(val, (char)type, precision, flags,
1091 &float_type);
1092 if (buf == NULL)
1093 goto done;
1094 n_digits = strlen(buf);
1095
1096 if (add_pct) {
1097 /* We know that buf has a trailing zero (since we just called
1098 strlen() on it), and we don't use that fact any more. So we
1099 can just write over the trailing zero. */
1100 buf[n_digits] = '%';
1101 n_digits += 1;
1102 }
1103
1104 if (format->sign != '+' && format->sign != ' '
1105 && format->width == -1
1106 && format->type != 'n'
1107 && !format->thousands_separators)
1108 {
1109 /* Fast path */
1110 result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1111 PyMem_Free(buf);
1112 return result;
1113 }
1114
1115 /* Since there is no unicode version of PyOS_double_to_string,
1116 just use the 8 bit version and then convert to unicode. */
1117 unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1118 PyMem_Free(buf);
1119 if (unicode_tmp == NULL)
1120 goto done;
1121
1122 /* Is a sign character present in the output? If so, remember it
1123 and skip it */
1124 index = 0;
1125 if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1126 sign_char = '-';
1127 ++index;
1128 --n_digits;
1129 }
1130
1131 /* Determine if we have any "remainder" (after the digits, might include
1132 decimal or exponent or both (or neither)) */
1133 parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1134
1135 /* Determine the grouping, separator, and decimal point, if any. */
1136 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1137 format->thousands_separators,
1138 &locale) == -1)
1139 goto done;
1140
1141 /* Calculate how much memory we'll need. */
1142 n_total = calc_number_widths(&spec, 0, sign_char, index,
1143 index + n_digits, n_remainder, has_decimal,
1144 &locale, format, &maxchar);
1145 if (n_total == -1) {
1146 goto done;
1147 }
1148
1149 /* Allocate the memory. */
1150 if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1151 goto done;
1152
1153 /* Populate the memory. */
1154 result = fill_number(writer, &spec,
1155 unicode_tmp, index,
1156 NULL, 0, format->fill_char,
1157 &locale, 0);
1158
1159 done:
1160 Py_XDECREF(unicode_tmp);
1161 free_locale_info(&locale);
1162 return result;
1163 }
1164
1165 /************************************************************************/
1166 /*********** complex formatting *****************************************/
1167 /************************************************************************/
1168
1169 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1170 format_complex_internal(PyObject *value,
1171 const InternalFormatSpec *format,
1172 _PyUnicodeWriter *writer)
1173 {
1174 double re;
1175 double im;
1176 char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1177 char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1178
1179 InternalFormatSpec tmp_format = *format;
1180 Py_ssize_t n_re_digits;
1181 Py_ssize_t n_im_digits;
1182 Py_ssize_t n_re_remainder;
1183 Py_ssize_t n_im_remainder;
1184 Py_ssize_t n_re_total;
1185 Py_ssize_t n_im_total;
1186 int re_has_decimal;
1187 int im_has_decimal;
1188 int precision, default_precision = 6;
1189 Py_UCS4 type = format->type;
1190 Py_ssize_t i_re;
1191 Py_ssize_t i_im;
1192 NumberFieldWidths re_spec;
1193 NumberFieldWidths im_spec;
1194 int flags = 0;
1195 int result = -1;
1196 Py_UCS4 maxchar = 127;
1197 enum PyUnicode_Kind rkind;
1198 void *rdata;
1199 Py_UCS4 re_sign_char = '\0';
1200 Py_UCS4 im_sign_char = '\0';
1201 int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1202 int im_float_type;
1203 int add_parens = 0;
1204 int skip_re = 0;
1205 Py_ssize_t lpad;
1206 Py_ssize_t rpad;
1207 Py_ssize_t total;
1208 PyObject *re_unicode_tmp = NULL;
1209 PyObject *im_unicode_tmp = NULL;
1210
1211 /* Locale settings, either from the actual locale or
1212 from a hard-code pseudo-locale */
1213 LocaleInfo locale = LocaleInfo_STATIC_INIT;
1214
1215 if (format->precision > INT_MAX) {
1216 PyErr_SetString(PyExc_ValueError, "precision too big");
1217 goto done;
1218 }
1219 precision = (int)format->precision;
1220
1221 /* Zero padding is not allowed. */
1222 if (format->fill_char == '0') {
1223 PyErr_SetString(PyExc_ValueError,
1224 "Zero padding is not allowed in complex format "
1225 "specifier");
1226 goto done;
1227 }
1228
1229 /* Neither is '=' alignment . */
1230 if (format->align == '=') {
1231 PyErr_SetString(PyExc_ValueError,
1232 "'=' alignment flag is not allowed in complex format "
1233 "specifier");
1234 goto done;
1235 }
1236
1237 re = PyComplex_RealAsDouble(value);
1238 if (re == -1.0 && PyErr_Occurred())
1239 goto done;
1240 im = PyComplex_ImagAsDouble(value);
1241 if (im == -1.0 && PyErr_Occurred())
1242 goto done;
1243
1244 if (format->alternate)
1245 flags |= Py_DTSF_ALT;
1246
1247 if (type == '\0') {
1248 /* Omitted type specifier. Should be like str(self). */
1249 type = 'r';
1250 default_precision = 0;
1251 if (re == 0.0 && copysign(1.0, re) == 1.0)
1252 skip_re = 1;
1253 else
1254 add_parens = 1;
1255 }
1256
1257 if (type == 'n')
1258 /* 'n' is the same as 'g', except for the locale used to
1259 format the result. We take care of that later. */
1260 type = 'g';
1261
1262 if (precision < 0)
1263 precision = default_precision;
1264 else if (type == 'r')
1265 type = 'g';
1266
1267 /* Cast "type", because if we're in unicode we need to pass an
1268 8-bit char. This is safe, because we've restricted what "type"
1269 can be. */
1270 re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1271 &re_float_type);
1272 if (re_buf == NULL)
1273 goto done;
1274 im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1275 &im_float_type);
1276 if (im_buf == NULL)
1277 goto done;
1278
1279 n_re_digits = strlen(re_buf);
1280 n_im_digits = strlen(im_buf);
1281
1282 /* Since there is no unicode version of PyOS_double_to_string,
1283 just use the 8 bit version and then convert to unicode. */
1284 re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1285 if (re_unicode_tmp == NULL)
1286 goto done;
1287 i_re = 0;
1288
1289 im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1290 if (im_unicode_tmp == NULL)
1291 goto done;
1292 i_im = 0;
1293
1294 /* Is a sign character present in the output? If so, remember it
1295 and skip it */
1296 if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1297 re_sign_char = '-';
1298 ++i_re;
1299 --n_re_digits;
1300 }
1301 if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1302 im_sign_char = '-';
1303 ++i_im;
1304 --n_im_digits;
1305 }
1306
1307 /* Determine if we have any "remainder" (after the digits, might include
1308 decimal or exponent or both (or neither)) */
1309 parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1310 &n_re_remainder, &re_has_decimal);
1311 parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1312 &n_im_remainder, &im_has_decimal);
1313
1314 /* Determine the grouping, separator, and decimal point, if any. */
1315 if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1316 format->thousands_separators,
1317 &locale) == -1)
1318 goto done;
1319
1320 /* Turn off any padding. We'll do it later after we've composed
1321 the numbers without padding. */
1322 tmp_format.fill_char = '\0';
1323 tmp_format.align = '<';
1324 tmp_format.width = -1;
1325
1326 /* Calculate how much memory we'll need. */
1327 n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1328 i_re, i_re + n_re_digits, n_re_remainder,
1329 re_has_decimal, &locale, &tmp_format,
1330 &maxchar);
1331 if (n_re_total == -1) {
1332 goto done;
1333 }
1334
1335 /* Same formatting, but always include a sign, unless the real part is
1336 * going to be omitted, in which case we use whatever sign convention was
1337 * requested by the original format. */
1338 if (!skip_re)
1339 tmp_format.sign = '+';
1340 n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1341 i_im, i_im + n_im_digits, n_im_remainder,
1342 im_has_decimal, &locale, &tmp_format,
1343 &maxchar);
1344 if (n_im_total == -1) {
1345 goto done;
1346 }
1347
1348 if (skip_re)
1349 n_re_total = 0;
1350
1351 /* Add 1 for the 'j', and optionally 2 for parens. */
1352 calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1353 format->width, format->align, &lpad, &rpad, &total);
1354
1355 if (lpad || rpad)
1356 maxchar = Py_MAX(maxchar, format->fill_char);
1357
1358 if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1359 goto done;
1360 rkind = writer->kind;
1361 rdata = writer->data;
1362
1363 /* Populate the memory. First, the padding. */
1364 result = fill_padding(writer,
1365 n_re_total + n_im_total + 1 + add_parens * 2,
1366 format->fill_char, lpad, rpad);
1367 if (result == -1)
1368 goto done;
1369
1370 if (add_parens) {
1371 PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1372 writer->pos++;
1373 }
1374
1375 if (!skip_re) {
1376 result = fill_number(writer, &re_spec,
1377 re_unicode_tmp, i_re,
1378 NULL, 0,
1379 0,
1380 &locale, 0);
1381 if (result == -1)
1382 goto done;
1383 }
1384 result = fill_number(writer, &im_spec,
1385 im_unicode_tmp, i_im,
1386 NULL, 0,
1387 0,
1388 &locale, 0);
1389 if (result == -1)
1390 goto done;
1391 PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1392 writer->pos++;
1393
1394 if (add_parens) {
1395 PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1396 writer->pos++;
1397 }
1398
1399 writer->pos += rpad;
1400
1401 done:
1402 PyMem_Free(re_buf);
1403 PyMem_Free(im_buf);
1404 Py_XDECREF(re_unicode_tmp);
1405 Py_XDECREF(im_unicode_tmp);
1406 free_locale_info(&locale);
1407 return result;
1408 }
1409
1410 /************************************************************************/
1411 /*********** built in formatters ****************************************/
1412 /************************************************************************/
1413 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1414 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1415 {
1416 PyObject *str;
1417 int err;
1418
1419 str = PyObject_Str(obj);
1420 if (str == NULL)
1421 return -1;
1422 err = _PyUnicodeWriter_WriteStr(writer, str);
1423 Py_DECREF(str);
1424 return err;
1425 }
1426
1427 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1428 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1429 PyObject *obj,
1430 PyObject *format_spec,
1431 Py_ssize_t start, Py_ssize_t end)
1432 {
1433 InternalFormatSpec format;
1434
1435 assert(PyUnicode_Check(obj));
1436
1437 /* check for the special case of zero length format spec, make
1438 it equivalent to str(obj) */
1439 if (start == end) {
1440 if (PyUnicode_CheckExact(obj))
1441 return _PyUnicodeWriter_WriteStr(writer, obj);
1442 else
1443 return format_obj(obj, writer);
1444 }
1445
1446 /* parse the format_spec */
1447 if (!parse_internal_render_format_spec(format_spec, start, end,
1448 &format, 's', '<'))
1449 return -1;
1450
1451 /* type conversion? */
1452 switch (format.type) {
1453 case 's':
1454 /* no type conversion needed, already a string. do the formatting */
1455 return format_string_internal(obj, &format, writer);
1456 default:
1457 /* unknown */
1458 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1459 return -1;
1460 }
1461 }
1462
1463 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1464 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1465 PyObject *obj,
1466 PyObject *format_spec,
1467 Py_ssize_t start, Py_ssize_t end)
1468 {
1469 PyObject *tmp = NULL;
1470 InternalFormatSpec format;
1471 int result = -1;
1472
1473 /* check for the special case of zero length format spec, make
1474 it equivalent to str(obj) */
1475 if (start == end) {
1476 if (PyLong_CheckExact(obj))
1477 return _PyLong_FormatWriter(writer, obj, 10, 0);
1478 else
1479 return format_obj(obj, writer);
1480 }
1481
1482 /* parse the format_spec */
1483 if (!parse_internal_render_format_spec(format_spec, start, end,
1484 &format, 'd', '>'))
1485 goto done;
1486
1487 /* type conversion? */
1488 switch (format.type) {
1489 case 'b':
1490 case 'c':
1491 case 'd':
1492 case 'o':
1493 case 'x':
1494 case 'X':
1495 case 'n':
1496 /* no type conversion needed, already an int. do the formatting */
1497 result = format_long_internal(obj, &format, writer);
1498 break;
1499
1500 case 'e':
1501 case 'E':
1502 case 'f':
1503 case 'F':
1504 case 'g':
1505 case 'G':
1506 case '%':
1507 /* convert to float */
1508 tmp = PyNumber_Float(obj);
1509 if (tmp == NULL)
1510 goto done;
1511 result = format_float_internal(tmp, &format, writer);
1512 break;
1513
1514 default:
1515 /* unknown */
1516 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1517 goto done;
1518 }
1519
1520 done:
1521 Py_XDECREF(tmp);
1522 return result;
1523 }
1524
1525 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1526 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1527 PyObject *obj,
1528 PyObject *format_spec,
1529 Py_ssize_t start, Py_ssize_t end)
1530 {
1531 InternalFormatSpec format;
1532
1533 /* check for the special case of zero length format spec, make
1534 it equivalent to str(obj) */
1535 if (start == end)
1536 return format_obj(obj, writer);
1537
1538 /* parse the format_spec */
1539 if (!parse_internal_render_format_spec(format_spec, start, end,
1540 &format, '\0', '>'))
1541 return -1;
1542
1543 /* type conversion? */
1544 switch (format.type) {
1545 case '\0': /* No format code: like 'g', but with at least one decimal. */
1546 case 'e':
1547 case 'E':
1548 case 'f':
1549 case 'F':
1550 case 'g':
1551 case 'G':
1552 case 'n':
1553 case '%':
1554 /* no conversion, already a float. do the formatting */
1555 return format_float_internal(obj, &format, writer);
1556
1557 default:
1558 /* unknown */
1559 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1560 return -1;
1561 }
1562 }
1563
1564 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1565 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1566 PyObject *obj,
1567 PyObject *format_spec,
1568 Py_ssize_t start, Py_ssize_t end)
1569 {
1570 InternalFormatSpec format;
1571
1572 /* check for the special case of zero length format spec, make
1573 it equivalent to str(obj) */
1574 if (start == end)
1575 return format_obj(obj, writer);
1576
1577 /* parse the format_spec */
1578 if (!parse_internal_render_format_spec(format_spec, start, end,
1579 &format, '\0', '>'))
1580 return -1;
1581
1582 /* type conversion? */
1583 switch (format.type) {
1584 case '\0': /* No format code: like 'g', but with at least one decimal. */
1585 case 'e':
1586 case 'E':
1587 case 'f':
1588 case 'F':
1589 case 'g':
1590 case 'G':
1591 case 'n':
1592 /* no conversion, already a complex. do the formatting */
1593 return format_complex_internal(obj, &format, writer);
1594
1595 default:
1596 /* unknown */
1597 unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1598 return -1;
1599 }
1600 }
1601