• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* implements the unicode (as opposed to string) version of the
2    built-in formatters for string, int, float.  that is, the versions
3    of int.__float__, etc., that take and return unicode objects */
4 
5 #include "Python.h"
6 #include "pycore_fileutils.h"
7 #include <locale.h>
8 
9 /* Raises an exception about an unknown presentation type for this
10  * type. */
11 
12 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)13 unknown_presentation_type(Py_UCS4 presentation_type,
14                           const char* type_name)
15 {
16     /* %c might be out-of-range, hence the two cases. */
17     if (presentation_type > 32 && presentation_type < 128)
18         PyErr_Format(PyExc_ValueError,
19                      "Unknown format code '%c' "
20                      "for object of type '%.200s'",
21                      (char)presentation_type,
22                      type_name);
23     else
24         PyErr_Format(PyExc_ValueError,
25                      "Unknown format code '\\x%x' "
26                      "for object of type '%.200s'",
27                      (unsigned int)presentation_type,
28                      type_name);
29 }
30 
31 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)32 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
33 {
34     assert(specifier == ',' || specifier == '_');
35     if (presentation_type > 32 && presentation_type < 128)
36         PyErr_Format(PyExc_ValueError,
37                      "Cannot specify '%c' with '%c'.",
38                      specifier, (char)presentation_type);
39     else
40         PyErr_Format(PyExc_ValueError,
41                      "Cannot specify '%c' with '\\x%x'.",
42                      specifier, (unsigned int)presentation_type);
43 }
44 
45 static void
invalid_comma_and_underscore(void)46 invalid_comma_and_underscore(void)
47 {
48     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49 }
50 
51 /*
52     get_integer consumes 0 or more decimal digit characters from an
53     input string, updates *result with the corresponding positive
54     integer, and returns the number of digits consumed.
55 
56     returns -1 on error.
57 */
58 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)59 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
60                   Py_ssize_t *result)
61 {
62     Py_ssize_t accumulator, digitval, pos = *ppos;
63     int numdigits;
64     int kind = PyUnicode_KIND(str);
65     const void *data = PyUnicode_DATA(str);
66 
67     accumulator = numdigits = 0;
68     for (; pos < end; pos++, numdigits++) {
69         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
70         if (digitval < 0)
71             break;
72         /*
73            Detect possible overflow before it happens:
74 
75               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
77         */
78         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
79             PyErr_Format(PyExc_ValueError,
80                          "Too many decimal digits in format string");
81             *ppos = pos;
82             return -1;
83         }
84         accumulator = accumulator * 10 + digitval;
85     }
86     *ppos = pos;
87     *result = accumulator;
88     return numdigits;
89 }
90 
91 /************************************************************************/
92 /*********** standard format specifier parsing **************************/
93 /************************************************************************/
94 
95 /* returns true if this character is a specifier alignment token */
96 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)97 is_alignment_token(Py_UCS4 c)
98 {
99     switch (c) {
100     case '<': case '>': case '=': case '^':
101         return 1;
102     default:
103         return 0;
104     }
105 }
106 
107 /* returns true if this character is a sign element */
108 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)109 is_sign_element(Py_UCS4 c)
110 {
111     switch (c) {
112     case ' ': case '+': case '-':
113         return 1;
114     default:
115         return 0;
116     }
117 }
118 
/* Locale type codes, selecting where separator and grouping information
   comes from.  LT_NO_LOCALE must be zero so the value can be tested as a
   boolean.  The ',' and '_' members double as the separator character
   itself. */
enum LocaleType {
    LT_NO_LOCALE = 0,               /* no thousands separators */
    LT_DEFAULT_LOCALE = ',',        /* ',' separator, groups of three */
    LT_UNDERSCORE_LOCALE = '_',     /* '_' separator, groups of three */
    LT_UNDER_FOUR_LOCALE = '_' + 1, /* '_' separator, groups of four */
    LT_CURRENT_LOCALE = '_' + 2     /* values taken from localeconv() */
};
127 
128 typedef struct {
129     Py_UCS4 fill_char;
130     Py_UCS4 align;
131     int alternate;
132     Py_UCS4 sign;
133     Py_ssize_t width;
134     enum LocaleType thousands_separators;
135     Py_ssize_t precision;
136     Py_UCS4 type;
137 } InternalFormatSpec;
138 
#if 0
/* Debugging aid, normally compiled out: print every field of *format to
   stdout, one per line, followed by a blank line. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    /* The casts make each argument match its printf conversion exactly. */
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
156 
157 
158 /*
159   ptr points to the start of the format_spec, end points just past its end.
160   fills in format with the parsed information.
161   returns 1 on success, 0 on failure.
162   if failure, sets the exception
163 */
164 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)165 parse_internal_render_format_spec(PyObject *format_spec,
166                                   Py_ssize_t start, Py_ssize_t end,
167                                   InternalFormatSpec *format,
168                                   char default_type,
169                                   char default_align)
170 {
171     Py_ssize_t pos = start;
172     int kind = PyUnicode_KIND(format_spec);
173     const void *data = PyUnicode_DATA(format_spec);
174     /* end-pos is used throughout this code to specify the length of
175        the input string */
176 #define READ_spec(index) PyUnicode_READ(kind, data, index)
177 
178     Py_ssize_t consumed;
179     int align_specified = 0;
180     int fill_char_specified = 0;
181 
182     format->fill_char = ' ';
183     format->align = default_align;
184     format->alternate = 0;
185     format->sign = '\0';
186     format->width = -1;
187     format->thousands_separators = LT_NO_LOCALE;
188     format->precision = -1;
189     format->type = default_type;
190 
191     /* If the second char is an alignment token,
192        then parse the fill char */
193     if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194         format->align = READ_spec(pos+1);
195         format->fill_char = READ_spec(pos);
196         fill_char_specified = 1;
197         align_specified = 1;
198         pos += 2;
199     }
200     else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201         format->align = READ_spec(pos);
202         align_specified = 1;
203         ++pos;
204     }
205 
206     /* Parse the various sign options */
207     if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208         format->sign = READ_spec(pos);
209         ++pos;
210     }
211 
212     /* If the next character is #, we're in alternate mode.  This only
213        applies to integers. */
214     if (end-pos >= 1 && READ_spec(pos) == '#') {
215         format->alternate = 1;
216         ++pos;
217     }
218 
219     /* The special case for 0-padding (backwards compat) */
220     if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
221         format->fill_char = '0';
222         if (!align_specified) {
223             format->align = '=';
224         }
225         ++pos;
226     }
227 
228     consumed = get_integer(format_spec, &pos, end, &format->width);
229     if (consumed == -1)
230         /* Overflow error. Exception already set. */
231         return 0;
232 
233     /* If consumed is 0, we didn't consume any characters for the
234        width. In that case, reset the width to -1, because
235        get_integer() will have set it to zero. -1 is how we record
236        that the width wasn't specified. */
237     if (consumed == 0)
238         format->width = -1;
239 
240     /* Comma signifies add thousands separators */
241     if (end-pos && READ_spec(pos) == ',') {
242         format->thousands_separators = LT_DEFAULT_LOCALE;
243         ++pos;
244     }
245     /* Underscore signifies add thousands separators */
246     if (end-pos && READ_spec(pos) == '_') {
247         if (format->thousands_separators != LT_NO_LOCALE) {
248             invalid_comma_and_underscore();
249             return 0;
250         }
251         format->thousands_separators = LT_UNDERSCORE_LOCALE;
252         ++pos;
253     }
254     if (end-pos && READ_spec(pos) == ',') {
255         if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
256             invalid_comma_and_underscore();
257             return 0;
258         }
259     }
260 
261     /* Parse field precision */
262     if (end-pos && READ_spec(pos) == '.') {
263         ++pos;
264 
265         consumed = get_integer(format_spec, &pos, end, &format->precision);
266         if (consumed == -1)
267             /* Overflow error. Exception already set. */
268             return 0;
269 
270         /* Not having a precision after a dot is an error. */
271         if (consumed == 0) {
272             PyErr_Format(PyExc_ValueError,
273                          "Format specifier missing precision");
274             return 0;
275         }
276 
277     }
278 
279     /* Finally, parse the type field. */
280 
281     if (end-pos > 1) {
282         /* More than one char remain, invalid format specifier. */
283         PyErr_Format(PyExc_ValueError, "Invalid format specifier");
284         return 0;
285     }
286 
287     if (end-pos == 1) {
288         format->type = READ_spec(pos);
289         ++pos;
290     }
291 
292     /* Do as much validating as we can, just by looking at the format
293        specifier.  Do not take into account what type of formatting
294        we're doing (int, float, string). */
295 
296     if (format->thousands_separators) {
297         switch (format->type) {
298         case 'd':
299         case 'e':
300         case 'f':
301         case 'g':
302         case 'E':
303         case 'G':
304         case '%':
305         case 'F':
306         case '\0':
307             /* These are allowed. See PEP 378.*/
308             break;
309         case 'b':
310         case 'o':
311         case 'x':
312         case 'X':
313             /* Underscores are allowed in bin/oct/hex. See PEP 515. */
314             if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
315                 /* Every four digits, not every three, in bin/oct/hex. */
316                 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
317                 break;
318             }
319             /* fall through */
320         default:
321             invalid_thousands_separator_type(format->thousands_separators, format->type);
322             return 0;
323         }
324     }
325 
326     assert (format->align <= 127);
327     assert (format->sign <= 127);
328     return 1;
329 }
330 
331 /* Calculate the padding needed. */
332 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)333 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
334              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
335              Py_ssize_t *n_total)
336 {
337     if (width >= 0) {
338         if (nchars > width)
339             *n_total = nchars;
340         else
341             *n_total = width;
342     }
343     else {
344         /* not specified, use all of the chars and no more */
345         *n_total = nchars;
346     }
347 
348     /* Figure out how much leading space we need, based on the
349        aligning */
350     if (align == '>')
351         *n_lpadding = *n_total - nchars;
352     else if (align == '^')
353         *n_lpadding = (*n_total - nchars) / 2;
354     else if (align == '<' || align == '=')
355         *n_lpadding = 0;
356     else {
357         /* We should never have an unspecified alignment. */
358         Py_UNREACHABLE();
359     }
360 
361     *n_rpadding = *n_total - nchars - *n_lpadding;
362 }
363 
364 /* Do the padding, and return a pointer to where the caller-supplied
365    content goes. */
366 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)367 fill_padding(_PyUnicodeWriter *writer,
368              Py_ssize_t nchars,
369              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
370              Py_ssize_t n_rpadding)
371 {
372     Py_ssize_t pos;
373 
374     /* Pad on left. */
375     if (n_lpadding) {
376         pos = writer->pos;
377         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
378     }
379 
380     /* Pad on right. */
381     if (n_rpadding) {
382         pos = writer->pos + nchars + n_lpadding;
383         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
384     }
385 
386     /* Pointer to the user content. */
387     writer->pos += n_lpadding;
388     return 0;
389 }
390 
391 /************************************************************************/
392 /*********** common routines for numeric formatting *********************/
393 /************************************************************************/
394 
395 /* Locale info needed for formatting integers and the part of floats
396    before and including the decimal. Note that locales only support
397    8-bit chars, not unicode. */
398 typedef struct {
399     PyObject *decimal_point;
400     PyObject *thousands_sep;
401     const char *grouping;
402     char *grouping_buffer;
403 } LocaleInfo;
404 
405 #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
406 
407 /* describes the layout for an integer, see the comment in
408    calc_number_widths() for details */
409 typedef struct {
410     Py_ssize_t n_lpadding;
411     Py_ssize_t n_prefix;
412     Py_ssize_t n_spadding;
413     Py_ssize_t n_rpadding;
414     char sign;
415     Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
416     Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
417                                     any grouping chars. */
418     Py_ssize_t n_decimal;   /* 0 if only an integer */
419     Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
420                                excluding the decimal itself, if
421                                present. */
422 
423     /* These 2 are not the widths of fields, but are needed by
424        STRINGLIB_GROUPING. */
425     Py_ssize_t n_digits;    /* The number of digits before a decimal
426                                or exponent. */
427     Py_ssize_t n_min_width; /* The min_width we used when we computed
428                                the n_grouped_digits width. */
429 } NumberFieldWidths;
430 
431 
432 /* Given a number of the form:
433    digits[remainder]
434    where ptr points to the start and end points to the end, find where
435     the integer part ends. This could be a decimal, an exponent, both,
436     or neither.
437    If a decimal point is present, set *has_decimal and increment
438     remainder beyond it.
439    Results are undefined (but shouldn't crash) for improperly
440     formatted strings.
441 */
442 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)443 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
444              Py_ssize_t *n_remainder, int *has_decimal)
445 {
446     Py_ssize_t remainder;
447     int kind = PyUnicode_KIND(s);
448     const void *data = PyUnicode_DATA(s);
449 
450     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
451         ++pos;
452     remainder = pos;
453 
454     /* Does remainder start with a decimal point? */
455     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
456 
457     /* Skip the decimal point. */
458     if (*has_decimal)
459         remainder++;
460 
461     *n_remainder = end - remainder;
462 }
463 
464 /* not all fields of format are used.  for example, precision is
465    unused.  should this take discrete params in order to be more clear
466    about what it does?  or is passing a single format parameter easier
467    and more efficient enough to justify a little obfuscation?
468    Return -1 on error. */
469 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)470 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
471                    Py_UCS4 sign_char, Py_ssize_t n_start,
472                    Py_ssize_t n_end, Py_ssize_t n_remainder,
473                    int has_decimal, const LocaleInfo *locale,
474                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
475 {
476     Py_ssize_t n_non_digit_non_padding;
477     Py_ssize_t n_padding;
478 
479     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
480     spec->n_lpadding = 0;
481     spec->n_prefix = n_prefix;
482     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
483     spec->n_remainder = n_remainder;
484     spec->n_spadding = 0;
485     spec->n_rpadding = 0;
486     spec->sign = '\0';
487     spec->n_sign = 0;
488 
489     /* the output will look like:
490        |                                                                                         |
491        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
492        |                                                                                         |
493 
494        sign is computed from format->sign and the actual
495        sign of the number
496 
497        prefix is given (it's for the '0x' prefix)
498 
499        digits is already known
500 
501        the total width is either given, or computed from the
502        actual digits
503 
504        only one of lpadding, spadding, and rpadding can be non-zero,
505        and it's calculated from the width and other fields
506     */
507 
508     /* compute the various parts we're going to write */
509     switch (format->sign) {
510     case '+':
511         /* always put a + or - */
512         spec->n_sign = 1;
513         spec->sign = (sign_char == '-' ? '-' : '+');
514         break;
515     case ' ':
516         spec->n_sign = 1;
517         spec->sign = (sign_char == '-' ? '-' : ' ');
518         break;
519     default:
520         /* Not specified, or the default (-) */
521         if (sign_char == '-') {
522             spec->n_sign = 1;
523             spec->sign = '-';
524         }
525     }
526 
527     /* The number of chars used for non-digits and non-padding. */
528     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
529         spec->n_remainder;
530 
531     /* min_width can go negative, that's okay. format->width == -1 means
532        we don't care. */
533     if (format->fill_char == '0' && format->align == '=')
534         spec->n_min_width = format->width - n_non_digit_non_padding;
535     else
536         spec->n_min_width = 0;
537 
538     if (spec->n_digits == 0)
539         /* This case only occurs when using 'c' formatting, we need
540            to special case it because the grouping code always wants
541            to have at least one character. */
542         spec->n_grouped_digits = 0;
543     else {
544         Py_UCS4 grouping_maxchar;
545         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
546             NULL, 0,
547             NULL, 0, spec->n_digits,
548             spec->n_min_width,
549             locale->grouping, locale->thousands_sep, &grouping_maxchar);
550         if (spec->n_grouped_digits == -1) {
551             return -1;
552         }
553         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
554     }
555 
556     /* Given the desired width and the total of digit and non-digit
557        space we consume, see if we need any padding. format->width can
558        be negative (meaning no padding), but this code still works in
559        that case. */
560     n_padding = format->width -
561                         (n_non_digit_non_padding + spec->n_grouped_digits);
562     if (n_padding > 0) {
563         /* Some padding is needed. Determine if it's left, space, or right. */
564         switch (format->align) {
565         case '<':
566             spec->n_rpadding = n_padding;
567             break;
568         case '^':
569             spec->n_lpadding = n_padding / 2;
570             spec->n_rpadding = n_padding - spec->n_lpadding;
571             break;
572         case '=':
573             spec->n_spadding = n_padding;
574             break;
575         case '>':
576             spec->n_lpadding = n_padding;
577             break;
578         default:
579             /* Shouldn't get here */
580             Py_UNREACHABLE();
581         }
582     }
583 
584     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
585         *maxchar = Py_MAX(*maxchar, format->fill_char);
586 
587     if (spec->n_decimal)
588         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
589 
590     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
591         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
592         spec->n_remainder + spec->n_rpadding;
593 }
594 
595 /* Fill in the digit parts of a number's string representation,
596    as determined in calc_number_widths().
597    Return -1 on error, or 0 on success. */
598 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)599 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
600             PyObject *digits, Py_ssize_t d_start,
601             PyObject *prefix, Py_ssize_t p_start,
602             Py_UCS4 fill_char,
603             LocaleInfo *locale, int toupper)
604 {
605     /* Used to keep track of digits, decimal, and remainder. */
606     Py_ssize_t d_pos = d_start;
607     const unsigned int kind = writer->kind;
608     const void *data = writer->data;
609     Py_ssize_t r;
610 
611     if (spec->n_lpadding) {
612         _PyUnicode_FastFill(writer->buffer,
613                             writer->pos, spec->n_lpadding, fill_char);
614         writer->pos += spec->n_lpadding;
615     }
616     if (spec->n_sign == 1) {
617         PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
618         writer->pos++;
619     }
620     if (spec->n_prefix) {
621         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
622                                       prefix, p_start,
623                                       spec->n_prefix);
624         if (toupper) {
625             Py_ssize_t t;
626             for (t = 0; t < spec->n_prefix; t++) {
627                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
628                 c = Py_TOUPPER(c);
629                 assert (c <= 127);
630                 PyUnicode_WRITE(kind, data, writer->pos + t, c);
631             }
632         }
633         writer->pos += spec->n_prefix;
634     }
635     if (spec->n_spadding) {
636         _PyUnicode_FastFill(writer->buffer,
637                             writer->pos, spec->n_spadding, fill_char);
638         writer->pos += spec->n_spadding;
639     }
640 
641     /* Only for type 'c' special case, it has no digits. */
642     if (spec->n_digits != 0) {
643         /* Fill the digits with InsertThousandsGrouping. */
644         r = _PyUnicode_InsertThousandsGrouping(
645                 writer, spec->n_grouped_digits,
646                 digits, d_pos, spec->n_digits,
647                 spec->n_min_width,
648                 locale->grouping, locale->thousands_sep, NULL);
649         if (r == -1)
650             return -1;
651         assert(r == spec->n_grouped_digits);
652         d_pos += spec->n_digits;
653     }
654     if (toupper) {
655         Py_ssize_t t;
656         for (t = 0; t < spec->n_grouped_digits; t++) {
657             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
658             c = Py_TOUPPER(c);
659             if (c > 127) {
660                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
661                 return -1;
662             }
663             PyUnicode_WRITE(kind, data, writer->pos + t, c);
664         }
665     }
666     writer->pos += spec->n_grouped_digits;
667 
668     if (spec->n_decimal) {
669         _PyUnicode_FastCopyCharacters(
670             writer->buffer, writer->pos,
671             locale->decimal_point, 0, spec->n_decimal);
672         writer->pos += spec->n_decimal;
673         d_pos += 1;
674     }
675 
676     if (spec->n_remainder) {
677         _PyUnicode_FastCopyCharacters(
678             writer->buffer, writer->pos,
679             digits, d_pos, spec->n_remainder);
680         writer->pos += spec->n_remainder;
681         /* d_pos += spec->n_remainder; */
682     }
683 
684     if (spec->n_rpadding) {
685         _PyUnicode_FastFill(writer->buffer,
686                             writer->pos, spec->n_rpadding,
687                             fill_char);
688         writer->pos += spec->n_rpadding;
689     }
690     return 0;
691 }
692 
693 static const char no_grouping[1] = {CHAR_MAX};
694 
695 /* Find the decimal point character(s?), thousands_separator(s?), and
696    grouping description, either for the current locale if type is
697    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
698    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
699 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)700 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
701 {
702     switch (type) {
703     case LT_CURRENT_LOCALE: {
704         struct lconv *lc = localeconv();
705         if (_Py_GetLocaleconvNumeric(lc,
706                                      &locale_info->decimal_point,
707                                      &locale_info->thousands_sep) < 0) {
708             return -1;
709         }
710 
711         /* localeconv() grouping can become a dangling pointer or point
712            to a different string if another thread calls localeconv() during
713            the string formatting. Copy the string to avoid this risk. */
714         locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
715         if (locale_info->grouping_buffer == NULL) {
716             PyErr_NoMemory();
717             return -1;
718         }
719         locale_info->grouping = locale_info->grouping_buffer;
720         break;
721     }
722     case LT_DEFAULT_LOCALE:
723     case LT_UNDERSCORE_LOCALE:
724     case LT_UNDER_FOUR_LOCALE:
725         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
726         locale_info->thousands_sep = PyUnicode_FromOrdinal(
727             type == LT_DEFAULT_LOCALE ? ',' : '_');
728         if (!locale_info->decimal_point || !locale_info->thousands_sep)
729             return -1;
730         if (type != LT_UNDER_FOUR_LOCALE)
731             locale_info->grouping = "\3"; /* Group every 3 characters.  The
732                                          (implicit) trailing 0 means repeat
733                                          infinitely. */
734         else
735             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
736         break;
737     case LT_NO_LOCALE:
738         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
739         locale_info->thousands_sep = PyUnicode_New(0, 0);
740         if (!locale_info->decimal_point || !locale_info->thousands_sep)
741             return -1;
742         locale_info->grouping = no_grouping;
743         break;
744     }
745     return 0;
746 }
747 
748 static void
free_locale_info(LocaleInfo * locale_info)749 free_locale_info(LocaleInfo *locale_info)
750 {
751     Py_XDECREF(locale_info->decimal_point);
752     Py_XDECREF(locale_info->thousands_sep);
753     PyMem_Free(locale_info->grouping_buffer);
754 }
755 
756 /************************************************************************/
757 /*********** string formatting ******************************************/
758 /************************************************************************/
759 
760 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)761 format_string_internal(PyObject *value, const InternalFormatSpec *format,
762                        _PyUnicodeWriter *writer)
763 {
764     Py_ssize_t lpad;
765     Py_ssize_t rpad;
766     Py_ssize_t total;
767     Py_ssize_t len;
768     int result = -1;
769     Py_UCS4 maxchar;
770 
771     assert(PyUnicode_IS_READY(value));
772     len = PyUnicode_GET_LENGTH(value);
773 
774     /* sign is not allowed on strings */
775     if (format->sign != '\0') {
776         PyErr_SetString(PyExc_ValueError,
777                         "Sign not allowed in string format specifier");
778         goto done;
779     }
780 
781     /* alternate is not allowed on strings */
782     if (format->alternate) {
783         PyErr_SetString(PyExc_ValueError,
784                         "Alternate form (#) not allowed in string format "
785                         "specifier");
786         goto done;
787     }
788 
789     /* '=' alignment not allowed on strings */
790     if (format->align == '=') {
791         PyErr_SetString(PyExc_ValueError,
792                         "'=' alignment not allowed "
793                         "in string format specifier");
794         goto done;
795     }
796 
797     if ((format->width == -1 || format->width <= len)
798         && (format->precision == -1 || format->precision >= len)) {
799         /* Fast path */
800         return _PyUnicodeWriter_WriteStr(writer, value);
801     }
802 
803     /* if precision is specified, output no more that format.precision
804        characters */
805     if (format->precision >= 0 && len >= format->precision) {
806         len = format->precision;
807     }
808 
809     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
810 
811     maxchar = writer->maxchar;
812     if (lpad != 0 || rpad != 0)
813         maxchar = Py_MAX(maxchar, format->fill_char);
814     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
815         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
816         maxchar = Py_MAX(maxchar, valmaxchar);
817     }
818 
819     /* allocate the resulting string */
820     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
821         goto done;
822 
823     /* Write into that space. First the padding. */
824     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
825     if (result == -1)
826         goto done;
827 
828     /* Then the source string. */
829     if (len) {
830         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
831                                       value, 0, len);
832     }
833     writer->pos += (len + rpad);
834     result = 0;
835 
836 done:
837     return result;
838 }
839 
840 
841 /************************************************************************/
842 /*********** long formatting ********************************************/
843 /************************************************************************/
844 
845 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)846 format_long_internal(PyObject *value, const InternalFormatSpec *format,
847                      _PyUnicodeWriter *writer)
848 {
849     int result = -1;
850     Py_UCS4 maxchar = 127;
851     PyObject *tmp = NULL;
852     Py_ssize_t inumeric_chars;
853     Py_UCS4 sign_char = '\0';
854     Py_ssize_t n_digits;       /* count of digits need from the computed
855                                   string */
856     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
857                                    produces non-digits */
858     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
859     Py_ssize_t n_total;
860     Py_ssize_t prefix = 0;
861     NumberFieldWidths spec;
862     long x;
863 
864     /* Locale settings, either from the actual locale or
865        from a hard-code pseudo-locale */
866     LocaleInfo locale = LocaleInfo_STATIC_INIT;
867 
868     /* no precision allowed on integers */
869     if (format->precision != -1) {
870         PyErr_SetString(PyExc_ValueError,
871                         "Precision not allowed in integer format specifier");
872         goto done;
873     }
874 
875     /* special case for character formatting */
876     if (format->type == 'c') {
877         /* error to specify a sign */
878         if (format->sign != '\0') {
879             PyErr_SetString(PyExc_ValueError,
880                             "Sign not allowed with integer"
881                             " format specifier 'c'");
882             goto done;
883         }
884         /* error to request alternate format */
885         if (format->alternate) {
886             PyErr_SetString(PyExc_ValueError,
887                             "Alternate form (#) not allowed with integer"
888                             " format specifier 'c'");
889             goto done;
890         }
891 
892         /* taken from unicodeobject.c formatchar() */
893         /* Integer input truncated to a character */
894         x = PyLong_AsLong(value);
895         if (x == -1 && PyErr_Occurred())
896             goto done;
897         if (x < 0 || x > 0x10ffff) {
898             PyErr_SetString(PyExc_OverflowError,
899                             "%c arg not in range(0x110000)");
900             goto done;
901         }
902         tmp = PyUnicode_FromOrdinal(x);
903         inumeric_chars = 0;
904         n_digits = 1;
905         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
906 
907         /* As a sort-of hack, we tell calc_number_widths that we only
908            have "remainder" characters. calc_number_widths thinks
909            these are characters that don't get formatted, only copied
910            into the output string. We do this for 'c' formatting,
911            because the characters are likely to be non-digits. */
912         n_remainder = 1;
913     }
914     else {
915         int base;
916         int leading_chars_to_skip = 0;  /* Number of characters added by
917                                            PyNumber_ToBase that we want to
918                                            skip over. */
919 
920         /* Compute the base and how many characters will be added by
921            PyNumber_ToBase */
922         switch (format->type) {
923         case 'b':
924             base = 2;
925             leading_chars_to_skip = 2; /* 0b */
926             break;
927         case 'o':
928             base = 8;
929             leading_chars_to_skip = 2; /* 0o */
930             break;
931         case 'x':
932         case 'X':
933             base = 16;
934             leading_chars_to_skip = 2; /* 0x */
935             break;
936         default:  /* shouldn't be needed, but stops a compiler warning */
937         case 'd':
938         case 'n':
939             base = 10;
940             break;
941         }
942 
943         if (format->sign != '+' && format->sign != ' '
944             && format->width == -1
945             && format->type != 'X' && format->type != 'n'
946             && !format->thousands_separators
947             && PyLong_CheckExact(value))
948         {
949             /* Fast path */
950             return _PyLong_FormatWriter(writer, value, base, format->alternate);
951         }
952 
953         /* The number of prefix chars is the same as the leading
954            chars to skip */
955         if (format->alternate)
956             n_prefix = leading_chars_to_skip;
957 
958         /* Do the hard part, converting to a string in a given base */
959         tmp = _PyLong_Format(value, base);
960         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
961             goto done;
962 
963         inumeric_chars = 0;
964         n_digits = PyUnicode_GET_LENGTH(tmp);
965 
966         prefix = inumeric_chars;
967 
968         /* Is a sign character present in the output?  If so, remember it
969            and skip it */
970         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
971             sign_char = '-';
972             ++prefix;
973             ++leading_chars_to_skip;
974         }
975 
976         /* Skip over the leading chars (0x, 0b, etc.) */
977         n_digits -= leading_chars_to_skip;
978         inumeric_chars += leading_chars_to_skip;
979     }
980 
981     /* Determine the grouping, separator, and decimal point, if any. */
982     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
983                         format->thousands_separators,
984                         &locale) == -1)
985         goto done;
986 
987     /* Calculate how much memory we'll need. */
988     n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
989                                  inumeric_chars + n_digits, n_remainder, 0,
990                                  &locale, format, &maxchar);
991     if (n_total == -1) {
992         goto done;
993     }
994 
995     /* Allocate the memory. */
996     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
997         goto done;
998 
999     /* Populate the memory. */
1000     result = fill_number(writer, &spec,
1001                          tmp, inumeric_chars,
1002                          tmp, prefix, format->fill_char,
1003                          &locale, format->type == 'X');
1004 
1005 done:
1006     Py_XDECREF(tmp);
1007     free_locale_info(&locale);
1008     return result;
1009 }
1010 
1011 /************************************************************************/
1012 /*********** float formatting *******************************************/
1013 /************************************************************************/
1014 
1015 /* much of this is taken from unicodeobject.c */
1016 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1017 format_float_internal(PyObject *value,
1018                       const InternalFormatSpec *format,
1019                       _PyUnicodeWriter *writer)
1020 {
1021     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1022     Py_ssize_t n_digits;
1023     Py_ssize_t n_remainder;
1024     Py_ssize_t n_total;
1025     int has_decimal;
1026     double val;
1027     int precision, default_precision = 6;
1028     Py_UCS4 type = format->type;
1029     int add_pct = 0;
1030     Py_ssize_t index;
1031     NumberFieldWidths spec;
1032     int flags = 0;
1033     int result = -1;
1034     Py_UCS4 maxchar = 127;
1035     Py_UCS4 sign_char = '\0';
1036     int float_type; /* Used to see if we have a nan, inf, or regular float. */
1037     PyObject *unicode_tmp = NULL;
1038 
1039     /* Locale settings, either from the actual locale or
1040        from a hard-code pseudo-locale */
1041     LocaleInfo locale = LocaleInfo_STATIC_INIT;
1042 
1043     if (format->precision > INT_MAX) {
1044         PyErr_SetString(PyExc_ValueError, "precision too big");
1045         goto done;
1046     }
1047     precision = (int)format->precision;
1048 
1049     if (format->alternate)
1050         flags |= Py_DTSF_ALT;
1051 
1052     if (type == '\0') {
1053         /* Omitted type specifier.  Behaves in the same way as repr(x)
1054            and str(x) if no precision is given, else like 'g', but with
1055            at least one digit after the decimal point. */
1056         flags |= Py_DTSF_ADD_DOT_0;
1057         type = 'r';
1058         default_precision = 0;
1059     }
1060 
1061     if (type == 'n')
1062         /* 'n' is the same as 'g', except for the locale used to
1063            format the result. We take care of that later. */
1064         type = 'g';
1065 
1066     val = PyFloat_AsDouble(value);
1067     if (val == -1.0 && PyErr_Occurred())
1068         goto done;
1069 
1070     if (type == '%') {
1071         type = 'f';
1072         val *= 100;
1073         add_pct = 1;
1074     }
1075 
1076     if (precision < 0)
1077         precision = default_precision;
1078     else if (type == 'r')
1079         type = 'g';
1080 
1081     /* Cast "type", because if we're in unicode we need to pass an
1082        8-bit char. This is safe, because we've restricted what "type"
1083        can be. */
1084     buf = PyOS_double_to_string(val, (char)type, precision, flags,
1085                                 &float_type);
1086     if (buf == NULL)
1087         goto done;
1088     n_digits = strlen(buf);
1089 
1090     if (add_pct) {
1091         /* We know that buf has a trailing zero (since we just called
1092            strlen() on it), and we don't use that fact any more. So we
1093            can just write over the trailing zero. */
1094         buf[n_digits] = '%';
1095         n_digits += 1;
1096     }
1097 
1098     if (format->sign != '+' && format->sign != ' '
1099         && format->width == -1
1100         && format->type != 'n'
1101         && !format->thousands_separators)
1102     {
1103         /* Fast path */
1104         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1105         PyMem_Free(buf);
1106         return result;
1107     }
1108 
1109     /* Since there is no unicode version of PyOS_double_to_string,
1110        just use the 8 bit version and then convert to unicode. */
1111     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1112     PyMem_Free(buf);
1113     if (unicode_tmp == NULL)
1114         goto done;
1115 
1116     /* Is a sign character present in the output?  If so, remember it
1117        and skip it */
1118     index = 0;
1119     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1120         sign_char = '-';
1121         ++index;
1122         --n_digits;
1123     }
1124 
1125     /* Determine if we have any "remainder" (after the digits, might include
1126        decimal or exponent or both (or neither)) */
1127     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1128 
1129     /* Determine the grouping, separator, and decimal point, if any. */
1130     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1131                         format->thousands_separators,
1132                         &locale) == -1)
1133         goto done;
1134 
1135     /* Calculate how much memory we'll need. */
1136     n_total = calc_number_widths(&spec, 0, sign_char, index,
1137                                  index + n_digits, n_remainder, has_decimal,
1138                                  &locale, format, &maxchar);
1139     if (n_total == -1) {
1140         goto done;
1141     }
1142 
1143     /* Allocate the memory. */
1144     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1145         goto done;
1146 
1147     /* Populate the memory. */
1148     result = fill_number(writer, &spec,
1149                          unicode_tmp, index,
1150                          NULL, 0, format->fill_char,
1151                          &locale, 0);
1152 
1153 done:
1154     Py_XDECREF(unicode_tmp);
1155     free_locale_info(&locale);
1156     return result;
1157 }
1158 
1159 /************************************************************************/
1160 /*********** complex formatting *****************************************/
1161 /************************************************************************/
1162 
1163 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1164 format_complex_internal(PyObject *value,
1165                         const InternalFormatSpec *format,
1166                         _PyUnicodeWriter *writer)
1167 {
1168     double re;
1169     double im;
1170     char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1171     char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1172 
1173     InternalFormatSpec tmp_format = *format;
1174     Py_ssize_t n_re_digits;
1175     Py_ssize_t n_im_digits;
1176     Py_ssize_t n_re_remainder;
1177     Py_ssize_t n_im_remainder;
1178     Py_ssize_t n_re_total;
1179     Py_ssize_t n_im_total;
1180     int re_has_decimal;
1181     int im_has_decimal;
1182     int precision, default_precision = 6;
1183     Py_UCS4 type = format->type;
1184     Py_ssize_t i_re;
1185     Py_ssize_t i_im;
1186     NumberFieldWidths re_spec;
1187     NumberFieldWidths im_spec;
1188     int flags = 0;
1189     int result = -1;
1190     Py_UCS4 maxchar = 127;
1191     enum PyUnicode_Kind rkind;
1192     void *rdata;
1193     Py_UCS4 re_sign_char = '\0';
1194     Py_UCS4 im_sign_char = '\0';
1195     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1196     int im_float_type;
1197     int add_parens = 0;
1198     int skip_re = 0;
1199     Py_ssize_t lpad;
1200     Py_ssize_t rpad;
1201     Py_ssize_t total;
1202     PyObject *re_unicode_tmp = NULL;
1203     PyObject *im_unicode_tmp = NULL;
1204 
1205     /* Locale settings, either from the actual locale or
1206        from a hard-code pseudo-locale */
1207     LocaleInfo locale = LocaleInfo_STATIC_INIT;
1208 
1209     if (format->precision > INT_MAX) {
1210         PyErr_SetString(PyExc_ValueError, "precision too big");
1211         goto done;
1212     }
1213     precision = (int)format->precision;
1214 
1215     /* Zero padding is not allowed. */
1216     if (format->fill_char == '0') {
1217         PyErr_SetString(PyExc_ValueError,
1218                         "Zero padding is not allowed in complex format "
1219                         "specifier");
1220         goto done;
1221     }
1222 
1223     /* Neither is '=' alignment . */
1224     if (format->align == '=') {
1225         PyErr_SetString(PyExc_ValueError,
1226                         "'=' alignment flag is not allowed in complex format "
1227                         "specifier");
1228         goto done;
1229     }
1230 
1231     re = PyComplex_RealAsDouble(value);
1232     if (re == -1.0 && PyErr_Occurred())
1233         goto done;
1234     im = PyComplex_ImagAsDouble(value);
1235     if (im == -1.0 && PyErr_Occurred())
1236         goto done;
1237 
1238     if (format->alternate)
1239         flags |= Py_DTSF_ALT;
1240 
1241     if (type == '\0') {
1242         /* Omitted type specifier. Should be like str(self). */
1243         type = 'r';
1244         default_precision = 0;
1245         if (re == 0.0 && copysign(1.0, re) == 1.0)
1246             skip_re = 1;
1247         else
1248             add_parens = 1;
1249     }
1250 
1251     if (type == 'n')
1252         /* 'n' is the same as 'g', except for the locale used to
1253            format the result. We take care of that later. */
1254         type = 'g';
1255 
1256     if (precision < 0)
1257         precision = default_precision;
1258     else if (type == 'r')
1259         type = 'g';
1260 
1261     /* Cast "type", because if we're in unicode we need to pass an
1262        8-bit char. This is safe, because we've restricted what "type"
1263        can be. */
1264     re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1265                                    &re_float_type);
1266     if (re_buf == NULL)
1267         goto done;
1268     im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1269                                    &im_float_type);
1270     if (im_buf == NULL)
1271         goto done;
1272 
1273     n_re_digits = strlen(re_buf);
1274     n_im_digits = strlen(im_buf);
1275 
1276     /* Since there is no unicode version of PyOS_double_to_string,
1277        just use the 8 bit version and then convert to unicode. */
1278     re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1279     if (re_unicode_tmp == NULL)
1280         goto done;
1281     i_re = 0;
1282 
1283     im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1284     if (im_unicode_tmp == NULL)
1285         goto done;
1286     i_im = 0;
1287 
1288     /* Is a sign character present in the output?  If so, remember it
1289        and skip it */
1290     if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1291         re_sign_char = '-';
1292         ++i_re;
1293         --n_re_digits;
1294     }
1295     if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1296         im_sign_char = '-';
1297         ++i_im;
1298         --n_im_digits;
1299     }
1300 
1301     /* Determine if we have any "remainder" (after the digits, might include
1302        decimal or exponent or both (or neither)) */
1303     parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1304                  &n_re_remainder, &re_has_decimal);
1305     parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1306                  &n_im_remainder, &im_has_decimal);
1307 
1308     /* Determine the grouping, separator, and decimal point, if any. */
1309     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1310                         format->thousands_separators,
1311                         &locale) == -1)
1312         goto done;
1313 
1314     /* Turn off any padding. We'll do it later after we've composed
1315        the numbers without padding. */
1316     tmp_format.fill_char = '\0';
1317     tmp_format.align = '<';
1318     tmp_format.width = -1;
1319 
1320     /* Calculate how much memory we'll need. */
1321     n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1322                                     i_re, i_re + n_re_digits, n_re_remainder,
1323                                     re_has_decimal, &locale, &tmp_format,
1324                                     &maxchar);
1325     if (n_re_total == -1) {
1326         goto done;
1327     }
1328 
1329     /* Same formatting, but always include a sign, unless the real part is
1330      * going to be omitted, in which case we use whatever sign convention was
1331      * requested by the original format. */
1332     if (!skip_re)
1333         tmp_format.sign = '+';
1334     n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1335                                     i_im, i_im + n_im_digits, n_im_remainder,
1336                                     im_has_decimal, &locale, &tmp_format,
1337                                     &maxchar);
1338     if (n_im_total == -1) {
1339         goto done;
1340     }
1341 
1342     if (skip_re)
1343         n_re_total = 0;
1344 
1345     /* Add 1 for the 'j', and optionally 2 for parens. */
1346     calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1347                  format->width, format->align, &lpad, &rpad, &total);
1348 
1349     if (lpad || rpad)
1350         maxchar = Py_MAX(maxchar, format->fill_char);
1351 
1352     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1353         goto done;
1354     rkind = writer->kind;
1355     rdata = writer->data;
1356 
1357     /* Populate the memory. First, the padding. */
1358     result = fill_padding(writer,
1359                           n_re_total + n_im_total + 1 + add_parens * 2,
1360                           format->fill_char, lpad, rpad);
1361     if (result == -1)
1362         goto done;
1363 
1364     if (add_parens) {
1365         PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1366         writer->pos++;
1367     }
1368 
1369     if (!skip_re) {
1370         result = fill_number(writer, &re_spec,
1371                              re_unicode_tmp, i_re,
1372                              NULL, 0,
1373                              0,
1374                              &locale, 0);
1375         if (result == -1)
1376             goto done;
1377     }
1378     result = fill_number(writer, &im_spec,
1379                          im_unicode_tmp, i_im,
1380                          NULL, 0,
1381                          0,
1382                          &locale, 0);
1383     if (result == -1)
1384         goto done;
1385     PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1386     writer->pos++;
1387 
1388     if (add_parens) {
1389         PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1390         writer->pos++;
1391     }
1392 
1393     writer->pos += rpad;
1394 
1395 done:
1396     PyMem_Free(re_buf);
1397     PyMem_Free(im_buf);
1398     Py_XDECREF(re_unicode_tmp);
1399     Py_XDECREF(im_unicode_tmp);
1400     free_locale_info(&locale);
1401     return result;
1402 }
1403 
1404 /************************************************************************/
1405 /*********** built in formatters ****************************************/
1406 /************************************************************************/
1407 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1408 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1409 {
1410     PyObject *str;
1411     int err;
1412 
1413     str = PyObject_Str(obj);
1414     if (str == NULL)
1415         return -1;
1416     err = _PyUnicodeWriter_WriteStr(writer, str);
1417     Py_DECREF(str);
1418     return err;
1419 }
1420 
1421 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1422 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1423                                 PyObject *obj,
1424                                 PyObject *format_spec,
1425                                 Py_ssize_t start, Py_ssize_t end)
1426 {
1427     InternalFormatSpec format;
1428 
1429     assert(PyUnicode_Check(obj));
1430 
1431     /* check for the special case of zero length format spec, make
1432        it equivalent to str(obj) */
1433     if (start == end) {
1434         if (PyUnicode_CheckExact(obj))
1435             return _PyUnicodeWriter_WriteStr(writer, obj);
1436         else
1437             return format_obj(obj, writer);
1438     }
1439 
1440     /* parse the format_spec */
1441     if (!parse_internal_render_format_spec(format_spec, start, end,
1442                                            &format, 's', '<'))
1443         return -1;
1444 
1445     /* type conversion? */
1446     switch (format.type) {
1447     case 's':
1448         /* no type conversion needed, already a string.  do the formatting */
1449         return format_string_internal(obj, &format, writer);
1450     default:
1451         /* unknown */
1452         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1453         return -1;
1454     }
1455 }
1456 
1457 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1458 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1459                              PyObject *obj,
1460                              PyObject *format_spec,
1461                              Py_ssize_t start, Py_ssize_t end)
1462 {
1463     PyObject *tmp = NULL;
1464     InternalFormatSpec format;
1465     int result = -1;
1466 
1467     /* check for the special case of zero length format spec, make
1468        it equivalent to str(obj) */
1469     if (start == end) {
1470         if (PyLong_CheckExact(obj))
1471             return _PyLong_FormatWriter(writer, obj, 10, 0);
1472         else
1473             return format_obj(obj, writer);
1474     }
1475 
1476     /* parse the format_spec */
1477     if (!parse_internal_render_format_spec(format_spec, start, end,
1478                                            &format, 'd', '>'))
1479         goto done;
1480 
1481     /* type conversion? */
1482     switch (format.type) {
1483     case 'b':
1484     case 'c':
1485     case 'd':
1486     case 'o':
1487     case 'x':
1488     case 'X':
1489     case 'n':
1490         /* no type conversion needed, already an int.  do the formatting */
1491         result = format_long_internal(obj, &format, writer);
1492         break;
1493 
1494     case 'e':
1495     case 'E':
1496     case 'f':
1497     case 'F':
1498     case 'g':
1499     case 'G':
1500     case '%':
1501         /* convert to float */
1502         tmp = PyNumber_Float(obj);
1503         if (tmp == NULL)
1504             goto done;
1505         result = format_float_internal(tmp, &format, writer);
1506         break;
1507 
1508     default:
1509         /* unknown */
1510         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1511         goto done;
1512     }
1513 
1514 done:
1515     Py_XDECREF(tmp);
1516     return result;
1517 }
1518 
1519 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1520 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1521                               PyObject *obj,
1522                               PyObject *format_spec,
1523                               Py_ssize_t start, Py_ssize_t end)
1524 {
1525     InternalFormatSpec format;
1526 
1527     /* check for the special case of zero length format spec, make
1528        it equivalent to str(obj) */
1529     if (start == end)
1530         return format_obj(obj, writer);
1531 
1532     /* parse the format_spec */
1533     if (!parse_internal_render_format_spec(format_spec, start, end,
1534                                            &format, '\0', '>'))
1535         return -1;
1536 
1537     /* type conversion? */
1538     switch (format.type) {
1539     case '\0': /* No format code: like 'g', but with at least one decimal. */
1540     case 'e':
1541     case 'E':
1542     case 'f':
1543     case 'F':
1544     case 'g':
1545     case 'G':
1546     case 'n':
1547     case '%':
1548         /* no conversion, already a float.  do the formatting */
1549         return format_float_internal(obj, &format, writer);
1550 
1551     default:
1552         /* unknown */
1553         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1554         return -1;
1555     }
1556 }
1557 
1558 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1559 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1560                                 PyObject *obj,
1561                                 PyObject *format_spec,
1562                                 Py_ssize_t start, Py_ssize_t end)
1563 {
1564     InternalFormatSpec format;
1565 
1566     /* check for the special case of zero length format spec, make
1567        it equivalent to str(obj) */
1568     if (start == end)
1569         return format_obj(obj, writer);
1570 
1571     /* parse the format_spec */
1572     if (!parse_internal_render_format_spec(format_spec, start, end,
1573                                            &format, '\0', '>'))
1574         return -1;
1575 
1576     /* type conversion? */
1577     switch (format.type) {
1578     case '\0': /* No format code: like 'g', but with at least one decimal. */
1579     case 'e':
1580     case 'E':
1581     case 'f':
1582     case 'F':
1583     case 'g':
1584     case 'G':
1585     case 'n':
1586         /* no conversion, already a complex.  do the formatting */
1587         return format_complex_internal(obj, &format, writer);
1588 
1589     default:
1590         /* unknown */
1591         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1592         return -1;
1593     }
1594 }
1595