• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Implements the unicode (as opposed to string) versions of the
   built-in formatters for string, int and float; that is, the versions
   of int.__format__, etc., that take and return unicode objects. */
4 
5 #include "Python.h"
6 #include "pycore_fileutils.h"
7 #include <locale.h>
8 
9 /* Raises an exception about an unknown presentation type for this
10  * type. */
11 
12 static void
unknown_presentation_type(Py_UCS4 presentation_type,const char * type_name)13 unknown_presentation_type(Py_UCS4 presentation_type,
14                           const char* type_name)
15 {
16     /* %c might be out-of-range, hence the two cases. */
17     if (presentation_type > 32 && presentation_type < 128)
18         PyErr_Format(PyExc_ValueError,
19                      "Unknown format code '%c' "
20                      "for object of type '%.200s'",
21                      (char)presentation_type,
22                      type_name);
23     else
24         PyErr_Format(PyExc_ValueError,
25                      "Unknown format code '\\x%x' "
26                      "for object of type '%.200s'",
27                      (unsigned int)presentation_type,
28                      type_name);
29 }
30 
31 static void
invalid_thousands_separator_type(char specifier,Py_UCS4 presentation_type)32 invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
33 {
34     assert(specifier == ',' || specifier == '_');
35     if (presentation_type > 32 && presentation_type < 128)
36         PyErr_Format(PyExc_ValueError,
37                      "Cannot specify '%c' with '%c'.",
38                      specifier, (char)presentation_type);
39     else
40         PyErr_Format(PyExc_ValueError,
41                      "Cannot specify '%c' with '\\x%x'.",
42                      specifier, (unsigned int)presentation_type);
43 }
44 
45 static void
invalid_comma_and_underscore(void)46 invalid_comma_and_underscore(void)
47 {
48     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
49 }
50 
51 /*
52     get_integer consumes 0 or more decimal digit characters from an
53     input string, updates *result with the corresponding positive
54     integer, and returns the number of digits consumed.
55 
56     returns -1 on error.
57 */
58 static int
get_integer(PyObject * str,Py_ssize_t * ppos,Py_ssize_t end,Py_ssize_t * result)59 get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
60                   Py_ssize_t *result)
61 {
62     Py_ssize_t accumulator, digitval, pos = *ppos;
63     int numdigits;
64     int kind = PyUnicode_KIND(str);
65     const void *data = PyUnicode_DATA(str);
66 
67     accumulator = numdigits = 0;
68     for (; pos < end; pos++, numdigits++) {
69         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
70         if (digitval < 0)
71             break;
72         /*
73            Detect possible overflow before it happens:
74 
75               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
76               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
77         */
78         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
79             PyErr_Format(PyExc_ValueError,
80                          "Too many decimal digits in format string");
81             *ppos = pos;
82             return -1;
83         }
84         accumulator = accumulator * 10 + digitval;
85     }
86     *ppos = pos;
87     *result = accumulator;
88     return numdigits;
89 }
90 
91 /************************************************************************/
92 /*********** standard format specifier parsing **************************/
93 /************************************************************************/
94 
95 /* returns true if this character is a specifier alignment token */
96 Py_LOCAL_INLINE(int)
is_alignment_token(Py_UCS4 c)97 is_alignment_token(Py_UCS4 c)
98 {
99     switch (c) {
100     case '<': case '>': case '=': case '^':
101         return 1;
102     default:
103         return 0;
104     }
105 }
106 
107 /* returns true if this character is a sign element */
108 Py_LOCAL_INLINE(int)
is_sign_element(Py_UCS4 c)109 is_sign_element(Py_UCS4 c)
110 {
111     switch (c) {
112     case ' ': case '+': case '-':
113         return 1;
114     default:
115         return 0;
116     }
117 }
118 
/* Locale type codes.  LT_NO_LOCALE must be zero, because the format
   spec parser tests the thousands_separators field for truth.  The
   ',' and '_' codes double as the character shown in error messages. */
enum LocaleType {
    LT_NO_LOCALE         = 0,        /* no grouping requested */
    LT_DEFAULT_LOCALE    = ',',      /* ',' separator */
    LT_UNDERSCORE_LOCALE = '_',      /* '_' separator */
    LT_UNDER_FOUR_LOCALE = '_' + 1,  /* '_' separator, groups of four */
    LT_CURRENT_LOCALE    = '_' + 2   /* take settings from localeconv() */
};
127 
128 typedef struct {
129     Py_UCS4 fill_char;
130     Py_UCS4 align;
131     int alternate;
132     Py_UCS4 sign;
133     Py_ssize_t width;
134     enum LocaleType thousands_separators;
135     Py_ssize_t precision;
136     Py_UCS4 type;
137 } InternalFormatSpec;
138 
#if 0
/* Occasionally useful for debugging. Should normally be commented out.
   Dumps every field of *format to stdout, one per line.  The Py_UCS4
   and enum fields are cast to int so that each argument has exactly
   the type its %d / %c conversion expects. */
static void
DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
{
    printf("internal format spec: fill_char %d\n", (int)format->fill_char);
    printf("internal format spec: align %d\n", (int)format->align);
    printf("internal format spec: alternate %d\n", format->alternate);
    printf("internal format spec: sign %d\n", (int)format->sign);
    printf("internal format spec: width %zd\n", format->width);
    printf("internal format spec: thousands_separators %d\n",
           (int)format->thousands_separators);
    printf("internal format spec: precision %zd\n", format->precision);
    printf("internal format spec: type %c\n", (int)format->type);
    printf("\n");
}
#endif
156 
157 
158 /*
159   ptr points to the start of the format_spec, end points just past its end.
160   fills in format with the parsed information.
161   returns 1 on success, 0 on failure.
162   if failure, sets the exception
163 */
164 static int
parse_internal_render_format_spec(PyObject * format_spec,Py_ssize_t start,Py_ssize_t end,InternalFormatSpec * format,char default_type,char default_align)165 parse_internal_render_format_spec(PyObject *format_spec,
166                                   Py_ssize_t start, Py_ssize_t end,
167                                   InternalFormatSpec *format,
168                                   char default_type,
169                                   char default_align)
170 {
171     Py_ssize_t pos = start;
172     int kind = PyUnicode_KIND(format_spec);
173     const void *data = PyUnicode_DATA(format_spec);
174     /* end-pos is used throughout this code to specify the length of
175        the input string */
176 #define READ_spec(index) PyUnicode_READ(kind, data, index)
177 
178     Py_ssize_t consumed;
179     int align_specified = 0;
180     int fill_char_specified = 0;
181 
182     format->fill_char = ' ';
183     format->align = default_align;
184     format->alternate = 0;
185     format->sign = '\0';
186     format->width = -1;
187     format->thousands_separators = LT_NO_LOCALE;
188     format->precision = -1;
189     format->type = default_type;
190 
191     /* If the second char is an alignment token,
192        then parse the fill char */
193     if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
194         format->align = READ_spec(pos+1);
195         format->fill_char = READ_spec(pos);
196         fill_char_specified = 1;
197         align_specified = 1;
198         pos += 2;
199     }
200     else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
201         format->align = READ_spec(pos);
202         align_specified = 1;
203         ++pos;
204     }
205 
206     /* Parse the various sign options */
207     if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
208         format->sign = READ_spec(pos);
209         ++pos;
210     }
211 
212     /* If the next character is #, we're in alternate mode.  This only
213        applies to integers. */
214     if (end-pos >= 1 && READ_spec(pos) == '#') {
215         format->alternate = 1;
216         ++pos;
217     }
218 
219     /* The special case for 0-padding (backwards compat) */
220     if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
221         format->fill_char = '0';
222         if (!align_specified && default_align == '>') {
223             format->align = '=';
224         }
225         ++pos;
226     }
227 
228     consumed = get_integer(format_spec, &pos, end, &format->width);
229     if (consumed == -1)
230         /* Overflow error. Exception already set. */
231         return 0;
232 
233     /* If consumed is 0, we didn't consume any characters for the
234        width. In that case, reset the width to -1, because
235        get_integer() will have set it to zero. -1 is how we record
236        that the width wasn't specified. */
237     if (consumed == 0)
238         format->width = -1;
239 
240     /* Comma signifies add thousands separators */
241     if (end-pos && READ_spec(pos) == ',') {
242         format->thousands_separators = LT_DEFAULT_LOCALE;
243         ++pos;
244     }
245     /* Underscore signifies add thousands separators */
246     if (end-pos && READ_spec(pos) == '_') {
247         if (format->thousands_separators != LT_NO_LOCALE) {
248             invalid_comma_and_underscore();
249             return 0;
250         }
251         format->thousands_separators = LT_UNDERSCORE_LOCALE;
252         ++pos;
253     }
254     if (end-pos && READ_spec(pos) == ',') {
255         if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
256             invalid_comma_and_underscore();
257             return 0;
258         }
259     }
260 
261     /* Parse field precision */
262     if (end-pos && READ_spec(pos) == '.') {
263         ++pos;
264 
265         consumed = get_integer(format_spec, &pos, end, &format->precision);
266         if (consumed == -1)
267             /* Overflow error. Exception already set. */
268             return 0;
269 
270         /* Not having a precision after a dot is an error. */
271         if (consumed == 0) {
272             PyErr_Format(PyExc_ValueError,
273                          "Format specifier missing precision");
274             return 0;
275         }
276 
277     }
278 
279     /* Finally, parse the type field. */
280 
281     if (end-pos > 1) {
282         /* More than one char remain, invalid format specifier. */
283         PyErr_Format(PyExc_ValueError, "Invalid format specifier");
284         return 0;
285     }
286 
287     if (end-pos == 1) {
288         format->type = READ_spec(pos);
289         ++pos;
290     }
291 
292     /* Do as much validating as we can, just by looking at the format
293        specifier.  Do not take into account what type of formatting
294        we're doing (int, float, string). */
295 
296     if (format->thousands_separators) {
297         switch (format->type) {
298         case 'd':
299         case 'e':
300         case 'f':
301         case 'g':
302         case 'E':
303         case 'G':
304         case '%':
305         case 'F':
306         case '\0':
307             /* These are allowed. See PEP 378.*/
308             break;
309         case 'b':
310         case 'o':
311         case 'x':
312         case 'X':
313             /* Underscores are allowed in bin/oct/hex. See PEP 515. */
314             if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
315                 /* Every four digits, not every three, in bin/oct/hex. */
316                 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
317                 break;
318             }
319             /* fall through */
320         default:
321             invalid_thousands_separator_type(format->thousands_separators, format->type);
322             return 0;
323         }
324     }
325 
326     assert (format->align <= 127);
327     assert (format->sign <= 127);
328     return 1;
329 }
330 
331 /* Calculate the padding needed. */
332 static void
calc_padding(Py_ssize_t nchars,Py_ssize_t width,Py_UCS4 align,Py_ssize_t * n_lpadding,Py_ssize_t * n_rpadding,Py_ssize_t * n_total)333 calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
334              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
335              Py_ssize_t *n_total)
336 {
337     if (width >= 0) {
338         if (nchars > width)
339             *n_total = nchars;
340         else
341             *n_total = width;
342     }
343     else {
344         /* not specified, use all of the chars and no more */
345         *n_total = nchars;
346     }
347 
348     /* Figure out how much leading space we need, based on the
349        aligning */
350     if (align == '>')
351         *n_lpadding = *n_total - nchars;
352     else if (align == '^')
353         *n_lpadding = (*n_total - nchars) / 2;
354     else if (align == '<' || align == '=')
355         *n_lpadding = 0;
356     else {
357         /* We should never have an unspecified alignment. */
358         Py_UNREACHABLE();
359     }
360 
361     *n_rpadding = *n_total - nchars - *n_lpadding;
362 }
363 
364 /* Do the padding, and return a pointer to where the caller-supplied
365    content goes. */
366 static int
fill_padding(_PyUnicodeWriter * writer,Py_ssize_t nchars,Py_UCS4 fill_char,Py_ssize_t n_lpadding,Py_ssize_t n_rpadding)367 fill_padding(_PyUnicodeWriter *writer,
368              Py_ssize_t nchars,
369              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
370              Py_ssize_t n_rpadding)
371 {
372     Py_ssize_t pos;
373 
374     /* Pad on left. */
375     if (n_lpadding) {
376         pos = writer->pos;
377         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
378     }
379 
380     /* Pad on right. */
381     if (n_rpadding) {
382         pos = writer->pos + nchars + n_lpadding;
383         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
384     }
385 
386     /* Pointer to the user content. */
387     writer->pos += n_lpadding;
388     return 0;
389 }
390 
391 /************************************************************************/
392 /*********** common routines for numeric formatting *********************/
393 /************************************************************************/
394 
395 /* Locale info needed for formatting integers and the part of floats
396    before and including the decimal. Note that locales only support
397    8-bit chars, not unicode. */
398 typedef struct {
399     PyObject *decimal_point;
400     PyObject *thousands_sep;
401     const char *grouping;
402     char *grouping_buffer;
403 } LocaleInfo;
404 
405 #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
406 
407 /* describes the layout for an integer, see the comment in
408    calc_number_widths() for details */
409 typedef struct {
410     Py_ssize_t n_lpadding;
411     Py_ssize_t n_prefix;
412     Py_ssize_t n_spadding;
413     Py_ssize_t n_rpadding;
414     char sign;
415     Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
416     Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
417                                     any grouping chars. */
418     Py_ssize_t n_decimal;   /* 0 if only an integer */
419     Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
420                                excluding the decimal itself, if
421                                present. */
422 
423     /* These 2 are not the widths of fields, but are needed by
424        STRINGLIB_GROUPING. */
425     Py_ssize_t n_digits;    /* The number of digits before a decimal
426                                or exponent. */
427     Py_ssize_t n_min_width; /* The min_width we used when we computed
428                                the n_grouped_digits width. */
429 } NumberFieldWidths;
430 
431 
432 /* Given a number of the form:
433    digits[remainder]
434    where ptr points to the start and end points to the end, find where
435     the integer part ends. This could be a decimal, an exponent, both,
436     or neither.
437    If a decimal point is present, set *has_decimal and increment
438     remainder beyond it.
439    Results are undefined (but shouldn't crash) for improperly
440     formatted strings.
441 */
442 static void
parse_number(PyObject * s,Py_ssize_t pos,Py_ssize_t end,Py_ssize_t * n_remainder,int * has_decimal)443 parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
444              Py_ssize_t *n_remainder, int *has_decimal)
445 {
446     Py_ssize_t remainder;
447     int kind = PyUnicode_KIND(s);
448     const void *data = PyUnicode_DATA(s);
449 
450     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
451         ++pos;
452     remainder = pos;
453 
454     /* Does remainder start with a decimal point? */
455     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
456 
457     /* Skip the decimal point. */
458     if (*has_decimal)
459         remainder++;
460 
461     *n_remainder = end - remainder;
462 }
463 
464 /* not all fields of format are used.  for example, precision is
465    unused.  should this take discrete params in order to be more clear
466    about what it does?  or is passing a single format parameter easier
467    and more efficient enough to justify a little obfuscation?
468    Return -1 on error. */
469 static Py_ssize_t
calc_number_widths(NumberFieldWidths * spec,Py_ssize_t n_prefix,Py_UCS4 sign_char,Py_ssize_t n_start,Py_ssize_t n_end,Py_ssize_t n_remainder,int has_decimal,const LocaleInfo * locale,const InternalFormatSpec * format,Py_UCS4 * maxchar)470 calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
471                    Py_UCS4 sign_char, Py_ssize_t n_start,
472                    Py_ssize_t n_end, Py_ssize_t n_remainder,
473                    int has_decimal, const LocaleInfo *locale,
474                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
475 {
476     Py_ssize_t n_non_digit_non_padding;
477     Py_ssize_t n_padding;
478 
479     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
480     spec->n_lpadding = 0;
481     spec->n_prefix = n_prefix;
482     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
483     spec->n_remainder = n_remainder;
484     spec->n_spadding = 0;
485     spec->n_rpadding = 0;
486     spec->sign = '\0';
487     spec->n_sign = 0;
488 
489     /* the output will look like:
490        |                                                                                         |
491        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
492        |                                                                                         |
493 
494        sign is computed from format->sign and the actual
495        sign of the number
496 
497        prefix is given (it's for the '0x' prefix)
498 
499        digits is already known
500 
501        the total width is either given, or computed from the
502        actual digits
503 
504        only one of lpadding, spadding, and rpadding can be non-zero,
505        and it's calculated from the width and other fields
506     */
507 
508     /* compute the various parts we're going to write */
509     switch (format->sign) {
510     case '+':
511         /* always put a + or - */
512         spec->n_sign = 1;
513         spec->sign = (sign_char == '-' ? '-' : '+');
514         break;
515     case ' ':
516         spec->n_sign = 1;
517         spec->sign = (sign_char == '-' ? '-' : ' ');
518         break;
519     default:
520         /* Not specified, or the default (-) */
521         if (sign_char == '-') {
522             spec->n_sign = 1;
523             spec->sign = '-';
524         }
525     }
526 
527     /* The number of chars used for non-digits and non-padding. */
528     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
529         spec->n_remainder;
530 
531     /* min_width can go negative, that's okay. format->width == -1 means
532        we don't care. */
533     if (format->fill_char == '0' && format->align == '=')
534         spec->n_min_width = format->width - n_non_digit_non_padding;
535     else
536         spec->n_min_width = 0;
537 
538     if (spec->n_digits == 0)
539         /* This case only occurs when using 'c' formatting, we need
540            to special case it because the grouping code always wants
541            to have at least one character. */
542         spec->n_grouped_digits = 0;
543     else {
544         Py_UCS4 grouping_maxchar;
545         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
546             NULL, 0,
547             NULL, 0, spec->n_digits,
548             spec->n_min_width,
549             locale->grouping, locale->thousands_sep, &grouping_maxchar);
550         if (spec->n_grouped_digits == -1) {
551             return -1;
552         }
553         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
554     }
555 
556     /* Given the desired width and the total of digit and non-digit
557        space we consume, see if we need any padding. format->width can
558        be negative (meaning no padding), but this code still works in
559        that case. */
560     n_padding = format->width -
561                         (n_non_digit_non_padding + spec->n_grouped_digits);
562     if (n_padding > 0) {
563         /* Some padding is needed. Determine if it's left, space, or right. */
564         switch (format->align) {
565         case '<':
566             spec->n_rpadding = n_padding;
567             break;
568         case '^':
569             spec->n_lpadding = n_padding / 2;
570             spec->n_rpadding = n_padding - spec->n_lpadding;
571             break;
572         case '=':
573             spec->n_spadding = n_padding;
574             break;
575         case '>':
576             spec->n_lpadding = n_padding;
577             break;
578         default:
579             /* Shouldn't get here */
580             Py_UNREACHABLE();
581         }
582     }
583 
584     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
585         *maxchar = Py_MAX(*maxchar, format->fill_char);
586 
587     if (spec->n_decimal)
588         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
589 
590     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
591         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
592         spec->n_remainder + spec->n_rpadding;
593 }
594 
595 /* Fill in the digit parts of a number's string representation,
596    as determined in calc_number_widths().
597    Return -1 on error, or 0 on success. */
598 static int
fill_number(_PyUnicodeWriter * writer,const NumberFieldWidths * spec,PyObject * digits,Py_ssize_t d_start,PyObject * prefix,Py_ssize_t p_start,Py_UCS4 fill_char,LocaleInfo * locale,int toupper)599 fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
600             PyObject *digits, Py_ssize_t d_start,
601             PyObject *prefix, Py_ssize_t p_start,
602             Py_UCS4 fill_char,
603             LocaleInfo *locale, int toupper)
604 {
605     /* Used to keep track of digits, decimal, and remainder. */
606     Py_ssize_t d_pos = d_start;
607     const unsigned int kind = writer->kind;
608     const void *data = writer->data;
609     Py_ssize_t r;
610 
611     if (spec->n_lpadding) {
612         _PyUnicode_FastFill(writer->buffer,
613                             writer->pos, spec->n_lpadding, fill_char);
614         writer->pos += spec->n_lpadding;
615     }
616     if (spec->n_sign == 1) {
617         PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
618         writer->pos++;
619     }
620     if (spec->n_prefix) {
621         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
622                                       prefix, p_start,
623                                       spec->n_prefix);
624         if (toupper) {
625             Py_ssize_t t;
626             for (t = 0; t < spec->n_prefix; t++) {
627                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
628                 c = Py_TOUPPER(c);
629                 assert (c <= 127);
630                 PyUnicode_WRITE(kind, data, writer->pos + t, c);
631             }
632         }
633         writer->pos += spec->n_prefix;
634     }
635     if (spec->n_spadding) {
636         _PyUnicode_FastFill(writer->buffer,
637                             writer->pos, spec->n_spadding, fill_char);
638         writer->pos += spec->n_spadding;
639     }
640 
641     /* Only for type 'c' special case, it has no digits. */
642     if (spec->n_digits != 0) {
643         /* Fill the digits with InsertThousandsGrouping. */
644         r = _PyUnicode_InsertThousandsGrouping(
645                 writer, spec->n_grouped_digits,
646                 digits, d_pos, spec->n_digits,
647                 spec->n_min_width,
648                 locale->grouping, locale->thousands_sep, NULL);
649         if (r == -1)
650             return -1;
651         assert(r == spec->n_grouped_digits);
652         d_pos += spec->n_digits;
653     }
654     if (toupper) {
655         Py_ssize_t t;
656         for (t = 0; t < spec->n_grouped_digits; t++) {
657             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
658             c = Py_TOUPPER(c);
659             if (c > 127) {
660                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
661                 return -1;
662             }
663             PyUnicode_WRITE(kind, data, writer->pos + t, c);
664         }
665     }
666     writer->pos += spec->n_grouped_digits;
667 
668     if (spec->n_decimal) {
669         _PyUnicode_FastCopyCharacters(
670             writer->buffer, writer->pos,
671             locale->decimal_point, 0, spec->n_decimal);
672         writer->pos += spec->n_decimal;
673         d_pos += 1;
674     }
675 
676     if (spec->n_remainder) {
677         _PyUnicode_FastCopyCharacters(
678             writer->buffer, writer->pos,
679             digits, d_pos, spec->n_remainder);
680         writer->pos += spec->n_remainder;
681         /* d_pos += spec->n_remainder; */
682     }
683 
684     if (spec->n_rpadding) {
685         _PyUnicode_FastFill(writer->buffer,
686                             writer->pos, spec->n_rpadding,
687                             fill_char);
688         writer->pos += spec->n_rpadding;
689     }
690     return 0;
691 }
692 
693 static const char no_grouping[1] = {CHAR_MAX};
694 
695 /* Find the decimal point character(s?), thousands_separator(s?), and
696    grouping description, either for the current locale if type is
697    LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
698    LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
699 static int
get_locale_info(enum LocaleType type,LocaleInfo * locale_info)700 get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
701 {
702     switch (type) {
703     case LT_CURRENT_LOCALE: {
704         struct lconv *lc = localeconv();
705         if (_Py_GetLocaleconvNumeric(lc,
706                                      &locale_info->decimal_point,
707                                      &locale_info->thousands_sep) < 0) {
708             return -1;
709         }
710 
711         /* localeconv() grouping can become a dangling pointer or point
712            to a different string if another thread calls localeconv() during
713            the string formatting. Copy the string to avoid this risk. */
714         locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
715         if (locale_info->grouping_buffer == NULL) {
716             PyErr_NoMemory();
717             return -1;
718         }
719         locale_info->grouping = locale_info->grouping_buffer;
720         break;
721     }
722     case LT_DEFAULT_LOCALE:
723     case LT_UNDERSCORE_LOCALE:
724     case LT_UNDER_FOUR_LOCALE:
725         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
726         locale_info->thousands_sep = PyUnicode_FromOrdinal(
727             type == LT_DEFAULT_LOCALE ? ',' : '_');
728         if (!locale_info->decimal_point || !locale_info->thousands_sep)
729             return -1;
730         if (type != LT_UNDER_FOUR_LOCALE)
731             locale_info->grouping = "\3"; /* Group every 3 characters.  The
732                                          (implicit) trailing 0 means repeat
733                                          infinitely. */
734         else
735             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
736         break;
737     case LT_NO_LOCALE:
738         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
739         locale_info->thousands_sep = PyUnicode_New(0, 0);
740         if (!locale_info->decimal_point || !locale_info->thousands_sep)
741             return -1;
742         locale_info->grouping = no_grouping;
743         break;
744     }
745     return 0;
746 }
747 
748 static void
free_locale_info(LocaleInfo * locale_info)749 free_locale_info(LocaleInfo *locale_info)
750 {
751     Py_XDECREF(locale_info->decimal_point);
752     Py_XDECREF(locale_info->thousands_sep);
753     PyMem_Free(locale_info->grouping_buffer);
754 }
755 
756 /************************************************************************/
757 /*********** string formatting ******************************************/
758 /************************************************************************/
759 
760 static int
format_string_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)761 format_string_internal(PyObject *value, const InternalFormatSpec *format,
762                        _PyUnicodeWriter *writer)
763 {
764     Py_ssize_t lpad;
765     Py_ssize_t rpad;
766     Py_ssize_t total;
767     Py_ssize_t len;
768     int result = -1;
769     Py_UCS4 maxchar;
770 
771     assert(PyUnicode_IS_READY(value));
772     len = PyUnicode_GET_LENGTH(value);
773 
774     /* sign is not allowed on strings */
775     if (format->sign != '\0') {
776         if (format->sign == ' ') {
777             PyErr_SetString(PyExc_ValueError,
778                 "Space not allowed in string format specifier");
779         }
780         else {
781             PyErr_SetString(PyExc_ValueError,
782                 "Sign not allowed in string format specifier");
783         }
784         goto done;
785     }
786 
787     /* alternate is not allowed on strings */
788     if (format->alternate) {
789         PyErr_SetString(PyExc_ValueError,
790                         "Alternate form (#) not allowed in string format "
791                         "specifier");
792         goto done;
793     }
794 
795     /* '=' alignment not allowed on strings */
796     if (format->align == '=') {
797         PyErr_SetString(PyExc_ValueError,
798                         "'=' alignment not allowed "
799                         "in string format specifier");
800         goto done;
801     }
802 
803     if ((format->width == -1 || format->width <= len)
804         && (format->precision == -1 || format->precision >= len)) {
805         /* Fast path */
806         return _PyUnicodeWriter_WriteStr(writer, value);
807     }
808 
809     /* if precision is specified, output no more that format.precision
810        characters */
811     if (format->precision >= 0 && len >= format->precision) {
812         len = format->precision;
813     }
814 
815     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
816 
817     maxchar = writer->maxchar;
818     if (lpad != 0 || rpad != 0)
819         maxchar = Py_MAX(maxchar, format->fill_char);
820     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
821         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
822         maxchar = Py_MAX(maxchar, valmaxchar);
823     }
824 
825     /* allocate the resulting string */
826     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
827         goto done;
828 
829     /* Write into that space. First the padding. */
830     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
831     if (result == -1)
832         goto done;
833 
834     /* Then the source string. */
835     if (len) {
836         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
837                                       value, 0, len);
838     }
839     writer->pos += (len + rpad);
840     result = 0;
841 
842 done:
843     return result;
844 }
845 
846 
847 /************************************************************************/
848 /*********** long formatting ********************************************/
849 /************************************************************************/
850 
851 static int
format_long_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)852 format_long_internal(PyObject *value, const InternalFormatSpec *format,
853                      _PyUnicodeWriter *writer)
854 {
855     int result = -1;
856     Py_UCS4 maxchar = 127;
857     PyObject *tmp = NULL;
858     Py_ssize_t inumeric_chars;
859     Py_UCS4 sign_char = '\0';
860     Py_ssize_t n_digits;       /* count of digits need from the computed
861                                   string */
862     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
863                                    produces non-digits */
864     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
865     Py_ssize_t n_total;
866     Py_ssize_t prefix = 0;
867     NumberFieldWidths spec;
868     long x;
869 
870     /* Locale settings, either from the actual locale or
871        from a hard-code pseudo-locale */
872     LocaleInfo locale = LocaleInfo_STATIC_INIT;
873 
874     /* no precision allowed on integers */
875     if (format->precision != -1) {
876         PyErr_SetString(PyExc_ValueError,
877                         "Precision not allowed in integer format specifier");
878         goto done;
879     }
880 
881     /* special case for character formatting */
882     if (format->type == 'c') {
883         /* error to specify a sign */
884         if (format->sign != '\0') {
885             PyErr_SetString(PyExc_ValueError,
886                             "Sign not allowed with integer"
887                             " format specifier 'c'");
888             goto done;
889         }
890         /* error to request alternate format */
891         if (format->alternate) {
892             PyErr_SetString(PyExc_ValueError,
893                             "Alternate form (#) not allowed with integer"
894                             " format specifier 'c'");
895             goto done;
896         }
897 
898         /* taken from unicodeobject.c formatchar() */
899         /* Integer input truncated to a character */
900         x = PyLong_AsLong(value);
901         if (x == -1 && PyErr_Occurred())
902             goto done;
903         if (x < 0 || x > 0x10ffff) {
904             PyErr_SetString(PyExc_OverflowError,
905                             "%c arg not in range(0x110000)");
906             goto done;
907         }
908         tmp = PyUnicode_FromOrdinal(x);
909         inumeric_chars = 0;
910         n_digits = 1;
911         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
912 
913         /* As a sort-of hack, we tell calc_number_widths that we only
914            have "remainder" characters. calc_number_widths thinks
915            these are characters that don't get formatted, only copied
916            into the output string. We do this for 'c' formatting,
917            because the characters are likely to be non-digits. */
918         n_remainder = 1;
919     }
920     else {
921         int base;
922         int leading_chars_to_skip = 0;  /* Number of characters added by
923                                            PyNumber_ToBase that we want to
924                                            skip over. */
925 
926         /* Compute the base and how many characters will be added by
927            PyNumber_ToBase */
928         switch (format->type) {
929         case 'b':
930             base = 2;
931             leading_chars_to_skip = 2; /* 0b */
932             break;
933         case 'o':
934             base = 8;
935             leading_chars_to_skip = 2; /* 0o */
936             break;
937         case 'x':
938         case 'X':
939             base = 16;
940             leading_chars_to_skip = 2; /* 0x */
941             break;
942         default:  /* shouldn't be needed, but stops a compiler warning */
943         case 'd':
944         case 'n':
945             base = 10;
946             break;
947         }
948 
949         if (format->sign != '+' && format->sign != ' '
950             && format->width == -1
951             && format->type != 'X' && format->type != 'n'
952             && !format->thousands_separators
953             && PyLong_CheckExact(value))
954         {
955             /* Fast path */
956             return _PyLong_FormatWriter(writer, value, base, format->alternate);
957         }
958 
959         /* The number of prefix chars is the same as the leading
960            chars to skip */
961         if (format->alternate)
962             n_prefix = leading_chars_to_skip;
963 
964         /* Do the hard part, converting to a string in a given base */
965         tmp = _PyLong_Format(value, base);
966         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
967             goto done;
968 
969         inumeric_chars = 0;
970         n_digits = PyUnicode_GET_LENGTH(tmp);
971 
972         prefix = inumeric_chars;
973 
974         /* Is a sign character present in the output?  If so, remember it
975            and skip it */
976         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
977             sign_char = '-';
978             ++prefix;
979             ++leading_chars_to_skip;
980         }
981 
982         /* Skip over the leading chars (0x, 0b, etc.) */
983         n_digits -= leading_chars_to_skip;
984         inumeric_chars += leading_chars_to_skip;
985     }
986 
987     /* Determine the grouping, separator, and decimal point, if any. */
988     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
989                         format->thousands_separators,
990                         &locale) == -1)
991         goto done;
992 
993     /* Calculate how much memory we'll need. */
994     n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
995                                  inumeric_chars + n_digits, n_remainder, 0,
996                                  &locale, format, &maxchar);
997     if (n_total == -1) {
998         goto done;
999     }
1000 
1001     /* Allocate the memory. */
1002     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1003         goto done;
1004 
1005     /* Populate the memory. */
1006     result = fill_number(writer, &spec,
1007                          tmp, inumeric_chars,
1008                          tmp, prefix, format->fill_char,
1009                          &locale, format->type == 'X');
1010 
1011 done:
1012     Py_XDECREF(tmp);
1013     free_locale_info(&locale);
1014     return result;
1015 }
1016 
1017 /************************************************************************/
1018 /*********** float formatting *******************************************/
1019 /************************************************************************/
1020 
1021 /* much of this is taken from unicodeobject.c */
1022 static int
format_float_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1023 format_float_internal(PyObject *value,
1024                       const InternalFormatSpec *format,
1025                       _PyUnicodeWriter *writer)
1026 {
1027     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
1028     Py_ssize_t n_digits;
1029     Py_ssize_t n_remainder;
1030     Py_ssize_t n_total;
1031     int has_decimal;
1032     double val;
1033     int precision, default_precision = 6;
1034     Py_UCS4 type = format->type;
1035     int add_pct = 0;
1036     Py_ssize_t index;
1037     NumberFieldWidths spec;
1038     int flags = 0;
1039     int result = -1;
1040     Py_UCS4 maxchar = 127;
1041     Py_UCS4 sign_char = '\0';
1042     int float_type; /* Used to see if we have a nan, inf, or regular float. */
1043     PyObject *unicode_tmp = NULL;
1044 
1045     /* Locale settings, either from the actual locale or
1046        from a hard-code pseudo-locale */
1047     LocaleInfo locale = LocaleInfo_STATIC_INIT;
1048 
1049     if (format->precision > INT_MAX) {
1050         PyErr_SetString(PyExc_ValueError, "precision too big");
1051         goto done;
1052     }
1053     precision = (int)format->precision;
1054 
1055     if (format->alternate)
1056         flags |= Py_DTSF_ALT;
1057 
1058     if (type == '\0') {
1059         /* Omitted type specifier.  Behaves in the same way as repr(x)
1060            and str(x) if no precision is given, else like 'g', but with
1061            at least one digit after the decimal point. */
1062         flags |= Py_DTSF_ADD_DOT_0;
1063         type = 'r';
1064         default_precision = 0;
1065     }
1066 
1067     if (type == 'n')
1068         /* 'n' is the same as 'g', except for the locale used to
1069            format the result. We take care of that later. */
1070         type = 'g';
1071 
1072     val = PyFloat_AsDouble(value);
1073     if (val == -1.0 && PyErr_Occurred())
1074         goto done;
1075 
1076     if (type == '%') {
1077         type = 'f';
1078         val *= 100;
1079         add_pct = 1;
1080     }
1081 
1082     if (precision < 0)
1083         precision = default_precision;
1084     else if (type == 'r')
1085         type = 'g';
1086 
1087     /* Cast "type", because if we're in unicode we need to pass an
1088        8-bit char. This is safe, because we've restricted what "type"
1089        can be. */
1090     buf = PyOS_double_to_string(val, (char)type, precision, flags,
1091                                 &float_type);
1092     if (buf == NULL)
1093         goto done;
1094     n_digits = strlen(buf);
1095 
1096     if (add_pct) {
1097         /* We know that buf has a trailing zero (since we just called
1098            strlen() on it), and we don't use that fact any more. So we
1099            can just write over the trailing zero. */
1100         buf[n_digits] = '%';
1101         n_digits += 1;
1102     }
1103 
1104     if (format->sign != '+' && format->sign != ' '
1105         && format->width == -1
1106         && format->type != 'n'
1107         && !format->thousands_separators)
1108     {
1109         /* Fast path */
1110         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1111         PyMem_Free(buf);
1112         return result;
1113     }
1114 
1115     /* Since there is no unicode version of PyOS_double_to_string,
1116        just use the 8 bit version and then convert to unicode. */
1117     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1118     PyMem_Free(buf);
1119     if (unicode_tmp == NULL)
1120         goto done;
1121 
1122     /* Is a sign character present in the output?  If so, remember it
1123        and skip it */
1124     index = 0;
1125     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1126         sign_char = '-';
1127         ++index;
1128         --n_digits;
1129     }
1130 
1131     /* Determine if we have any "remainder" (after the digits, might include
1132        decimal or exponent or both (or neither)) */
1133     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1134 
1135     /* Determine the grouping, separator, and decimal point, if any. */
1136     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1137                         format->thousands_separators,
1138                         &locale) == -1)
1139         goto done;
1140 
1141     /* Calculate how much memory we'll need. */
1142     n_total = calc_number_widths(&spec, 0, sign_char, index,
1143                                  index + n_digits, n_remainder, has_decimal,
1144                                  &locale, format, &maxchar);
1145     if (n_total == -1) {
1146         goto done;
1147     }
1148 
1149     /* Allocate the memory. */
1150     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1151         goto done;
1152 
1153     /* Populate the memory. */
1154     result = fill_number(writer, &spec,
1155                          unicode_tmp, index,
1156                          NULL, 0, format->fill_char,
1157                          &locale, 0);
1158 
1159 done:
1160     Py_XDECREF(unicode_tmp);
1161     free_locale_info(&locale);
1162     return result;
1163 }
1164 
1165 /************************************************************************/
1166 /*********** complex formatting *****************************************/
1167 /************************************************************************/
1168 
1169 static int
format_complex_internal(PyObject * value,const InternalFormatSpec * format,_PyUnicodeWriter * writer)1170 format_complex_internal(PyObject *value,
1171                         const InternalFormatSpec *format,
1172                         _PyUnicodeWriter *writer)
1173 {
1174     double re;
1175     double im;
1176     char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1177     char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
1178 
1179     InternalFormatSpec tmp_format = *format;
1180     Py_ssize_t n_re_digits;
1181     Py_ssize_t n_im_digits;
1182     Py_ssize_t n_re_remainder;
1183     Py_ssize_t n_im_remainder;
1184     Py_ssize_t n_re_total;
1185     Py_ssize_t n_im_total;
1186     int re_has_decimal;
1187     int im_has_decimal;
1188     int precision, default_precision = 6;
1189     Py_UCS4 type = format->type;
1190     Py_ssize_t i_re;
1191     Py_ssize_t i_im;
1192     NumberFieldWidths re_spec;
1193     NumberFieldWidths im_spec;
1194     int flags = 0;
1195     int result = -1;
1196     Py_UCS4 maxchar = 127;
1197     enum PyUnicode_Kind rkind;
1198     void *rdata;
1199     Py_UCS4 re_sign_char = '\0';
1200     Py_UCS4 im_sign_char = '\0';
1201     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1202     int im_float_type;
1203     int add_parens = 0;
1204     int skip_re = 0;
1205     Py_ssize_t lpad;
1206     Py_ssize_t rpad;
1207     Py_ssize_t total;
1208     PyObject *re_unicode_tmp = NULL;
1209     PyObject *im_unicode_tmp = NULL;
1210 
1211     /* Locale settings, either from the actual locale or
1212        from a hard-code pseudo-locale */
1213     LocaleInfo locale = LocaleInfo_STATIC_INIT;
1214 
1215     if (format->precision > INT_MAX) {
1216         PyErr_SetString(PyExc_ValueError, "precision too big");
1217         goto done;
1218     }
1219     precision = (int)format->precision;
1220 
1221     /* Zero padding is not allowed. */
1222     if (format->fill_char == '0') {
1223         PyErr_SetString(PyExc_ValueError,
1224                         "Zero padding is not allowed in complex format "
1225                         "specifier");
1226         goto done;
1227     }
1228 
1229     /* Neither is '=' alignment . */
1230     if (format->align == '=') {
1231         PyErr_SetString(PyExc_ValueError,
1232                         "'=' alignment flag is not allowed in complex format "
1233                         "specifier");
1234         goto done;
1235     }
1236 
1237     re = PyComplex_RealAsDouble(value);
1238     if (re == -1.0 && PyErr_Occurred())
1239         goto done;
1240     im = PyComplex_ImagAsDouble(value);
1241     if (im == -1.0 && PyErr_Occurred())
1242         goto done;
1243 
1244     if (format->alternate)
1245         flags |= Py_DTSF_ALT;
1246 
1247     if (type == '\0') {
1248         /* Omitted type specifier. Should be like str(self). */
1249         type = 'r';
1250         default_precision = 0;
1251         if (re == 0.0 && copysign(1.0, re) == 1.0)
1252             skip_re = 1;
1253         else
1254             add_parens = 1;
1255     }
1256 
1257     if (type == 'n')
1258         /* 'n' is the same as 'g', except for the locale used to
1259            format the result. We take care of that later. */
1260         type = 'g';
1261 
1262     if (precision < 0)
1263         precision = default_precision;
1264     else if (type == 'r')
1265         type = 'g';
1266 
1267     /* Cast "type", because if we're in unicode we need to pass an
1268        8-bit char. This is safe, because we've restricted what "type"
1269        can be. */
1270     re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1271                                    &re_float_type);
1272     if (re_buf == NULL)
1273         goto done;
1274     im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1275                                    &im_float_type);
1276     if (im_buf == NULL)
1277         goto done;
1278 
1279     n_re_digits = strlen(re_buf);
1280     n_im_digits = strlen(im_buf);
1281 
1282     /* Since there is no unicode version of PyOS_double_to_string,
1283        just use the 8 bit version and then convert to unicode. */
1284     re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1285     if (re_unicode_tmp == NULL)
1286         goto done;
1287     i_re = 0;
1288 
1289     im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1290     if (im_unicode_tmp == NULL)
1291         goto done;
1292     i_im = 0;
1293 
1294     /* Is a sign character present in the output?  If so, remember it
1295        and skip it */
1296     if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1297         re_sign_char = '-';
1298         ++i_re;
1299         --n_re_digits;
1300     }
1301     if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1302         im_sign_char = '-';
1303         ++i_im;
1304         --n_im_digits;
1305     }
1306 
1307     /* Determine if we have any "remainder" (after the digits, might include
1308        decimal or exponent or both (or neither)) */
1309     parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1310                  &n_re_remainder, &re_has_decimal);
1311     parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1312                  &n_im_remainder, &im_has_decimal);
1313 
1314     /* Determine the grouping, separator, and decimal point, if any. */
1315     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1316                         format->thousands_separators,
1317                         &locale) == -1)
1318         goto done;
1319 
1320     /* Turn off any padding. We'll do it later after we've composed
1321        the numbers without padding. */
1322     tmp_format.fill_char = '\0';
1323     tmp_format.align = '<';
1324     tmp_format.width = -1;
1325 
1326     /* Calculate how much memory we'll need. */
1327     n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1328                                     i_re, i_re + n_re_digits, n_re_remainder,
1329                                     re_has_decimal, &locale, &tmp_format,
1330                                     &maxchar);
1331     if (n_re_total == -1) {
1332         goto done;
1333     }
1334 
1335     /* Same formatting, but always include a sign, unless the real part is
1336      * going to be omitted, in which case we use whatever sign convention was
1337      * requested by the original format. */
1338     if (!skip_re)
1339         tmp_format.sign = '+';
1340     n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1341                                     i_im, i_im + n_im_digits, n_im_remainder,
1342                                     im_has_decimal, &locale, &tmp_format,
1343                                     &maxchar);
1344     if (n_im_total == -1) {
1345         goto done;
1346     }
1347 
1348     if (skip_re)
1349         n_re_total = 0;
1350 
1351     /* Add 1 for the 'j', and optionally 2 for parens. */
1352     calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1353                  format->width, format->align, &lpad, &rpad, &total);
1354 
1355     if (lpad || rpad)
1356         maxchar = Py_MAX(maxchar, format->fill_char);
1357 
1358     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1359         goto done;
1360     rkind = writer->kind;
1361     rdata = writer->data;
1362 
1363     /* Populate the memory. First, the padding. */
1364     result = fill_padding(writer,
1365                           n_re_total + n_im_total + 1 + add_parens * 2,
1366                           format->fill_char, lpad, rpad);
1367     if (result == -1)
1368         goto done;
1369 
1370     if (add_parens) {
1371         PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1372         writer->pos++;
1373     }
1374 
1375     if (!skip_re) {
1376         result = fill_number(writer, &re_spec,
1377                              re_unicode_tmp, i_re,
1378                              NULL, 0,
1379                              0,
1380                              &locale, 0);
1381         if (result == -1)
1382             goto done;
1383     }
1384     result = fill_number(writer, &im_spec,
1385                          im_unicode_tmp, i_im,
1386                          NULL, 0,
1387                          0,
1388                          &locale, 0);
1389     if (result == -1)
1390         goto done;
1391     PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1392     writer->pos++;
1393 
1394     if (add_parens) {
1395         PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1396         writer->pos++;
1397     }
1398 
1399     writer->pos += rpad;
1400 
1401 done:
1402     PyMem_Free(re_buf);
1403     PyMem_Free(im_buf);
1404     Py_XDECREF(re_unicode_tmp);
1405     Py_XDECREF(im_unicode_tmp);
1406     free_locale_info(&locale);
1407     return result;
1408 }
1409 
1410 /************************************************************************/
1411 /*********** built in formatters ****************************************/
1412 /************************************************************************/
1413 static int
format_obj(PyObject * obj,_PyUnicodeWriter * writer)1414 format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1415 {
1416     PyObject *str;
1417     int err;
1418 
1419     str = PyObject_Str(obj);
1420     if (str == NULL)
1421         return -1;
1422     err = _PyUnicodeWriter_WriteStr(writer, str);
1423     Py_DECREF(str);
1424     return err;
1425 }
1426 
1427 int
_PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1428 _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1429                                 PyObject *obj,
1430                                 PyObject *format_spec,
1431                                 Py_ssize_t start, Py_ssize_t end)
1432 {
1433     InternalFormatSpec format;
1434 
1435     assert(PyUnicode_Check(obj));
1436 
1437     /* check for the special case of zero length format spec, make
1438        it equivalent to str(obj) */
1439     if (start == end) {
1440         if (PyUnicode_CheckExact(obj))
1441             return _PyUnicodeWriter_WriteStr(writer, obj);
1442         else
1443             return format_obj(obj, writer);
1444     }
1445 
1446     /* parse the format_spec */
1447     if (!parse_internal_render_format_spec(format_spec, start, end,
1448                                            &format, 's', '<'))
1449         return -1;
1450 
1451     /* type conversion? */
1452     switch (format.type) {
1453     case 's':
1454         /* no type conversion needed, already a string.  do the formatting */
1455         return format_string_internal(obj, &format, writer);
1456     default:
1457         /* unknown */
1458         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1459         return -1;
1460     }
1461 }
1462 
1463 int
_PyLong_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1464 _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1465                              PyObject *obj,
1466                              PyObject *format_spec,
1467                              Py_ssize_t start, Py_ssize_t end)
1468 {
1469     PyObject *tmp = NULL;
1470     InternalFormatSpec format;
1471     int result = -1;
1472 
1473     /* check for the special case of zero length format spec, make
1474        it equivalent to str(obj) */
1475     if (start == end) {
1476         if (PyLong_CheckExact(obj))
1477             return _PyLong_FormatWriter(writer, obj, 10, 0);
1478         else
1479             return format_obj(obj, writer);
1480     }
1481 
1482     /* parse the format_spec */
1483     if (!parse_internal_render_format_spec(format_spec, start, end,
1484                                            &format, 'd', '>'))
1485         goto done;
1486 
1487     /* type conversion? */
1488     switch (format.type) {
1489     case 'b':
1490     case 'c':
1491     case 'd':
1492     case 'o':
1493     case 'x':
1494     case 'X':
1495     case 'n':
1496         /* no type conversion needed, already an int.  do the formatting */
1497         result = format_long_internal(obj, &format, writer);
1498         break;
1499 
1500     case 'e':
1501     case 'E':
1502     case 'f':
1503     case 'F':
1504     case 'g':
1505     case 'G':
1506     case '%':
1507         /* convert to float */
1508         tmp = PyNumber_Float(obj);
1509         if (tmp == NULL)
1510             goto done;
1511         result = format_float_internal(tmp, &format, writer);
1512         break;
1513 
1514     default:
1515         /* unknown */
1516         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1517         goto done;
1518     }
1519 
1520 done:
1521     Py_XDECREF(tmp);
1522     return result;
1523 }
1524 
1525 int
_PyFloat_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1526 _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1527                               PyObject *obj,
1528                               PyObject *format_spec,
1529                               Py_ssize_t start, Py_ssize_t end)
1530 {
1531     InternalFormatSpec format;
1532 
1533     /* check for the special case of zero length format spec, make
1534        it equivalent to str(obj) */
1535     if (start == end)
1536         return format_obj(obj, writer);
1537 
1538     /* parse the format_spec */
1539     if (!parse_internal_render_format_spec(format_spec, start, end,
1540                                            &format, '\0', '>'))
1541         return -1;
1542 
1543     /* type conversion? */
1544     switch (format.type) {
1545     case '\0': /* No format code: like 'g', but with at least one decimal. */
1546     case 'e':
1547     case 'E':
1548     case 'f':
1549     case 'F':
1550     case 'g':
1551     case 'G':
1552     case 'n':
1553     case '%':
1554         /* no conversion, already a float.  do the formatting */
1555         return format_float_internal(obj, &format, writer);
1556 
1557     default:
1558         /* unknown */
1559         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1560         return -1;
1561     }
1562 }
1563 
1564 int
_PyComplex_FormatAdvancedWriter(_PyUnicodeWriter * writer,PyObject * obj,PyObject * format_spec,Py_ssize_t start,Py_ssize_t end)1565 _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1566                                 PyObject *obj,
1567                                 PyObject *format_spec,
1568                                 Py_ssize_t start, Py_ssize_t end)
1569 {
1570     InternalFormatSpec format;
1571 
1572     /* check for the special case of zero length format spec, make
1573        it equivalent to str(obj) */
1574     if (start == end)
1575         return format_obj(obj, writer);
1576 
1577     /* parse the format_spec */
1578     if (!parse_internal_render_format_spec(format_spec, start, end,
1579                                            &format, '\0', '>'))
1580         return -1;
1581 
1582     /* type conversion? */
1583     switch (format.type) {
1584     case '\0': /* No format code: like 'g', but with at least one decimal. */
1585     case 'e':
1586     case 'E':
1587     case 'f':
1588     case 'F':
1589     case 'g':
1590     case 'G':
1591     case 'n':
1592         /* no conversion, already a complex.  do the formatting */
1593         return format_complex_internal(obj, &format, writer);
1594 
1595     default:
1596         /* unknown */
1597         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1598         return -1;
1599     }
1600 }
1601