1 /*
2 unicode_format.h -- implementation of str.format().
3 */
4
5 /************************************************************************/
6 /*********** Global data structures and forward declarations *********/
7 /************************************************************************/
8
9 /*
10 A SubString consists of the characters between two string or
11 unicode pointers.
12 */
13 typedef struct {
14 PyObject *str; /* borrowed reference */
15 Py_ssize_t start, end;
16 } SubString;
17
18
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT,       /* no numbered field has been seen yet */
    ANS_AUTO,       /* fields are being numbered automatically */
    ANS_MANUAL      /* fields carry explicit numeric indices */
} AutoNumberState;
24
25 /* Keeps track of our auto-numbering state, and which number field we're on */
26 typedef struct {
27 AutoNumberState an_state;
28 int an_field_number;
29 } AutoNumber;
30
31
32 /* forward declaration for recursion */
33 static PyObject *
34 build_string(SubString *input, PyObject *args, PyObject *kwargs,
35 int recursion_depth, AutoNumber *auto_number);
36
37
38
39 /************************************************************************/
40 /************************** Utility functions ************************/
41 /************************************************************************/
42
43 static void
AutoNumber_Init(AutoNumber * auto_number)44 AutoNumber_Init(AutoNumber *auto_number)
45 {
46 auto_number->an_state = ANS_INIT;
47 auto_number->an_field_number = 0;
48 }
49
50 /* fill in a SubString from a pointer and length */
51 Py_LOCAL_INLINE(void)
SubString_init(SubString * str,PyObject * s,Py_ssize_t start,Py_ssize_t end)52 SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
53 {
54 str->str = s;
55 str->start = start;
56 str->end = end;
57 }
58
59 /* return a new string. if str->str is NULL, return None */
60 Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString * str)61 SubString_new_object(SubString *str)
62 {
63 if (str->str == NULL)
64 Py_RETURN_NONE;
65 return PyUnicode_Substring(str->str, str->start, str->end);
66 }
67
68 /* return a new string. if str->str is NULL, return a new empty string */
69 Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString * str)70 SubString_new_object_or_empty(SubString *str)
71 {
72 if (str->str == NULL) {
73 return PyUnicode_New(0, 0);
74 }
75 return SubString_new_object(str);
76 }
77
78 /* Return 1 if an error has been detected switching between automatic
79 field numbering and manual field specification, else return 0. Set
80 ValueError on error. */
81 static int
autonumber_state_error(AutoNumberState state,int field_name_is_empty)82 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
83 {
84 if (state == ANS_MANUAL) {
85 if (field_name_is_empty) {
86 PyErr_SetString(PyExc_ValueError, "cannot switch from "
87 "manual field specification to "
88 "automatic field numbering");
89 return 1;
90 }
91 }
92 else {
93 if (!field_name_is_empty) {
94 PyErr_SetString(PyExc_ValueError, "cannot switch from "
95 "automatic field numbering to "
96 "manual field specification");
97 return 1;
98 }
99 }
100 return 0;
101 }
102
103
104 /************************************************************************/
105 /*********** Format string parsing -- integers and identifiers *********/
106 /************************************************************************/
107
108 static Py_ssize_t
get_integer(const SubString * str)109 get_integer(const SubString *str)
110 {
111 Py_ssize_t accumulator = 0;
112 Py_ssize_t digitval;
113 Py_ssize_t i;
114
115 /* empty string is an error */
116 if (str->start >= str->end)
117 return -1;
118
119 for (i = str->start; i < str->end; i++) {
120 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
121 if (digitval < 0)
122 return -1;
123 /*
124 Detect possible overflow before it happens:
125
126 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
127 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
128 */
129 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
130 PyErr_Format(PyExc_ValueError,
131 "Too many decimal digits in format string");
132 return -1;
133 }
134 accumulator = accumulator * 10 + digitval;
135 }
136 return accumulator;
137 }
138
139 /************************************************************************/
140 /******** Functions to get field objects and specification strings ******/
141 /************************************************************************/
142
143 /* do the equivalent of obj.name */
144 static PyObject *
getattr(PyObject * obj,SubString * name)145 getattr(PyObject *obj, SubString *name)
146 {
147 PyObject *newobj;
148 PyObject *str = SubString_new_object(name);
149 if (str == NULL)
150 return NULL;
151 newobj = PyObject_GetAttr(obj, str);
152 Py_DECREF(str);
153 return newobj;
154 }
155
156 /* do the equivalent of obj[idx], where obj is a sequence */
157 static PyObject *
getitem_sequence(PyObject * obj,Py_ssize_t idx)158 getitem_sequence(PyObject *obj, Py_ssize_t idx)
159 {
160 return PySequence_GetItem(obj, idx);
161 }
162
163 /* do the equivalent of obj[idx], where obj is not a sequence */
164 static PyObject *
getitem_idx(PyObject * obj,Py_ssize_t idx)165 getitem_idx(PyObject *obj, Py_ssize_t idx)
166 {
167 PyObject *newobj;
168 PyObject *idx_obj = PyLong_FromSsize_t(idx);
169 if (idx_obj == NULL)
170 return NULL;
171 newobj = PyObject_GetItem(obj, idx_obj);
172 Py_DECREF(idx_obj);
173 return newobj;
174 }
175
176 /* do the equivalent of obj[name] */
177 static PyObject *
getitem_str(PyObject * obj,SubString * name)178 getitem_str(PyObject *obj, SubString *name)
179 {
180 PyObject *newobj;
181 PyObject *str = SubString_new_object(name);
182 if (str == NULL)
183 return NULL;
184 newobj = PyObject_GetItem(obj, str);
185 Py_DECREF(str);
186 return newobj;
187 }
188
189 typedef struct {
190 /* the entire string we're parsing. we assume that someone else
191 is managing its lifetime, and that it will exist for the
192 lifetime of the iterator. can be empty */
193 SubString str;
194
195 /* index to where we are inside field_name */
196 Py_ssize_t index;
197 } FieldNameIterator;
198
199
200 static int
FieldNameIterator_init(FieldNameIterator * self,PyObject * s,Py_ssize_t start,Py_ssize_t end)201 FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
202 Py_ssize_t start, Py_ssize_t end)
203 {
204 SubString_init(&self->str, s, start, end);
205 self->index = start;
206 return 1;
207 }
208
209 static int
_FieldNameIterator_attr(FieldNameIterator * self,SubString * name)210 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
211 {
212 Py_UCS4 c;
213
214 name->str = self->str.str;
215 name->start = self->index;
216
217 /* return everything until '.' or '[' */
218 while (self->index < self->str.end) {
219 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
220 switch (c) {
221 case '[':
222 case '.':
223 /* backup so that we this character will be seen next time */
224 self->index--;
225 break;
226 default:
227 continue;
228 }
229 break;
230 }
231 /* end of string is okay */
232 name->end = self->index;
233 return 1;
234 }
235
236 static int
_FieldNameIterator_item(FieldNameIterator * self,SubString * name)237 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
238 {
239 int bracket_seen = 0;
240 Py_UCS4 c;
241
242 name->str = self->str.str;
243 name->start = self->index;
244
245 /* return everything until ']' */
246 while (self->index < self->str.end) {
247 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
248 switch (c) {
249 case ']':
250 bracket_seen = 1;
251 break;
252 default:
253 continue;
254 }
255 break;
256 }
257 /* make sure we ended with a ']' */
258 if (!bracket_seen) {
259 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
260 return 0;
261 }
262
263 /* end of string is okay */
264 /* don't include the ']' */
265 name->end = self->index-1;
266 return 1;
267 }
268
269 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
270 static int
FieldNameIterator_next(FieldNameIterator * self,int * is_attribute,Py_ssize_t * name_idx,SubString * name)271 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
272 Py_ssize_t *name_idx, SubString *name)
273 {
274 /* check at end of input */
275 if (self->index >= self->str.end)
276 return 1;
277
278 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
279 case '.':
280 *is_attribute = 1;
281 if (_FieldNameIterator_attr(self, name) == 0)
282 return 0;
283 *name_idx = -1;
284 break;
285 case '[':
286 *is_attribute = 0;
287 if (_FieldNameIterator_item(self, name) == 0)
288 return 0;
289 *name_idx = get_integer(name);
290 if (*name_idx == -1 && PyErr_Occurred())
291 return 0;
292 break;
293 default:
294 /* Invalid character follows ']' */
295 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
296 "follow ']' in format field specifier");
297 return 0;
298 }
299
300 /* empty string is an error */
301 if (name->start == name->end) {
302 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
303 return 0;
304 }
305
306 return 2;
307 }
308
309
310 /* input: field_name
311 output: 'first' points to the part before the first '[' or '.'
312 'first_idx' is -1 if 'first' is not an integer, otherwise
313 it's the value of first converted to an integer
314 'rest' is an iterator to return the rest
315 */
316 static int
field_name_split(PyObject * str,Py_ssize_t start,Py_ssize_t end,SubString * first,Py_ssize_t * first_idx,FieldNameIterator * rest,AutoNumber * auto_number)317 field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
318 Py_ssize_t *first_idx, FieldNameIterator *rest,
319 AutoNumber *auto_number)
320 {
321 Py_UCS4 c;
322 Py_ssize_t i = start;
323 int field_name_is_empty;
324 int using_numeric_index;
325
326 /* find the part up until the first '.' or '[' */
327 while (i < end) {
328 switch (c = PyUnicode_READ_CHAR(str, i++)) {
329 case '[':
330 case '.':
331 /* backup so that we this character is available to the
332 "rest" iterator */
333 i--;
334 break;
335 default:
336 continue;
337 }
338 break;
339 }
340
341 /* set up the return values */
342 SubString_init(first, str, start, i);
343 FieldNameIterator_init(rest, str, i, end);
344
345 /* see if "first" is an integer, in which case it's used as an index */
346 *first_idx = get_integer(first);
347 if (*first_idx == -1 && PyErr_Occurred())
348 return 0;
349
350 field_name_is_empty = first->start >= first->end;
351
352 /* If the field name is omitted or if we have a numeric index
353 specified, then we're doing numeric indexing into args. */
354 using_numeric_index = field_name_is_empty || *first_idx != -1;
355
356 /* We always get here exactly one time for each field we're
357 processing. And we get here in field order (counting by left
358 braces). So this is the perfect place to handle automatic field
359 numbering if the field name is omitted. */
360
361 /* Check if we need to do the auto-numbering. It's not needed if
362 we're called from string.Format routines, because it's handled
363 in that class by itself. */
364 if (auto_number) {
365 /* Initialize our auto numbering state if this is the first
366 time we're either auto-numbering or manually numbering. */
367 if (auto_number->an_state == ANS_INIT && using_numeric_index)
368 auto_number->an_state = field_name_is_empty ?
369 ANS_AUTO : ANS_MANUAL;
370
371 /* Make sure our state is consistent with what we're doing
372 this time through. Only check if we're using a numeric
373 index. */
374 if (using_numeric_index)
375 if (autonumber_state_error(auto_number->an_state,
376 field_name_is_empty))
377 return 0;
378 /* Zero length field means we want to do auto-numbering of the
379 fields. */
380 if (field_name_is_empty)
381 *first_idx = (auto_number->an_field_number)++;
382 }
383
384 return 1;
385 }
386
387
388 /*
389 get_field_object returns the object inside {}, before the
390 format_spec. It handles getindex and getattr lookups and consumes
391 the entire input string.
392 */
393 static PyObject *
get_field_object(SubString * input,PyObject * args,PyObject * kwargs,AutoNumber * auto_number)394 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
395 AutoNumber *auto_number)
396 {
397 PyObject *obj = NULL;
398 int ok;
399 int is_attribute;
400 SubString name;
401 SubString first;
402 Py_ssize_t index;
403 FieldNameIterator rest;
404
405 if (!field_name_split(input->str, input->start, input->end, &first,
406 &index, &rest, auto_number)) {
407 goto error;
408 }
409
410 if (index == -1) {
411 /* look up in kwargs */
412 PyObject *key = SubString_new_object(&first);
413 if (key == NULL) {
414 goto error;
415 }
416 if (kwargs == NULL) {
417 PyErr_SetObject(PyExc_KeyError, key);
418 Py_DECREF(key);
419 goto error;
420 }
421 /* Use PyObject_GetItem instead of PyDict_GetItem because this
422 code is no longer just used with kwargs. It might be passed
423 a non-dict when called through format_map. */
424 obj = PyObject_GetItem(kwargs, key);
425 Py_DECREF(key);
426 if (obj == NULL) {
427 goto error;
428 }
429 }
430 else {
431 /* If args is NULL, we have a format string with a positional field
432 with only kwargs to retrieve it from. This can only happen when
433 used with format_map(), where positional arguments are not
434 allowed. */
435 if (args == NULL) {
436 PyErr_SetString(PyExc_ValueError, "Format string contains "
437 "positional fields");
438 goto error;
439 }
440
441 /* look up in args */
442 obj = PySequence_GetItem(args, index);
443 if (obj == NULL)
444 goto error;
445 }
446
447 /* iterate over the rest of the field_name */
448 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
449 &name)) == 2) {
450 PyObject *tmp;
451
452 if (is_attribute)
453 /* getattr lookup "." */
454 tmp = getattr(obj, &name);
455 else
456 /* getitem lookup "[]" */
457 if (index == -1)
458 tmp = getitem_str(obj, &name);
459 else
460 if (PySequence_Check(obj))
461 tmp = getitem_sequence(obj, index);
462 else
463 /* not a sequence */
464 tmp = getitem_idx(obj, index);
465 if (tmp == NULL)
466 goto error;
467
468 /* assign to obj */
469 Py_DECREF(obj);
470 obj = tmp;
471 }
472 /* end of iterator, this is the non-error case */
473 if (ok == 1)
474 return obj;
475 error:
476 Py_XDECREF(obj);
477 return NULL;
478 }
479
480 /************************************************************************/
481 /***************** Field rendering functions **************************/
482 /************************************************************************/
483
484 /*
485 render_field() is the main function in this section. It takes the
486 field object and field specification string generated by
487 get_field_and_spec, and renders the field into the output string.
488
489 render_field calls fieldobj.__format__(format_spec) method, and
490 appends to the output.
491 */
492 static int
render_field(PyObject * fieldobj,SubString * format_spec,_PyUnicodeWriter * writer)493 render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
494 {
495 int ok = 0;
496 PyObject *result = NULL;
497 PyObject *format_spec_object = NULL;
498 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
499 int err;
500
501 /* If we know the type exactly, skip the lookup of __format__ and just
502 call the formatter directly. */
503 if (PyUnicode_CheckExact(fieldobj))
504 formatter = _PyUnicode_FormatAdvancedWriter;
505 else if (PyLong_CheckExact(fieldobj))
506 formatter = _PyLong_FormatAdvancedWriter;
507 else if (PyFloat_CheckExact(fieldobj))
508 formatter = _PyFloat_FormatAdvancedWriter;
509 else if (PyComplex_CheckExact(fieldobj))
510 formatter = _PyComplex_FormatAdvancedWriter;
511
512 if (formatter) {
513 /* we know exactly which formatter will be called when __format__ is
514 looked up, so call it directly, instead. */
515 err = formatter(writer, fieldobj, format_spec->str,
516 format_spec->start, format_spec->end);
517 return (err == 0);
518 }
519 else {
520 /* We need to create an object out of the pointers we have, because
521 __format__ takes a string/unicode object for format_spec. */
522 if (format_spec->str)
523 format_spec_object = PyUnicode_Substring(format_spec->str,
524 format_spec->start,
525 format_spec->end);
526 else
527 format_spec_object = PyUnicode_New(0, 0);
528 if (format_spec_object == NULL)
529 goto done;
530
531 result = PyObject_Format(fieldobj, format_spec_object);
532 }
533 if (result == NULL)
534 goto done;
535
536 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
537 goto done;
538 ok = 1;
539
540 done:
541 Py_XDECREF(format_spec_object);
542 Py_XDECREF(result);
543 return ok;
544 }
545
546 static int
parse_field(SubString * str,SubString * field_name,SubString * format_spec,int * format_spec_needs_expanding,Py_UCS4 * conversion)547 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
548 int *format_spec_needs_expanding, Py_UCS4 *conversion)
549 {
550 /* Note this function works if the field name is zero length,
551 which is good. Zero length field names are handled later, in
552 field_name_split. */
553
554 Py_UCS4 c = 0;
555
556 /* initialize these, as they may be empty */
557 *conversion = '\0';
558 SubString_init(format_spec, NULL, 0, 0);
559
560 /* Search for the field name. it's terminated by the end of
561 the string, or a ':' or '!' */
562 field_name->str = str->str;
563 field_name->start = str->start;
564 while (str->start < str->end) {
565 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
566 case '{':
567 PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
568 return 0;
569 case '[':
570 for (; str->start < str->end; str->start++)
571 if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
572 break;
573 continue;
574 case '}':
575 case ':':
576 case '!':
577 break;
578 default:
579 continue;
580 }
581 break;
582 }
583
584 field_name->end = str->start - 1;
585 if (c == '!' || c == ':') {
586 Py_ssize_t count;
587 /* we have a format specifier and/or a conversion */
588 /* don't include the last character */
589
590 /* see if there's a conversion specifier */
591 if (c == '!') {
592 /* there must be another character present */
593 if (str->start >= str->end) {
594 PyErr_SetString(PyExc_ValueError,
595 "end of string while looking for conversion "
596 "specifier");
597 return 0;
598 }
599 *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
600
601 if (str->start < str->end) {
602 c = PyUnicode_READ_CHAR(str->str, str->start++);
603 if (c == '}')
604 return 1;
605 if (c != ':') {
606 PyErr_SetString(PyExc_ValueError,
607 "expected ':' after conversion specifier");
608 return 0;
609 }
610 }
611 }
612 format_spec->str = str->str;
613 format_spec->start = str->start;
614 count = 1;
615 while (str->start < str->end) {
616 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
617 case '{':
618 *format_spec_needs_expanding = 1;
619 count++;
620 break;
621 case '}':
622 count--;
623 if (count == 0) {
624 format_spec->end = str->start - 1;
625 return 1;
626 }
627 break;
628 default:
629 break;
630 }
631 }
632
633 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
634 return 0;
635 }
636 else if (c != '}') {
637 PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
638 return 0;
639 }
640
641 return 1;
642 }
643
644 /************************************************************************/
645 /******* Output string allocation and escape-to-markup processing ******/
646 /************************************************************************/
647
648 /* MarkupIterator breaks the string into pieces of either literal
649 text, or things inside {} that need to be marked up. it is
650 designed to make it easy to wrap a Python iterator around it, for
651 use with the Formatter class */
652
653 typedef struct {
654 SubString str;
655 } MarkupIterator;
656
657 static int
MarkupIterator_init(MarkupIterator * self,PyObject * str,Py_ssize_t start,Py_ssize_t end)658 MarkupIterator_init(MarkupIterator *self, PyObject *str,
659 Py_ssize_t start, Py_ssize_t end)
660 {
661 SubString_init(&self->str, str, start, end);
662 return 1;
663 }
664
665 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
666 string (or something to be expanded) */
667 static int
MarkupIterator_next(MarkupIterator * self,SubString * literal,int * field_present,SubString * field_name,SubString * format_spec,Py_UCS4 * conversion,int * format_spec_needs_expanding)668 MarkupIterator_next(MarkupIterator *self, SubString *literal,
669 int *field_present, SubString *field_name,
670 SubString *format_spec, Py_UCS4 *conversion,
671 int *format_spec_needs_expanding)
672 {
673 int at_end;
674 Py_UCS4 c = 0;
675 Py_ssize_t start;
676 Py_ssize_t len;
677 int markup_follows = 0;
678
679 /* initialize all of the output variables */
680 SubString_init(literal, NULL, 0, 0);
681 SubString_init(field_name, NULL, 0, 0);
682 SubString_init(format_spec, NULL, 0, 0);
683 *conversion = '\0';
684 *format_spec_needs_expanding = 0;
685 *field_present = 0;
686
687 /* No more input, end of iterator. This is the normal exit
688 path. */
689 if (self->str.start >= self->str.end)
690 return 1;
691
692 start = self->str.start;
693
694 /* First read any literal text. Read until the end of string, an
695 escaped '{' or '}', or an unescaped '{'. In order to never
696 allocate memory and so I can just pass pointers around, if
697 there's an escaped '{' or '}' then we'll return the literal
698 including the brace, but no format object. The next time
699 through, we'll return the rest of the literal, skipping past
700 the second consecutive brace. */
701 while (self->str.start < self->str.end) {
702 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
703 case '{':
704 case '}':
705 markup_follows = 1;
706 break;
707 default:
708 continue;
709 }
710 break;
711 }
712
713 at_end = self->str.start >= self->str.end;
714 len = self->str.start - start;
715
716 if ((c == '}') && (at_end ||
717 (c != PyUnicode_READ_CHAR(self->str.str,
718 self->str.start)))) {
719 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
720 "in format string");
721 return 0;
722 }
723 if (at_end && c == '{') {
724 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
725 "in format string");
726 return 0;
727 }
728 if (!at_end) {
729 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
730 /* escaped } or {, skip it in the input. there is no
731 markup object following us, just this literal text */
732 self->str.start++;
733 markup_follows = 0;
734 }
735 else
736 len--;
737 }
738
739 /* record the literal text */
740 literal->str = self->str.str;
741 literal->start = start;
742 literal->end = start + len;
743
744 if (!markup_follows)
745 return 2;
746
747 /* this is markup; parse the field */
748 *field_present = 1;
749 if (!parse_field(&self->str, field_name, format_spec,
750 format_spec_needs_expanding, conversion))
751 return 0;
752 return 2;
753 }
754
755
756 /* do the !r or !s conversion on obj */
757 static PyObject *
do_conversion(PyObject * obj,Py_UCS4 conversion)758 do_conversion(PyObject *obj, Py_UCS4 conversion)
759 {
760 /* XXX in pre-3.0, do we need to convert this to unicode, since it
761 might have returned a string? */
762 switch (conversion) {
763 case 'r':
764 return PyObject_Repr(obj);
765 case 's':
766 return PyObject_Str(obj);
767 case 'a':
768 return PyObject_ASCII(obj);
769 default:
770 if (conversion > 32 && conversion < 127) {
771 /* It's the ASCII subrange; casting to char is safe
772 (assuming the execution character set is an ASCII
773 superset). */
774 PyErr_Format(PyExc_ValueError,
775 "Unknown conversion specifier %c",
776 (char)conversion);
777 } else
778 PyErr_Format(PyExc_ValueError,
779 "Unknown conversion specifier \\x%x",
780 (unsigned int)conversion);
781 return NULL;
782 }
783 }
784
785 /* given:
786
787 {field_name!conversion:format_spec}
788
789 compute the result and write it to output.
790 format_spec_needs_expanding is an optimization. if it's false,
791 just output the string directly, otherwise recursively expand the
792 format_spec string.
793
794 field_name is allowed to be zero length, in which case we
795 are doing auto field numbering.
796 */
797
798 static int
output_markup(SubString * field_name,SubString * format_spec,int format_spec_needs_expanding,Py_UCS4 conversion,_PyUnicodeWriter * writer,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)799 output_markup(SubString *field_name, SubString *format_spec,
800 int format_spec_needs_expanding, Py_UCS4 conversion,
801 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
802 int recursion_depth, AutoNumber *auto_number)
803 {
804 PyObject *tmp = NULL;
805 PyObject *fieldobj = NULL;
806 SubString expanded_format_spec;
807 SubString *actual_format_spec;
808 int result = 0;
809
810 /* convert field_name to an object */
811 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
812 if (fieldobj == NULL)
813 goto done;
814
815 if (conversion != '\0') {
816 tmp = do_conversion(fieldobj, conversion);
817 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
818 goto done;
819
820 /* do the assignment, transferring ownership: fieldobj = tmp */
821 Py_DECREF(fieldobj);
822 fieldobj = tmp;
823 tmp = NULL;
824 }
825
826 /* if needed, recurively compute the format_spec */
827 if (format_spec_needs_expanding) {
828 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
829 auto_number);
830 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
831 goto done;
832
833 /* note that in the case we're expanding the format string,
834 tmp must be kept around until after the call to
835 render_field. */
836 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
837 actual_format_spec = &expanded_format_spec;
838 }
839 else
840 actual_format_spec = format_spec;
841
842 if (render_field(fieldobj, actual_format_spec, writer) == 0)
843 goto done;
844
845 result = 1;
846
847 done:
848 Py_XDECREF(fieldobj);
849 Py_XDECREF(tmp);
850
851 return result;
852 }
853
854 /*
855 do_markup is the top-level loop for the format() method. It
856 searches through the format string for escapes to markup codes, and
857 calls other functions to move non-markup text to the output,
858 and to perform the markup to the output.
859 */
860 static int
do_markup(SubString * input,PyObject * args,PyObject * kwargs,_PyUnicodeWriter * writer,int recursion_depth,AutoNumber * auto_number)861 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
862 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
863 {
864 MarkupIterator iter;
865 int format_spec_needs_expanding;
866 int result;
867 int field_present;
868 SubString literal;
869 SubString field_name;
870 SubString format_spec;
871 Py_UCS4 conversion;
872
873 MarkupIterator_init(&iter, input->str, input->start, input->end);
874 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
875 &field_name, &format_spec,
876 &conversion,
877 &format_spec_needs_expanding)) == 2) {
878 if (literal.end != literal.start) {
879 if (!field_present && iter.str.start == iter.str.end)
880 writer->overallocate = 0;
881 if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
882 literal.start, literal.end) < 0)
883 return 0;
884 }
885
886 if (field_present) {
887 if (iter.str.start == iter.str.end)
888 writer->overallocate = 0;
889 if (!output_markup(&field_name, &format_spec,
890 format_spec_needs_expanding, conversion, writer,
891 args, kwargs, recursion_depth, auto_number))
892 return 0;
893 }
894 }
895 return result;
896 }
897
898
899 /*
900 build_string allocates the output string and then
901 calls do_markup to do the heavy lifting.
902 */
903 static PyObject *
build_string(SubString * input,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)904 build_string(SubString *input, PyObject *args, PyObject *kwargs,
905 int recursion_depth, AutoNumber *auto_number)
906 {
907 _PyUnicodeWriter writer;
908
909 /* check the recursion level */
910 if (recursion_depth <= 0) {
911 PyErr_SetString(PyExc_ValueError,
912 "Max string recursion exceeded");
913 return NULL;
914 }
915
916 _PyUnicodeWriter_Init(&writer);
917 writer.overallocate = 1;
918 writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
919
920 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
921 auto_number)) {
922 _PyUnicodeWriter_Dealloc(&writer);
923 return NULL;
924 }
925
926 return _PyUnicodeWriter_Finish(&writer);
927 }
928
929 /************************************************************************/
930 /*********** main routine ***********************************************/
931 /************************************************************************/
932
933 /* this is the main entry point */
934 static PyObject *
do_string_format(PyObject * self,PyObject * args,PyObject * kwargs)935 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
936 {
937 SubString input;
938
939 /* PEP 3101 says only 2 levels, so that
940 "{0:{1}}".format('abc', 's') # works
941 "{0:{1:{2}}}".format('abc', 's', '') # fails
942 */
943 int recursion_depth = 2;
944
945 AutoNumber auto_number;
946
947 if (PyUnicode_READY(self) == -1)
948 return NULL;
949
950 AutoNumber_Init(&auto_number);
951 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
952 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
953 }
954
955 static PyObject *
do_string_format_map(PyObject * self,PyObject * obj)956 do_string_format_map(PyObject *self, PyObject *obj)
957 {
958 return do_string_format(self, NULL, obj);
959 }
960
961
962 /************************************************************************/
963 /*********** formatteriterator ******************************************/
964 /************************************************************************/
965
966 /* This is used to implement string.Formatter.vparse(). It exists so
967 Formatter can share code with the built in unicode.format() method.
968 It's really just a wrapper around MarkupIterator that is callable
969 from Python. */
970
971 typedef struct {
972 PyObject_HEAD
973 PyObject *str;
974 MarkupIterator it_markup;
975 } formatteriterobject;
976
977 static void
formatteriter_dealloc(formatteriterobject * it)978 formatteriter_dealloc(formatteriterobject *it)
979 {
980 Py_XDECREF(it->str);
981 PyObject_FREE(it);
982 }
983
984 /* returns a tuple:
985 (literal, field_name, format_spec, conversion)
986
987 literal is any literal text to output. might be zero length
988 field_name is the string before the ':'. might be None
989 format_spec is the string after the ':'. mibht be None
990 conversion is either None, or the string after the '!'
991 */
992 static PyObject *
formatteriter_next(formatteriterobject * it)993 formatteriter_next(formatteriterobject *it)
994 {
995 SubString literal;
996 SubString field_name;
997 SubString format_spec;
998 Py_UCS4 conversion;
999 int format_spec_needs_expanding;
1000 int field_present;
1001 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1002 &field_name, &format_spec, &conversion,
1003 &format_spec_needs_expanding);
1004
1005 /* all of the SubString objects point into it->str, so no
1006 memory management needs to be done on them */
1007 assert(0 <= result && result <= 2);
1008 if (result == 0 || result == 1)
1009 /* if 0, error has already been set, if 1, iterator is empty */
1010 return NULL;
1011 else {
1012 PyObject *literal_str = NULL;
1013 PyObject *field_name_str = NULL;
1014 PyObject *format_spec_str = NULL;
1015 PyObject *conversion_str = NULL;
1016 PyObject *tuple = NULL;
1017
1018 literal_str = SubString_new_object(&literal);
1019 if (literal_str == NULL)
1020 goto done;
1021
1022 field_name_str = SubString_new_object(&field_name);
1023 if (field_name_str == NULL)
1024 goto done;
1025
1026 /* if field_name is non-zero length, return a string for
1027 format_spec (even if zero length), else return None */
1028 format_spec_str = (field_present ?
1029 SubString_new_object_or_empty :
1030 SubString_new_object)(&format_spec);
1031 if (format_spec_str == NULL)
1032 goto done;
1033
1034 /* if the conversion is not specified, return a None,
1035 otherwise create a one length string with the conversion
1036 character */
1037 if (conversion == '\0') {
1038 conversion_str = Py_None;
1039 Py_INCREF(conversion_str);
1040 }
1041 else
1042 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1043 &conversion, 1);
1044 if (conversion_str == NULL)
1045 goto done;
1046
1047 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1048 conversion_str);
1049 done:
1050 Py_XDECREF(literal_str);
1051 Py_XDECREF(field_name_str);
1052 Py_XDECREF(format_spec_str);
1053 Py_XDECREF(conversion_str);
1054 return tuple;
1055 }
1056 }
1057
1058 static PyMethodDef formatteriter_methods[] = {
1059 {NULL, NULL} /* sentinel */
1060 };
1061
1062 static PyTypeObject PyFormatterIter_Type = {
1063 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1064 "formatteriterator", /* tp_name */
1065 sizeof(formatteriterobject), /* tp_basicsize */
1066 0, /* tp_itemsize */
1067 /* methods */
1068 (destructor)formatteriter_dealloc, /* tp_dealloc */
1069 0, /* tp_print */
1070 0, /* tp_getattr */
1071 0, /* tp_setattr */
1072 0, /* tp_reserved */
1073 0, /* tp_repr */
1074 0, /* tp_as_number */
1075 0, /* tp_as_sequence */
1076 0, /* tp_as_mapping */
1077 0, /* tp_hash */
1078 0, /* tp_call */
1079 0, /* tp_str */
1080 PyObject_GenericGetAttr, /* tp_getattro */
1081 0, /* tp_setattro */
1082 0, /* tp_as_buffer */
1083 Py_TPFLAGS_DEFAULT, /* tp_flags */
1084 0, /* tp_doc */
1085 0, /* tp_traverse */
1086 0, /* tp_clear */
1087 0, /* tp_richcompare */
1088 0, /* tp_weaklistoffset */
1089 PyObject_SelfIter, /* tp_iter */
1090 (iternextfunc)formatteriter_next, /* tp_iternext */
1091 formatteriter_methods, /* tp_methods */
1092 0,
1093 };
1094
1095 /* unicode_formatter_parser is used to implement
1096 string.Formatter.vformat. it parses a string and returns tuples
1097 describing the parsed elements. It's a wrapper around
1098 stringlib/string_format.h's MarkupIterator */
1099 static PyObject *
formatter_parser(PyObject * ignored,PyObject * self)1100 formatter_parser(PyObject *ignored, PyObject *self)
1101 {
1102 formatteriterobject *it;
1103
1104 if (!PyUnicode_Check(self)) {
1105 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1106 return NULL;
1107 }
1108
1109 if (PyUnicode_READY(self) == -1)
1110 return NULL;
1111
1112 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1113 if (it == NULL)
1114 return NULL;
1115
1116 /* take ownership, give the object to the iterator */
1117 Py_INCREF(self);
1118 it->str = self;
1119
1120 /* initialize the contained MarkupIterator */
1121 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1122 return (PyObject *)it;
1123 }
1124
1125
1126 /************************************************************************/
1127 /*********** fieldnameiterator ******************************************/
1128 /************************************************************************/
1129
1130
1131 /* This is used to implement string.Formatter.vparse(). It parses the
1132 field name into attribute and item values. It's a Python-callable
1133 wrapper around FieldNameIterator */
1134
1135 typedef struct {
1136 PyObject_HEAD
1137 PyObject *str;
1138 FieldNameIterator it_field;
1139 } fieldnameiterobject;
1140
1141 static void
fieldnameiter_dealloc(fieldnameiterobject * it)1142 fieldnameiter_dealloc(fieldnameiterobject *it)
1143 {
1144 Py_XDECREF(it->str);
1145 PyObject_FREE(it);
1146 }
1147
1148 /* returns a tuple:
1149 (is_attr, value)
1150 is_attr is true if we used attribute syntax (e.g., '.foo')
1151 false if we used index syntax (e.g., '[foo]')
1152 value is an integer or string
1153 */
1154 static PyObject *
fieldnameiter_next(fieldnameiterobject * it)1155 fieldnameiter_next(fieldnameiterobject *it)
1156 {
1157 int result;
1158 int is_attr;
1159 Py_ssize_t idx;
1160 SubString name;
1161
1162 result = FieldNameIterator_next(&it->it_field, &is_attr,
1163 &idx, &name);
1164 if (result == 0 || result == 1)
1165 /* if 0, error has already been set, if 1, iterator is empty */
1166 return NULL;
1167 else {
1168 PyObject* result = NULL;
1169 PyObject* is_attr_obj = NULL;
1170 PyObject* obj = NULL;
1171
1172 is_attr_obj = PyBool_FromLong(is_attr);
1173 if (is_attr_obj == NULL)
1174 goto done;
1175
1176 /* either an integer or a string */
1177 if (idx != -1)
1178 obj = PyLong_FromSsize_t(idx);
1179 else
1180 obj = SubString_new_object(&name);
1181 if (obj == NULL)
1182 goto done;
1183
1184 /* return a tuple of values */
1185 result = PyTuple_Pack(2, is_attr_obj, obj);
1186
1187 done:
1188 Py_XDECREF(is_attr_obj);
1189 Py_XDECREF(obj);
1190 return result;
1191 }
1192 }
1193
1194 static PyMethodDef fieldnameiter_methods[] = {
1195 {NULL, NULL} /* sentinel */
1196 };
1197
1198 static PyTypeObject PyFieldNameIter_Type = {
1199 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1200 "fieldnameiterator", /* tp_name */
1201 sizeof(fieldnameiterobject), /* tp_basicsize */
1202 0, /* tp_itemsize */
1203 /* methods */
1204 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1205 0, /* tp_print */
1206 0, /* tp_getattr */
1207 0, /* tp_setattr */
1208 0, /* tp_reserved */
1209 0, /* tp_repr */
1210 0, /* tp_as_number */
1211 0, /* tp_as_sequence */
1212 0, /* tp_as_mapping */
1213 0, /* tp_hash */
1214 0, /* tp_call */
1215 0, /* tp_str */
1216 PyObject_GenericGetAttr, /* tp_getattro */
1217 0, /* tp_setattro */
1218 0, /* tp_as_buffer */
1219 Py_TPFLAGS_DEFAULT, /* tp_flags */
1220 0, /* tp_doc */
1221 0, /* tp_traverse */
1222 0, /* tp_clear */
1223 0, /* tp_richcompare */
1224 0, /* tp_weaklistoffset */
1225 PyObject_SelfIter, /* tp_iter */
1226 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1227 fieldnameiter_methods, /* tp_methods */
1228 0};
1229
1230 /* unicode_formatter_field_name_split is used to implement
1231 string.Formatter.vformat. it takes a PEP 3101 "field name", and
1232 returns a tuple of (first, rest): "first", the part before the
1233 first '.' or '['; and "rest", an iterator for the rest of the field
1234 name. it's a wrapper around stringlib/string_format.h's
1235 field_name_split. The iterator it returns is a
1236 FieldNameIterator */
1237 static PyObject *
formatter_field_name_split(PyObject * ignored,PyObject * self)1238 formatter_field_name_split(PyObject *ignored, PyObject *self)
1239 {
1240 SubString first;
1241 Py_ssize_t first_idx;
1242 fieldnameiterobject *it;
1243
1244 PyObject *first_obj = NULL;
1245 PyObject *result = NULL;
1246
1247 if (!PyUnicode_Check(self)) {
1248 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1249 return NULL;
1250 }
1251
1252 if (PyUnicode_READY(self) == -1)
1253 return NULL;
1254
1255 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1256 if (it == NULL)
1257 return NULL;
1258
1259 /* take ownership, give the object to the iterator. this is
1260 just to keep the field_name alive */
1261 Py_INCREF(self);
1262 it->str = self;
1263
1264 /* Pass in auto_number = NULL. We'll return an empty string for
1265 first_obj in that case. */
1266 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1267 &first, &first_idx, &it->it_field, NULL))
1268 goto done;
1269
1270 /* first becomes an integer, if possible; else a string */
1271 if (first_idx != -1)
1272 first_obj = PyLong_FromSsize_t(first_idx);
1273 else
1274 /* convert "first" into a string object */
1275 first_obj = SubString_new_object(&first);
1276 if (first_obj == NULL)
1277 goto done;
1278
1279 /* return a tuple of values */
1280 result = PyTuple_Pack(2, first_obj, it);
1281
1282 done:
1283 Py_XDECREF(it);
1284 Py_XDECREF(first_obj);
1285 return result;
1286 }
1287