1 /*
2 unicode_format.h -- implementation of str.format().
3 */
4
5 /************************************************************************/
6 /*********** Global data structures and forward declarations *********/
7 /************************************************************************/
8
9 /*
10 A SubString consists of the characters between two string or
11 unicode pointers.
12 */
13 typedef struct {
14 PyObject *str; /* borrowed reference */
15 Py_ssize_t start, end;
16 } SubString;
17
18
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT = 0,   /* no numbering mode has been chosen yet */
    ANS_AUTO,       /* fields are numbered automatically ("{}") */
    ANS_MANUAL      /* fields carry explicit numbers ("{0}") */
} AutoNumberState;
24
25 /* Keeps track of our auto-numbering state, and which number field we're on */
26 typedef struct {
27 AutoNumberState an_state;
28 int an_field_number;
29 } AutoNumber;
30
31
32 /* forward declaration for recursion */
33 static PyObject *
34 build_string(SubString *input, PyObject *args, PyObject *kwargs,
35 int recursion_depth, AutoNumber *auto_number);
36
37
38
39 /************************************************************************/
40 /************************** Utility functions ************************/
41 /************************************************************************/
42
43 static void
AutoNumber_Init(AutoNumber * auto_number)44 AutoNumber_Init(AutoNumber *auto_number)
45 {
46 auto_number->an_state = ANS_INIT;
47 auto_number->an_field_number = 0;
48 }
49
50 /* fill in a SubString from a pointer and length */
51 Py_LOCAL_INLINE(void)
SubString_init(SubString * str,PyObject * s,Py_ssize_t start,Py_ssize_t end)52 SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
53 {
54 str->str = s;
55 str->start = start;
56 str->end = end;
57 }
58
59 /* return a new string. if str->str is NULL, return None */
60 Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString * str)61 SubString_new_object(SubString *str)
62 {
63 if (str->str == NULL) {
64 Py_INCREF(Py_None);
65 return Py_None;
66 }
67 return PyUnicode_Substring(str->str, str->start, str->end);
68 }
69
70 /* return a new string. if str->str is NULL, return a new empty string */
71 Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString * str)72 SubString_new_object_or_empty(SubString *str)
73 {
74 if (str->str == NULL) {
75 return PyUnicode_New(0, 0);
76 }
77 return SubString_new_object(str);
78 }
79
80 /* Return 1 if an error has been detected switching between automatic
81 field numbering and manual field specification, else return 0. Set
82 ValueError on error. */
83 static int
autonumber_state_error(AutoNumberState state,int field_name_is_empty)84 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85 {
86 if (state == ANS_MANUAL) {
87 if (field_name_is_empty) {
88 PyErr_SetString(PyExc_ValueError, "cannot switch from "
89 "manual field specification to "
90 "automatic field numbering");
91 return 1;
92 }
93 }
94 else {
95 if (!field_name_is_empty) {
96 PyErr_SetString(PyExc_ValueError, "cannot switch from "
97 "automatic field numbering to "
98 "manual field specification");
99 return 1;
100 }
101 }
102 return 0;
103 }
104
105
106 /************************************************************************/
107 /*********** Format string parsing -- integers and identifiers *********/
108 /************************************************************************/
109
110 static Py_ssize_t
get_integer(const SubString * str)111 get_integer(const SubString *str)
112 {
113 Py_ssize_t accumulator = 0;
114 Py_ssize_t digitval;
115 Py_ssize_t i;
116
117 /* empty string is an error */
118 if (str->start >= str->end)
119 return -1;
120
121 for (i = str->start; i < str->end; i++) {
122 digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
123 if (digitval < 0)
124 return -1;
125 /*
126 Detect possible overflow before it happens:
127
128 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
130 */
131 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
132 PyErr_Format(PyExc_ValueError,
133 "Too many decimal digits in format string");
134 return -1;
135 }
136 accumulator = accumulator * 10 + digitval;
137 }
138 return accumulator;
139 }
140
141 /************************************************************************/
142 /******** Functions to get field objects and specification strings ******/
143 /************************************************************************/
144
145 /* do the equivalent of obj.name */
146 static PyObject *
getattr(PyObject * obj,SubString * name)147 getattr(PyObject *obj, SubString *name)
148 {
149 PyObject *newobj;
150 PyObject *str = SubString_new_object(name);
151 if (str == NULL)
152 return NULL;
153 newobj = PyObject_GetAttr(obj, str);
154 Py_DECREF(str);
155 return newobj;
156 }
157
158 /* do the equivalent of obj[idx], where obj is a sequence */
159 static PyObject *
getitem_sequence(PyObject * obj,Py_ssize_t idx)160 getitem_sequence(PyObject *obj, Py_ssize_t idx)
161 {
162 return PySequence_GetItem(obj, idx);
163 }
164
165 /* do the equivalent of obj[idx], where obj is not a sequence */
166 static PyObject *
getitem_idx(PyObject * obj,Py_ssize_t idx)167 getitem_idx(PyObject *obj, Py_ssize_t idx)
168 {
169 PyObject *newobj;
170 PyObject *idx_obj = PyLong_FromSsize_t(idx);
171 if (idx_obj == NULL)
172 return NULL;
173 newobj = PyObject_GetItem(obj, idx_obj);
174 Py_DECREF(idx_obj);
175 return newobj;
176 }
177
178 /* do the equivalent of obj[name] */
179 static PyObject *
getitem_str(PyObject * obj,SubString * name)180 getitem_str(PyObject *obj, SubString *name)
181 {
182 PyObject *newobj;
183 PyObject *str = SubString_new_object(name);
184 if (str == NULL)
185 return NULL;
186 newobj = PyObject_GetItem(obj, str);
187 Py_DECREF(str);
188 return newobj;
189 }
190
191 typedef struct {
192 /* the entire string we're parsing. we assume that someone else
193 is managing its lifetime, and that it will exist for the
194 lifetime of the iterator. can be empty */
195 SubString str;
196
197 /* index to where we are inside field_name */
198 Py_ssize_t index;
199 } FieldNameIterator;
200
201
202 static int
FieldNameIterator_init(FieldNameIterator * self,PyObject * s,Py_ssize_t start,Py_ssize_t end)203 FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204 Py_ssize_t start, Py_ssize_t end)
205 {
206 SubString_init(&self->str, s, start, end);
207 self->index = start;
208 return 1;
209 }
210
211 static int
_FieldNameIterator_attr(FieldNameIterator * self,SubString * name)212 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213 {
214 Py_UCS4 c;
215
216 name->str = self->str.str;
217 name->start = self->index;
218
219 /* return everything until '.' or '[' */
220 while (self->index < self->str.end) {
221 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222 switch (c) {
223 case '[':
224 case '.':
225 /* backup so that we this character will be seen next time */
226 self->index--;
227 break;
228 default:
229 continue;
230 }
231 break;
232 }
233 /* end of string is okay */
234 name->end = self->index;
235 return 1;
236 }
237
238 static int
_FieldNameIterator_item(FieldNameIterator * self,SubString * name)239 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240 {
241 int bracket_seen = 0;
242 Py_UCS4 c;
243
244 name->str = self->str.str;
245 name->start = self->index;
246
247 /* return everything until ']' */
248 while (self->index < self->str.end) {
249 c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250 switch (c) {
251 case ']':
252 bracket_seen = 1;
253 break;
254 default:
255 continue;
256 }
257 break;
258 }
259 /* make sure we ended with a ']' */
260 if (!bracket_seen) {
261 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262 return 0;
263 }
264
265 /* end of string is okay */
266 /* don't include the ']' */
267 name->end = self->index-1;
268 return 1;
269 }
270
271 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272 static int
FieldNameIterator_next(FieldNameIterator * self,int * is_attribute,Py_ssize_t * name_idx,SubString * name)273 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274 Py_ssize_t *name_idx, SubString *name)
275 {
276 /* check at end of input */
277 if (self->index >= self->str.end)
278 return 1;
279
280 switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
281 case '.':
282 *is_attribute = 1;
283 if (_FieldNameIterator_attr(self, name) == 0)
284 return 0;
285 *name_idx = -1;
286 break;
287 case '[':
288 *is_attribute = 0;
289 if (_FieldNameIterator_item(self, name) == 0)
290 return 0;
291 *name_idx = get_integer(name);
292 if (*name_idx == -1 && PyErr_Occurred())
293 return 0;
294 break;
295 default:
296 /* Invalid character follows ']' */
297 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298 "follow ']' in format field specifier");
299 return 0;
300 }
301
302 /* empty string is an error */
303 if (name->start == name->end) {
304 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305 return 0;
306 }
307
308 return 2;
309 }
310
311
312 /* input: field_name
313 output: 'first' points to the part before the first '[' or '.'
314 'first_idx' is -1 if 'first' is not an integer, otherwise
315 it's the value of first converted to an integer
316 'rest' is an iterator to return the rest
317 */
318 static int
field_name_split(PyObject * str,Py_ssize_t start,Py_ssize_t end,SubString * first,Py_ssize_t * first_idx,FieldNameIterator * rest,AutoNumber * auto_number)319 field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
320 Py_ssize_t *first_idx, FieldNameIterator *rest,
321 AutoNumber *auto_number)
322 {
323 Py_UCS4 c;
324 Py_ssize_t i = start;
325 int field_name_is_empty;
326 int using_numeric_index;
327
328 /* find the part up until the first '.' or '[' */
329 while (i < end) {
330 switch (c = PyUnicode_READ_CHAR(str, i++)) {
331 case '[':
332 case '.':
333 /* backup so that we this character is available to the
334 "rest" iterator */
335 i--;
336 break;
337 default:
338 continue;
339 }
340 break;
341 }
342
343 /* set up the return values */
344 SubString_init(first, str, start, i);
345 FieldNameIterator_init(rest, str, i, end);
346
347 /* see if "first" is an integer, in which case it's used as an index */
348 *first_idx = get_integer(first);
349 if (*first_idx == -1 && PyErr_Occurred())
350 return 0;
351
352 field_name_is_empty = first->start >= first->end;
353
354 /* If the field name is omitted or if we have a numeric index
355 specified, then we're doing numeric indexing into args. */
356 using_numeric_index = field_name_is_empty || *first_idx != -1;
357
358 /* We always get here exactly one time for each field we're
359 processing. And we get here in field order (counting by left
360 braces). So this is the perfect place to handle automatic field
361 numbering if the field name is omitted. */
362
363 /* Check if we need to do the auto-numbering. It's not needed if
364 we're called from string.Format routines, because it's handled
365 in that class by itself. */
366 if (auto_number) {
367 /* Initialize our auto numbering state if this is the first
368 time we're either auto-numbering or manually numbering. */
369 if (auto_number->an_state == ANS_INIT && using_numeric_index)
370 auto_number->an_state = field_name_is_empty ?
371 ANS_AUTO : ANS_MANUAL;
372
373 /* Make sure our state is consistent with what we're doing
374 this time through. Only check if we're using a numeric
375 index. */
376 if (using_numeric_index)
377 if (autonumber_state_error(auto_number->an_state,
378 field_name_is_empty))
379 return 0;
380 /* Zero length field means we want to do auto-numbering of the
381 fields. */
382 if (field_name_is_empty)
383 *first_idx = (auto_number->an_field_number)++;
384 }
385
386 return 1;
387 }
388
389
390 /*
391 get_field_object returns the object inside {}, before the
392 format_spec. It handles getindex and getattr lookups and consumes
393 the entire input string.
394 */
395 static PyObject *
get_field_object(SubString * input,PyObject * args,PyObject * kwargs,AutoNumber * auto_number)396 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397 AutoNumber *auto_number)
398 {
399 PyObject *obj = NULL;
400 int ok;
401 int is_attribute;
402 SubString name;
403 SubString first;
404 Py_ssize_t index;
405 FieldNameIterator rest;
406
407 if (!field_name_split(input->str, input->start, input->end, &first,
408 &index, &rest, auto_number)) {
409 goto error;
410 }
411
412 if (index == -1) {
413 /* look up in kwargs */
414 PyObject *key = SubString_new_object(&first);
415 if (key == NULL)
416 goto error;
417
418 /* Use PyObject_GetItem instead of PyDict_GetItem because this
419 code is no longer just used with kwargs. It might be passed
420 a non-dict when called through format_map. */
421 if ((kwargs == NULL) || (obj = PyObject_GetItem(kwargs, key)) == NULL) {
422 PyErr_SetObject(PyExc_KeyError, key);
423 Py_DECREF(key);
424 goto error;
425 }
426 Py_DECREF(key);
427 }
428 else {
429 /* If args is NULL, we have a format string with a positional field
430 with only kwargs to retrieve it from. This can only happen when
431 used with format_map(), where positional arguments are not
432 allowed. */
433 if (args == NULL) {
434 PyErr_SetString(PyExc_ValueError, "Format string contains "
435 "positional fields");
436 goto error;
437 }
438
439 /* look up in args */
440 obj = PySequence_GetItem(args, index);
441 if (obj == NULL)
442 goto error;
443 }
444
445 /* iterate over the rest of the field_name */
446 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
447 &name)) == 2) {
448 PyObject *tmp;
449
450 if (is_attribute)
451 /* getattr lookup "." */
452 tmp = getattr(obj, &name);
453 else
454 /* getitem lookup "[]" */
455 if (index == -1)
456 tmp = getitem_str(obj, &name);
457 else
458 if (PySequence_Check(obj))
459 tmp = getitem_sequence(obj, index);
460 else
461 /* not a sequence */
462 tmp = getitem_idx(obj, index);
463 if (tmp == NULL)
464 goto error;
465
466 /* assign to obj */
467 Py_DECREF(obj);
468 obj = tmp;
469 }
470 /* end of iterator, this is the non-error case */
471 if (ok == 1)
472 return obj;
473 error:
474 Py_XDECREF(obj);
475 return NULL;
476 }
477
478 /************************************************************************/
479 /***************** Field rendering functions **************************/
480 /************************************************************************/
481
482 /*
483 render_field() is the main function in this section. It takes the
484 field object and field specification string generated by
485 get_field_and_spec, and renders the field into the output string.
486
487 render_field calls fieldobj.__format__(format_spec) method, and
488 appends to the output.
489 */
490 static int
render_field(PyObject * fieldobj,SubString * format_spec,_PyUnicodeWriter * writer)491 render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
492 {
493 int ok = 0;
494 PyObject *result = NULL;
495 PyObject *format_spec_object = NULL;
496 int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
497 int err;
498
499 /* If we know the type exactly, skip the lookup of __format__ and just
500 call the formatter directly. */
501 if (PyUnicode_CheckExact(fieldobj))
502 formatter = _PyUnicode_FormatAdvancedWriter;
503 else if (PyLong_CheckExact(fieldobj))
504 formatter = _PyLong_FormatAdvancedWriter;
505 else if (PyFloat_CheckExact(fieldobj))
506 formatter = _PyFloat_FormatAdvancedWriter;
507 else if (PyComplex_CheckExact(fieldobj))
508 formatter = _PyComplex_FormatAdvancedWriter;
509
510 if (formatter) {
511 /* we know exactly which formatter will be called when __format__ is
512 looked up, so call it directly, instead. */
513 err = formatter(writer, fieldobj, format_spec->str,
514 format_spec->start, format_spec->end);
515 return (err == 0);
516 }
517 else {
518 /* We need to create an object out of the pointers we have, because
519 __format__ takes a string/unicode object for format_spec. */
520 if (format_spec->str)
521 format_spec_object = PyUnicode_Substring(format_spec->str,
522 format_spec->start,
523 format_spec->end);
524 else
525 format_spec_object = PyUnicode_New(0, 0);
526 if (format_spec_object == NULL)
527 goto done;
528
529 result = PyObject_Format(fieldobj, format_spec_object);
530 }
531 if (result == NULL)
532 goto done;
533
534 if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
535 goto done;
536 ok = 1;
537
538 done:
539 Py_XDECREF(format_spec_object);
540 Py_XDECREF(result);
541 return ok;
542 }
543
544 static int
parse_field(SubString * str,SubString * field_name,SubString * format_spec,int * format_spec_needs_expanding,Py_UCS4 * conversion)545 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
546 int *format_spec_needs_expanding, Py_UCS4 *conversion)
547 {
548 /* Note this function works if the field name is zero length,
549 which is good. Zero length field names are handled later, in
550 field_name_split. */
551
552 Py_UCS4 c = 0;
553
554 /* initialize these, as they may be empty */
555 *conversion = '\0';
556 SubString_init(format_spec, NULL, 0, 0);
557
558 /* Search for the field name. it's terminated by the end of
559 the string, or a ':' or '!' */
560 field_name->str = str->str;
561 field_name->start = str->start;
562 while (str->start < str->end) {
563 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
564 case '{':
565 PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
566 return 0;
567 case '[':
568 for (; str->start < str->end; str->start++)
569 if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
570 break;
571 continue;
572 case '}':
573 case ':':
574 case '!':
575 break;
576 default:
577 continue;
578 }
579 break;
580 }
581
582 field_name->end = str->start - 1;
583 if (c == '!' || c == ':') {
584 Py_ssize_t count;
585 /* we have a format specifier and/or a conversion */
586 /* don't include the last character */
587
588 /* see if there's a conversion specifier */
589 if (c == '!') {
590 /* there must be another character present */
591 if (str->start >= str->end) {
592 PyErr_SetString(PyExc_ValueError,
593 "end of string while looking for conversion "
594 "specifier");
595 return 0;
596 }
597 *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
598
599 if (str->start < str->end) {
600 c = PyUnicode_READ_CHAR(str->str, str->start++);
601 if (c == '}')
602 return 1;
603 if (c != ':') {
604 PyErr_SetString(PyExc_ValueError,
605 "expected ':' after conversion specifier");
606 return 0;
607 }
608 }
609 }
610 format_spec->str = str->str;
611 format_spec->start = str->start;
612 count = 1;
613 while (str->start < str->end) {
614 switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
615 case '{':
616 *format_spec_needs_expanding = 1;
617 count++;
618 break;
619 case '}':
620 count--;
621 if (count == 0) {
622 format_spec->end = str->start - 1;
623 return 1;
624 }
625 break;
626 default:
627 break;
628 }
629 }
630
631 PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
632 return 0;
633 }
634 else if (c != '}') {
635 PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
636 return 0;
637 }
638
639 return 1;
640 }
641
642 /************************************************************************/
643 /******* Output string allocation and escape-to-markup processing ******/
644 /************************************************************************/
645
646 /* MarkupIterator breaks the string into pieces of either literal
647 text, or things inside {} that need to be marked up. it is
648 designed to make it easy to wrap a Python iterator around it, for
649 use with the Formatter class */
650
651 typedef struct {
652 SubString str;
653 } MarkupIterator;
654
655 static int
MarkupIterator_init(MarkupIterator * self,PyObject * str,Py_ssize_t start,Py_ssize_t end)656 MarkupIterator_init(MarkupIterator *self, PyObject *str,
657 Py_ssize_t start, Py_ssize_t end)
658 {
659 SubString_init(&self->str, str, start, end);
660 return 1;
661 }
662
663 /* returns 0 on error, 1 on non-error termination, and 2 if it got a
664 string (or something to be expanded) */
665 static int
MarkupIterator_next(MarkupIterator * self,SubString * literal,int * field_present,SubString * field_name,SubString * format_spec,Py_UCS4 * conversion,int * format_spec_needs_expanding)666 MarkupIterator_next(MarkupIterator *self, SubString *literal,
667 int *field_present, SubString *field_name,
668 SubString *format_spec, Py_UCS4 *conversion,
669 int *format_spec_needs_expanding)
670 {
671 int at_end;
672 Py_UCS4 c = 0;
673 Py_ssize_t start;
674 Py_ssize_t len;
675 int markup_follows = 0;
676
677 /* initialize all of the output variables */
678 SubString_init(literal, NULL, 0, 0);
679 SubString_init(field_name, NULL, 0, 0);
680 SubString_init(format_spec, NULL, 0, 0);
681 *conversion = '\0';
682 *format_spec_needs_expanding = 0;
683 *field_present = 0;
684
685 /* No more input, end of iterator. This is the normal exit
686 path. */
687 if (self->str.start >= self->str.end)
688 return 1;
689
690 start = self->str.start;
691
692 /* First read any literal text. Read until the end of string, an
693 escaped '{' or '}', or an unescaped '{'. In order to never
694 allocate memory and so I can just pass pointers around, if
695 there's an escaped '{' or '}' then we'll return the literal
696 including the brace, but no format object. The next time
697 through, we'll return the rest of the literal, skipping past
698 the second consecutive brace. */
699 while (self->str.start < self->str.end) {
700 switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
701 case '{':
702 case '}':
703 markup_follows = 1;
704 break;
705 default:
706 continue;
707 }
708 break;
709 }
710
711 at_end = self->str.start >= self->str.end;
712 len = self->str.start - start;
713
714 if ((c == '}') && (at_end ||
715 (c != PyUnicode_READ_CHAR(self->str.str,
716 self->str.start)))) {
717 PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
718 "in format string");
719 return 0;
720 }
721 if (at_end && c == '{') {
722 PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
723 "in format string");
724 return 0;
725 }
726 if (!at_end) {
727 if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
728 /* escaped } or {, skip it in the input. there is no
729 markup object following us, just this literal text */
730 self->str.start++;
731 markup_follows = 0;
732 }
733 else
734 len--;
735 }
736
737 /* record the literal text */
738 literal->str = self->str.str;
739 literal->start = start;
740 literal->end = start + len;
741
742 if (!markup_follows)
743 return 2;
744
745 /* this is markup; parse the field */
746 *field_present = 1;
747 if (!parse_field(&self->str, field_name, format_spec,
748 format_spec_needs_expanding, conversion))
749 return 0;
750 return 2;
751 }
752
753
754 /* do the !r or !s conversion on obj */
755 static PyObject *
do_conversion(PyObject * obj,Py_UCS4 conversion)756 do_conversion(PyObject *obj, Py_UCS4 conversion)
757 {
758 /* XXX in pre-3.0, do we need to convert this to unicode, since it
759 might have returned a string? */
760 switch (conversion) {
761 case 'r':
762 return PyObject_Repr(obj);
763 case 's':
764 return PyObject_Str(obj);
765 case 'a':
766 return PyObject_ASCII(obj);
767 default:
768 if (conversion > 32 && conversion < 127) {
769 /* It's the ASCII subrange; casting to char is safe
770 (assuming the execution character set is an ASCII
771 superset). */
772 PyErr_Format(PyExc_ValueError,
773 "Unknown conversion specifier %c",
774 (char)conversion);
775 } else
776 PyErr_Format(PyExc_ValueError,
777 "Unknown conversion specifier \\x%x",
778 (unsigned int)conversion);
779 return NULL;
780 }
781 }
782
783 /* given:
784
785 {field_name!conversion:format_spec}
786
787 compute the result and write it to output.
788 format_spec_needs_expanding is an optimization. if it's false,
789 just output the string directly, otherwise recursively expand the
790 format_spec string.
791
792 field_name is allowed to be zero length, in which case we
793 are doing auto field numbering.
794 */
795
796 static int
output_markup(SubString * field_name,SubString * format_spec,int format_spec_needs_expanding,Py_UCS4 conversion,_PyUnicodeWriter * writer,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)797 output_markup(SubString *field_name, SubString *format_spec,
798 int format_spec_needs_expanding, Py_UCS4 conversion,
799 _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
800 int recursion_depth, AutoNumber *auto_number)
801 {
802 PyObject *tmp = NULL;
803 PyObject *fieldobj = NULL;
804 SubString expanded_format_spec;
805 SubString *actual_format_spec;
806 int result = 0;
807
808 /* convert field_name to an object */
809 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
810 if (fieldobj == NULL)
811 goto done;
812
813 if (conversion != '\0') {
814 tmp = do_conversion(fieldobj, conversion);
815 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
816 goto done;
817
818 /* do the assignment, transferring ownership: fieldobj = tmp */
819 Py_DECREF(fieldobj);
820 fieldobj = tmp;
821 tmp = NULL;
822 }
823
824 /* if needed, recurively compute the format_spec */
825 if (format_spec_needs_expanding) {
826 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
827 auto_number);
828 if (tmp == NULL || PyUnicode_READY(tmp) == -1)
829 goto done;
830
831 /* note that in the case we're expanding the format string,
832 tmp must be kept around until after the call to
833 render_field. */
834 SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
835 actual_format_spec = &expanded_format_spec;
836 }
837 else
838 actual_format_spec = format_spec;
839
840 if (render_field(fieldobj, actual_format_spec, writer) == 0)
841 goto done;
842
843 result = 1;
844
845 done:
846 Py_XDECREF(fieldobj);
847 Py_XDECREF(tmp);
848
849 return result;
850 }
851
852 /*
853 do_markup is the top-level loop for the format() method. It
854 searches through the format string for escapes to markup codes, and
855 calls other functions to move non-markup text to the output,
856 and to perform the markup to the output.
857 */
858 static int
do_markup(SubString * input,PyObject * args,PyObject * kwargs,_PyUnicodeWriter * writer,int recursion_depth,AutoNumber * auto_number)859 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
860 _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
861 {
862 MarkupIterator iter;
863 int format_spec_needs_expanding;
864 int result;
865 int field_present;
866 SubString literal;
867 SubString field_name;
868 SubString format_spec;
869 Py_UCS4 conversion;
870
871 MarkupIterator_init(&iter, input->str, input->start, input->end);
872 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
873 &field_name, &format_spec,
874 &conversion,
875 &format_spec_needs_expanding)) == 2) {
876 if (literal.end != literal.start) {
877 if (!field_present && iter.str.start == iter.str.end)
878 writer->overallocate = 0;
879 if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
880 literal.start, literal.end) < 0)
881 return 0;
882 }
883
884 if (field_present) {
885 if (iter.str.start == iter.str.end)
886 writer->overallocate = 0;
887 if (!output_markup(&field_name, &format_spec,
888 format_spec_needs_expanding, conversion, writer,
889 args, kwargs, recursion_depth, auto_number))
890 return 0;
891 }
892 }
893 return result;
894 }
895
896
897 /*
898 build_string allocates the output string and then
899 calls do_markup to do the heavy lifting.
900 */
901 static PyObject *
build_string(SubString * input,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)902 build_string(SubString *input, PyObject *args, PyObject *kwargs,
903 int recursion_depth, AutoNumber *auto_number)
904 {
905 _PyUnicodeWriter writer;
906
907 /* check the recursion level */
908 if (recursion_depth <= 0) {
909 PyErr_SetString(PyExc_ValueError,
910 "Max string recursion exceeded");
911 return NULL;
912 }
913
914 _PyUnicodeWriter_Init(&writer);
915 writer.overallocate = 1;
916 writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
917
918 if (!do_markup(input, args, kwargs, &writer, recursion_depth,
919 auto_number)) {
920 _PyUnicodeWriter_Dealloc(&writer);
921 return NULL;
922 }
923
924 return _PyUnicodeWriter_Finish(&writer);
925 }
926
927 /************************************************************************/
928 /*********** main routine ***********************************************/
929 /************************************************************************/
930
931 /* this is the main entry point */
932 static PyObject *
do_string_format(PyObject * self,PyObject * args,PyObject * kwargs)933 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
934 {
935 SubString input;
936
937 /* PEP 3101 says only 2 levels, so that
938 "{0:{1}}".format('abc', 's') # works
939 "{0:{1:{2}}}".format('abc', 's', '') # fails
940 */
941 int recursion_depth = 2;
942
943 AutoNumber auto_number;
944
945 if (PyUnicode_READY(self) == -1)
946 return NULL;
947
948 AutoNumber_Init(&auto_number);
949 SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
950 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
951 }
952
953 static PyObject *
do_string_format_map(PyObject * self,PyObject * obj)954 do_string_format_map(PyObject *self, PyObject *obj)
955 {
956 return do_string_format(self, NULL, obj);
957 }
958
959
960 /************************************************************************/
961 /*********** formatteriterator ******************************************/
962 /************************************************************************/
963
964 /* This is used to implement string.Formatter.vparse(). It exists so
965 Formatter can share code with the built in unicode.format() method.
966 It's really just a wrapper around MarkupIterator that is callable
967 from Python. */
968
969 typedef struct {
970 PyObject_HEAD
971 PyObject *str;
972 MarkupIterator it_markup;
973 } formatteriterobject;
974
975 static void
formatteriter_dealloc(formatteriterobject * it)976 formatteriter_dealloc(formatteriterobject *it)
977 {
978 Py_XDECREF(it->str);
979 PyObject_FREE(it);
980 }
981
982 /* returns a tuple:
983 (literal, field_name, format_spec, conversion)
984
985 literal is any literal text to output. might be zero length
986 field_name is the string before the ':'. might be None
987 format_spec is the string after the ':'. mibht be None
988 conversion is either None, or the string after the '!'
989 */
990 static PyObject *
formatteriter_next(formatteriterobject * it)991 formatteriter_next(formatteriterobject *it)
992 {
993 SubString literal;
994 SubString field_name;
995 SubString format_spec;
996 Py_UCS4 conversion;
997 int format_spec_needs_expanding;
998 int field_present;
999 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1000 &field_name, &format_spec, &conversion,
1001 &format_spec_needs_expanding);
1002
1003 /* all of the SubString objects point into it->str, so no
1004 memory management needs to be done on them */
1005 assert(0 <= result && result <= 2);
1006 if (result == 0 || result == 1)
1007 /* if 0, error has already been set, if 1, iterator is empty */
1008 return NULL;
1009 else {
1010 PyObject *literal_str = NULL;
1011 PyObject *field_name_str = NULL;
1012 PyObject *format_spec_str = NULL;
1013 PyObject *conversion_str = NULL;
1014 PyObject *tuple = NULL;
1015
1016 literal_str = SubString_new_object(&literal);
1017 if (literal_str == NULL)
1018 goto done;
1019
1020 field_name_str = SubString_new_object(&field_name);
1021 if (field_name_str == NULL)
1022 goto done;
1023
1024 /* if field_name is non-zero length, return a string for
1025 format_spec (even if zero length), else return None */
1026 format_spec_str = (field_present ?
1027 SubString_new_object_or_empty :
1028 SubString_new_object)(&format_spec);
1029 if (format_spec_str == NULL)
1030 goto done;
1031
1032 /* if the conversion is not specified, return a None,
1033 otherwise create a one length string with the conversion
1034 character */
1035 if (conversion == '\0') {
1036 conversion_str = Py_None;
1037 Py_INCREF(conversion_str);
1038 }
1039 else
1040 conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1041 &conversion, 1);
1042 if (conversion_str == NULL)
1043 goto done;
1044
1045 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1046 conversion_str);
1047 done:
1048 Py_XDECREF(literal_str);
1049 Py_XDECREF(field_name_str);
1050 Py_XDECREF(format_spec_str);
1051 Py_XDECREF(conversion_str);
1052 return tuple;
1053 }
1054 }
1055
/* Method table for PyFormatterIter_Type.  It defines no methods: the
   only entry is the terminating sentinel. */
static PyMethodDef formatteriter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1059
/* Type object for the iterator created by formatter_parser().  The
   object is its own iterator (tp_iter is PyObject_SelfIter) and each
   step is produced by formatteriter_next().  Slots after the last one
   listed are left to C's implicit zero-initialization. */
static PyTypeObject PyFormatterIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "formatteriterator",                /* tp_name */
    sizeof(formatteriterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)formatteriter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)formatteriter_next,   /* tp_iternext */
    formatteriter_methods,              /* tp_methods */
    0,                                  /* tp_members */
};
1092
1093 /* unicode_formatter_parser is used to implement
1094 string.Formatter.vformat. it parses a string and returns tuples
1095 describing the parsed elements. It's a wrapper around
1096 stringlib/string_format.h's MarkupIterator */
1097 static PyObject *
formatter_parser(PyObject * ignored,PyObject * self)1098 formatter_parser(PyObject *ignored, PyObject *self)
1099 {
1100 formatteriterobject *it;
1101
1102 if (!PyUnicode_Check(self)) {
1103 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1104 return NULL;
1105 }
1106
1107 if (PyUnicode_READY(self) == -1)
1108 return NULL;
1109
1110 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1111 if (it == NULL)
1112 return NULL;
1113
1114 /* take ownership, give the object to the iterator */
1115 Py_INCREF(self);
1116 it->str = self;
1117
1118 /* initialize the contained MarkupIterator */
1119 MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1120 return (PyObject *)it;
1121 }
1122
1123
1124 /************************************************************************/
1125 /*********** fieldnameiterator ******************************************/
1126 /************************************************************************/
1127
1128
1129 /* This is used to implement string.Formatter.vparse(). It parses the
1130 field name into attribute and item values. It's a Python-callable
1131 wrapper around FieldNameIterator */
1132
/* Instance layout of the Python-visible iterator that wraps a
   FieldNameIterator. */
typedef struct {
    PyObject_HEAD
    PyObject *str;              /* the field name string; the reference taken
                                   in formatter_field_name_split() is dropped
                                   in fieldnameiter_dealloc() */
    FieldNameIterator it_field; /* iteration state, filled in by
                                   field_name_split() */
} fieldnameiterobject;
1138
1139 static void
fieldnameiter_dealloc(fieldnameiterobject * it)1140 fieldnameiter_dealloc(fieldnameiterobject *it)
1141 {
1142 Py_XDECREF(it->str);
1143 PyObject_FREE(it);
1144 }
1145
1146 /* returns a tuple:
1147 (is_attr, value)
1148 is_attr is true if we used attribute syntax (e.g., '.foo')
1149 false if we used index syntax (e.g., '[foo]')
1150 value is an integer or string
1151 */
1152 static PyObject *
fieldnameiter_next(fieldnameiterobject * it)1153 fieldnameiter_next(fieldnameiterobject *it)
1154 {
1155 int result;
1156 int is_attr;
1157 Py_ssize_t idx;
1158 SubString name;
1159
1160 result = FieldNameIterator_next(&it->it_field, &is_attr,
1161 &idx, &name);
1162 if (result == 0 || result == 1)
1163 /* if 0, error has already been set, if 1, iterator is empty */
1164 return NULL;
1165 else {
1166 PyObject* result = NULL;
1167 PyObject* is_attr_obj = NULL;
1168 PyObject* obj = NULL;
1169
1170 is_attr_obj = PyBool_FromLong(is_attr);
1171 if (is_attr_obj == NULL)
1172 goto done;
1173
1174 /* either an integer or a string */
1175 if (idx != -1)
1176 obj = PyLong_FromSsize_t(idx);
1177 else
1178 obj = SubString_new_object(&name);
1179 if (obj == NULL)
1180 goto done;
1181
1182 /* return a tuple of values */
1183 result = PyTuple_Pack(2, is_attr_obj, obj);
1184
1185 done:
1186 Py_XDECREF(is_attr_obj);
1187 Py_XDECREF(obj);
1188 return result;
1189 }
1190 }
1191
/* Method table for PyFieldNameIter_Type.  It defines no methods: the
   only entry is the terminating sentinel. */
static PyMethodDef fieldnameiter_methods[] = {
    {NULL,              NULL}           /* sentinel */
};
1195
/* Type object for the iterator created by formatter_field_name_split().
   The object is its own iterator (tp_iter is PyObject_SelfIter) and
   each step is produced by fieldnameiter_next().  Slots after the last
   one listed are left to C's implicit zero-initialization. */
static PyTypeObject PyFieldNameIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "fieldnameiterator",                /* tp_name */
    sizeof(fieldnameiterobject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    PyObject_SelfIter,                  /* tp_iter */
    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
    fieldnameiter_methods,              /* tp_methods */
    0};                                 /* tp_members */
1227
1228 /* unicode_formatter_field_name_split is used to implement
1229 string.Formatter.vformat. it takes a PEP 3101 "field name", and
1230 returns a tuple of (first, rest): "first", the part before the
1231 first '.' or '['; and "rest", an iterator for the rest of the field
1232 name. it's a wrapper around stringlib/string_format.h's
1233 field_name_split. The iterator it returns is a
1234 FieldNameIterator */
1235 static PyObject *
formatter_field_name_split(PyObject * ignored,PyObject * self)1236 formatter_field_name_split(PyObject *ignored, PyObject *self)
1237 {
1238 SubString first;
1239 Py_ssize_t first_idx;
1240 fieldnameiterobject *it;
1241
1242 PyObject *first_obj = NULL;
1243 PyObject *result = NULL;
1244
1245 if (!PyUnicode_Check(self)) {
1246 PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1247 return NULL;
1248 }
1249
1250 if (PyUnicode_READY(self) == -1)
1251 return NULL;
1252
1253 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1254 if (it == NULL)
1255 return NULL;
1256
1257 /* take ownership, give the object to the iterator. this is
1258 just to keep the field_name alive */
1259 Py_INCREF(self);
1260 it->str = self;
1261
1262 /* Pass in auto_number = NULL. We'll return an empty string for
1263 first_obj in that case. */
1264 if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1265 &first, &first_idx, &it->it_field, NULL))
1266 goto done;
1267
1268 /* first becomes an integer, if possible; else a string */
1269 if (first_idx != -1)
1270 first_obj = PyLong_FromSsize_t(first_idx);
1271 else
1272 /* convert "first" into a string object */
1273 first_obj = SubString_new_object(&first);
1274 if (first_obj == NULL)
1275 goto done;
1276
1277 /* return a tuple of values */
1278 result = PyTuple_Pack(2, first_obj, it);
1279
1280 done:
1281 Py_XDECREF(it);
1282 Py_XDECREF(first_obj);
1283 return result;
1284 }
1285