1 /*
2 string_format.h -- implementation of string.format().
3
4 It uses the Objects/stringlib conventions, so that it can be
5 compiled for both unicode and string objects.
6 */
7
8
9 /* Defines for Python 2.6 compatibility */
/* When building against a pre-3.0 Python, map the name PyLong_FromSsize_t
   onto _PyLong_FromSsize_t so that getitem_idx() below can use a single
   spelling. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
13
14 /* Defines for more efficiently reallocating the string buffer */
/* starting value of OutputString.size_increment (see output_initialize);
   build_string also adds it to the initial buffer size */
#define INITIAL_SIZE_INCREMENT 100
/* factor applied to size_increment after each successful output_extend */
#define SIZE_MULTIPLIER 2
/* size_increment is no longer multiplied once it reaches this value */
#define MAX_SIZE_INCREMENT 3200
18
19
20 /************************************************************************/
21 /*********** Global data structures and forward declarations *********/
22 /************************************************************************/
23
24 /*
25 A SubString consists of the characters between two string or
26 unicode pointers.
27 */
typedef struct {
    STRINGLIB_CHAR *ptr;    /* first character, or NULL for "no string" */
    STRINGLIB_CHAR *end;    /* one past the last character (NULL if ptr is) */
} SubString;
32
33
typedef enum {
    ANS_INIT,       /* set by AutoNumber_Init: no numeric field seen yet */
    ANS_AUTO,       /* first numeric field had an empty name */
    ANS_MANUAL      /* first numeric field had an explicit index */
} AutoNumberState;   /* Keep track if we're auto-numbering fields */
39
40 /* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;   /* numbering mode currently in effect */
    int an_field_number;        /* index given to the next empty field name */
} AutoNumber;
45
46
47 /* forward declaration for recursion */
/* build_string is defined near the end of this file; output_markup calls
   it to expand a format_spec that itself contains replacement fields. */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number);
51
52
53
54 /************************************************************************/
55 /************************** Utility functions ************************/
56 /************************************************************************/
57
58 static void
AutoNumber_Init(AutoNumber * auto_number)59 AutoNumber_Init(AutoNumber *auto_number)
60 {
61 auto_number->an_state = ANS_INIT;
62 auto_number->an_field_number = 0;
63 }
64
65 /* fill in a SubString from a pointer and length */
66 Py_LOCAL_INLINE(void)
SubString_init(SubString * str,STRINGLIB_CHAR * p,Py_ssize_t len)67 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
68 {
69 str->ptr = p;
70 if (p == NULL)
71 str->end = NULL;
72 else
73 str->end = str->ptr + len;
74 }
75
76 /* return a new string. if str->ptr is NULL, return None */
77 Py_LOCAL_INLINE(PyObject *)
SubString_new_object(SubString * str)78 SubString_new_object(SubString *str)
79 {
80 if (str->ptr == NULL) {
81 Py_INCREF(Py_None);
82 return Py_None;
83 }
84 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85 }
86
87 /* return a new string. if str->ptr is NULL, return None */
88 Py_LOCAL_INLINE(PyObject *)
SubString_new_object_or_empty(SubString * str)89 SubString_new_object_or_empty(SubString *str)
90 {
91 if (str->ptr == NULL) {
92 return STRINGLIB_NEW(NULL, 0);
93 }
94 return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95 }
96
97 /* Return 1 if an error has been detected switching between automatic
98 field numbering and manual field specification, else return 0. Set
99 ValueError on error. */
100 static int
autonumber_state_error(AutoNumberState state,int field_name_is_empty)101 autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102 {
103 if (state == ANS_MANUAL) {
104 if (field_name_is_empty) {
105 PyErr_SetString(PyExc_ValueError, "cannot switch from "
106 "manual field specification to "
107 "automatic field numbering");
108 return 1;
109 }
110 }
111 else {
112 if (!field_name_is_empty) {
113 PyErr_SetString(PyExc_ValueError, "cannot switch from "
114 "automatic field numbering to "
115 "manual field specification");
116 return 1;
117 }
118 }
119 return 0;
120 }
121
122
123 /************************************************************************/
124 /*********** Output string management functions ****************/
125 /************************************************************************/
126
typedef struct {
    STRINGLIB_CHAR *ptr;        /* next position to write in obj's buffer */
    STRINGLIB_CHAR *end;        /* one past the last position of the buffer */
    PyObject *obj;              /* the string object holding the buffer */
    Py_ssize_t size_increment;  /* extra room output_extend adds on top of
                                   what was asked for */
} OutputString;
133
134 /* initialize an OutputString object, reserving size characters */
135 static int
output_initialize(OutputString * output,Py_ssize_t size)136 output_initialize(OutputString *output, Py_ssize_t size)
137 {
138 output->obj = STRINGLIB_NEW(NULL, size);
139 if (output->obj == NULL)
140 return 0;
141
142 output->ptr = STRINGLIB_STR(output->obj);
143 output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144 output->size_increment = INITIAL_SIZE_INCREMENT;
145
146 return 1;
147 }
148
149 /*
150 output_extend reallocates the output string buffer.
151 It returns a status: 0 for a failed reallocation,
152 1 for success.
153 */
154
155 static int
output_extend(OutputString * output,Py_ssize_t count)156 output_extend(OutputString *output, Py_ssize_t count)
157 {
158 STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159 Py_ssize_t curlen = output->ptr - startptr;
160 Py_ssize_t maxlen = curlen + count + output->size_increment;
161
162 if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163 return 0;
164 startptr = STRINGLIB_STR(output->obj);
165 output->ptr = startptr + curlen;
166 output->end = startptr + maxlen;
167 if (output->size_increment < MAX_SIZE_INCREMENT)
168 output->size_increment *= SIZE_MULTIPLIER;
169 return 1;
170 }
171
172 /*
173 output_data dumps characters into our output string
174 buffer.
175
176 In some cases, it has to reallocate the string.
177
178 It returns a status: 0 for a failed reallocation,
179 1 for success.
180 */
181 static int
output_data(OutputString * output,const STRINGLIB_CHAR * s,Py_ssize_t count)182 output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
183 {
184 if ((count > output->end - output->ptr) && !output_extend(output, count))
185 return 0;
186 memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187 output->ptr += count;
188 return 1;
189 }
190
191 /************************************************************************/
192 /*********** Format string parsing -- integers and identifiers *********/
193 /************************************************************************/
194
195 static Py_ssize_t
get_integer(const SubString * str)196 get_integer(const SubString *str)
197 {
198 Py_ssize_t accumulator = 0;
199 Py_ssize_t digitval;
200 STRINGLIB_CHAR *p;
201
202 /* empty string is an error */
203 if (str->ptr >= str->end)
204 return -1;
205
206 for (p = str->ptr; p < str->end; p++) {
207 digitval = STRINGLIB_TODECIMAL(*p);
208 if (digitval < 0)
209 return -1;
210 /*
211 Detect possible overflow before it happens:
212
213 accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
214 accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
215 */
216 if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
217 PyErr_Format(PyExc_ValueError,
218 "Too many decimal digits in format string");
219 return -1;
220 }
221 accumulator = accumulator * 10 + digitval;
222 }
223 return accumulator;
224 }
225
226 /************************************************************************/
227 /******** Functions to get field objects and specification strings ******/
228 /************************************************************************/
229
230 /* do the equivalent of obj.name */
231 static PyObject *
getattr(PyObject * obj,SubString * name)232 getattr(PyObject *obj, SubString *name)
233 {
234 PyObject *newobj;
235 PyObject *str = SubString_new_object(name);
236 if (str == NULL)
237 return NULL;
238 newobj = PyObject_GetAttr(obj, str);
239 Py_DECREF(str);
240 return newobj;
241 }
242
243 /* do the equivalent of obj[idx], where obj is a sequence */
244 static PyObject *
getitem_sequence(PyObject * obj,Py_ssize_t idx)245 getitem_sequence(PyObject *obj, Py_ssize_t idx)
246 {
247 return PySequence_GetItem(obj, idx);
248 }
249
250 /* do the equivalent of obj[idx], where obj is not a sequence */
251 static PyObject *
getitem_idx(PyObject * obj,Py_ssize_t idx)252 getitem_idx(PyObject *obj, Py_ssize_t idx)
253 {
254 PyObject *newobj;
255 PyObject *idx_obj = PyLong_FromSsize_t(idx);
256 if (idx_obj == NULL)
257 return NULL;
258 newobj = PyObject_GetItem(obj, idx_obj);
259 Py_DECREF(idx_obj);
260 return newobj;
261 }
262
263 /* do the equivalent of obj[name] */
264 static PyObject *
getitem_str(PyObject * obj,SubString * name)265 getitem_str(PyObject *obj, SubString *name)
266 {
267 PyObject *newobj;
268 PyObject *str = SubString_new_object(name);
269 if (str == NULL)
270 return NULL;
271 newobj = PyObject_GetItem(obj, str);
272 Py_DECREF(str);
273 return newobj;
274 }
275
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name.  it starts at str.ptr
       (see FieldNameIterator_init) and moves towards str.end as
       FieldNameIterator_next consumes ".attr" and "[item]" parts */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
285
286
287 static int
FieldNameIterator_init(FieldNameIterator * self,STRINGLIB_CHAR * ptr,Py_ssize_t len)288 FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
289 Py_ssize_t len)
290 {
291 SubString_init(&self->str, ptr, len);
292 self->ptr = self->str.ptr;
293 return 1;
294 }
295
296 static int
_FieldNameIterator_attr(FieldNameIterator * self,SubString * name)297 _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
298 {
299 STRINGLIB_CHAR c;
300
301 name->ptr = self->ptr;
302
303 /* return everything until '.' or '[' */
304 while (self->ptr < self->str.end) {
305 switch (c = *self->ptr++) {
306 case '[':
307 case '.':
308 /* backup so that we this character will be seen next time */
309 self->ptr--;
310 break;
311 default:
312 continue;
313 }
314 break;
315 }
316 /* end of string is okay */
317 name->end = self->ptr;
318 return 1;
319 }
320
321 static int
_FieldNameIterator_item(FieldNameIterator * self,SubString * name)322 _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
323 {
324 int bracket_seen = 0;
325 STRINGLIB_CHAR c;
326
327 name->ptr = self->ptr;
328
329 /* return everything until ']' */
330 while (self->ptr < self->str.end) {
331 switch (c = *self->ptr++) {
332 case ']':
333 bracket_seen = 1;
334 break;
335 default:
336 continue;
337 }
338 break;
339 }
340 /* make sure we ended with a ']' */
341 if (!bracket_seen) {
342 PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
343 return 0;
344 }
345
346 /* end of string is okay */
347 /* don't include the ']' */
348 name->end = self->ptr-1;
349 return 1;
350 }
351
352 /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
353 static int
FieldNameIterator_next(FieldNameIterator * self,int * is_attribute,Py_ssize_t * name_idx,SubString * name)354 FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
355 Py_ssize_t *name_idx, SubString *name)
356 {
357 /* check at end of input */
358 if (self->ptr >= self->str.end)
359 return 1;
360
361 switch (*self->ptr++) {
362 case '.':
363 *is_attribute = 1;
364 if (_FieldNameIterator_attr(self, name) == 0)
365 return 0;
366 *name_idx = -1;
367 break;
368 case '[':
369 *is_attribute = 0;
370 if (_FieldNameIterator_item(self, name) == 0)
371 return 0;
372 *name_idx = get_integer(name);
373 if (*name_idx == -1 && PyErr_Occurred())
374 return 0;
375 break;
376 default:
377 /* Invalid character follows ']' */
378 PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
379 "follow ']' in format field specifier");
380 return 0;
381 }
382
383 /* empty string is an error */
384 if (name->ptr == name->end) {
385 PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
386 return 0;
387 }
388
389 return 2;
390 }
391
392
393 /* input: field_name
394 output: 'first' points to the part before the first '[' or '.'
395 'first_idx' is -1 if 'first' is not an integer, otherwise
396 it's the value of first converted to an integer
397 'rest' is an iterator to return the rest
398 */
399 static int
field_name_split(STRINGLIB_CHAR * ptr,Py_ssize_t len,SubString * first,Py_ssize_t * first_idx,FieldNameIterator * rest,AutoNumber * auto_number)400 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
401 Py_ssize_t *first_idx, FieldNameIterator *rest,
402 AutoNumber *auto_number)
403 {
404 STRINGLIB_CHAR c;
405 STRINGLIB_CHAR *p = ptr;
406 STRINGLIB_CHAR *end = ptr + len;
407 int field_name_is_empty;
408 int using_numeric_index;
409
410 /* find the part up until the first '.' or '[' */
411 while (p < end) {
412 switch (c = *p++) {
413 case '[':
414 case '.':
415 /* backup so that we this character is available to the
416 "rest" iterator */
417 p--;
418 break;
419 default:
420 continue;
421 }
422 break;
423 }
424
425 /* set up the return values */
426 SubString_init(first, ptr, p - ptr);
427 FieldNameIterator_init(rest, p, end - p);
428
429 /* see if "first" is an integer, in which case it's used as an index */
430 *first_idx = get_integer(first);
431 if (*first_idx == -1 && PyErr_Occurred())
432 return 0;
433
434 field_name_is_empty = first->ptr >= first->end;
435
436 /* If the field name is omitted or if we have a numeric index
437 specified, then we're doing numeric indexing into args. */
438 using_numeric_index = field_name_is_empty || *first_idx != -1;
439
440 /* We always get here exactly one time for each field we're
441 processing. And we get here in field order (counting by left
442 braces). So this is the perfect place to handle automatic field
443 numbering if the field name is omitted. */
444
445 /* Check if we need to do the auto-numbering. It's not needed if
446 we're called from string.Format routines, because it's handled
447 in that class by itself. */
448 if (auto_number) {
449 /* Initialize our auto numbering state if this is the first
450 time we're either auto-numbering or manually numbering. */
451 if (auto_number->an_state == ANS_INIT && using_numeric_index)
452 auto_number->an_state = field_name_is_empty ?
453 ANS_AUTO : ANS_MANUAL;
454
455 /* Make sure our state is consistent with what we're doing
456 this time through. Only check if we're using a numeric
457 index. */
458 if (using_numeric_index)
459 if (autonumber_state_error(auto_number->an_state,
460 field_name_is_empty))
461 return 0;
462 /* Zero length field means we want to do auto-numbering of the
463 fields. */
464 if (field_name_is_empty)
465 *first_idx = (auto_number->an_field_number)++;
466 }
467
468 return 1;
469 }
470
471
472 /*
473 get_field_object returns the object inside {}, before the
474 format_spec. It handles getindex and getattr lookups and consumes
475 the entire input string.
476 */
477 static PyObject *
get_field_object(SubString * input,PyObject * args,PyObject * kwargs,AutoNumber * auto_number)478 get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
479 AutoNumber *auto_number)
480 {
481 PyObject *obj = NULL;
482 int ok;
483 int is_attribute;
484 SubString name;
485 SubString first;
486 Py_ssize_t index;
487 FieldNameIterator rest;
488
489 if (!field_name_split(input->ptr, input->end - input->ptr, &first,
490 &index, &rest, auto_number)) {
491 goto error;
492 }
493
494 if (index == -1) {
495 /* look up in kwargs */
496 PyObject *key = SubString_new_object(&first);
497 if (key == NULL)
498 goto error;
499 if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
500 PyErr_SetObject(PyExc_KeyError, key);
501 Py_DECREF(key);
502 goto error;
503 }
504 Py_DECREF(key);
505 Py_INCREF(obj);
506 }
507 else {
508 /* look up in args */
509 obj = PySequence_GetItem(args, index);
510 if (obj == NULL)
511 goto error;
512 }
513
514 /* iterate over the rest of the field_name */
515 while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
516 &name)) == 2) {
517 PyObject *tmp;
518
519 if (is_attribute)
520 /* getattr lookup "." */
521 tmp = getattr(obj, &name);
522 else
523 /* getitem lookup "[]" */
524 if (index == -1)
525 tmp = getitem_str(obj, &name);
526 else
527 if (PySequence_Check(obj))
528 tmp = getitem_sequence(obj, index);
529 else
530 /* not a sequence */
531 tmp = getitem_idx(obj, index);
532 if (tmp == NULL)
533 goto error;
534
535 /* assign to obj */
536 Py_DECREF(obj);
537 obj = tmp;
538 }
539 /* end of iterator, this is the non-error case */
540 if (ok == 1)
541 return obj;
542 error:
543 Py_XDECREF(obj);
544 return NULL;
545 }
546
547 /************************************************************************/
548 /***************** Field rendering functions **************************/
549 /************************************************************************/
550
551 /*
552 render_field() is the main function in this section. It takes the
553 field object and field specification string generated by
554 get_field_and_spec, and renders the field into the output string.
555
556 render_field calls fieldobj.__format__(format_spec) method, and
557 appends to the output.
558 */
559 static int
render_field(PyObject * fieldobj,SubString * format_spec,OutputString * output)560 render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
561 {
562 int ok = 0;
563 PyObject *result = NULL;
564 PyObject *format_spec_object = NULL;
565 PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
566 STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
567 format_spec->ptr : NULL;
568 Py_ssize_t format_spec_len = format_spec->ptr ?
569 format_spec->end - format_spec->ptr : 0;
570
571 /* If we know the type exactly, skip the lookup of __format__ and just
572 call the formatter directly. */
573 #if STRINGLIB_IS_UNICODE
574 if (PyUnicode_CheckExact(fieldobj))
575 formatter = _PyUnicode_FormatAdvanced;
576 /* Unfortunately, there's a problem with checking for int, long,
577 and float here. If we're being included as unicode, their
578 formatters expect string format_spec args. For now, just skip
579 this optimization for unicode. This could be fixed, but it's a
580 hassle. */
581 #else
582 if (PyString_CheckExact(fieldobj))
583 formatter = _PyBytes_FormatAdvanced;
584 else if (PyInt_CheckExact(fieldobj))
585 formatter =_PyInt_FormatAdvanced;
586 else if (PyLong_CheckExact(fieldobj))
587 formatter =_PyLong_FormatAdvanced;
588 else if (PyFloat_CheckExact(fieldobj))
589 formatter = _PyFloat_FormatAdvanced;
590 #endif
591
592 if (formatter) {
593 /* we know exactly which formatter will be called when __format__ is
594 looked up, so call it directly, instead. */
595 result = formatter(fieldobj, format_spec_start, format_spec_len);
596 }
597 else {
598 /* We need to create an object out of the pointers we have, because
599 __format__ takes a string/unicode object for format_spec. */
600 format_spec_object = STRINGLIB_NEW(format_spec_start,
601 format_spec_len);
602 if (format_spec_object == NULL)
603 goto done;
604
605 result = PyObject_Format(fieldobj, format_spec_object);
606 }
607 if (result == NULL)
608 goto done;
609
610 #if PY_VERSION_HEX >= 0x03000000
611 assert(PyUnicode_Check(result));
612 #else
613 assert(PyString_Check(result) || PyUnicode_Check(result));
614
615 /* Convert result to our type. We could be str, and result could
616 be unicode */
617 {
618 PyObject *tmp = STRINGLIB_TOSTR(result);
619 if (tmp == NULL)
620 goto done;
621 Py_DECREF(result);
622 result = tmp;
623 }
624 #endif
625
626 ok = output_data(output,
627 STRINGLIB_STR(result), STRINGLIB_LEN(result));
628 done:
629 Py_XDECREF(format_spec_object);
630 Py_XDECREF(result);
631 return ok;
632 }
633
634 static int
parse_field(SubString * str,SubString * field_name,SubString * format_spec,STRINGLIB_CHAR * conversion)635 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
636 STRINGLIB_CHAR *conversion)
637 {
638 /* Note this function works if the field name is zero length,
639 which is good. Zero length field names are handled later, in
640 field_name_split. */
641
642 STRINGLIB_CHAR c = 0;
643
644 /* initialize these, as they may be empty */
645 *conversion = '\0';
646 SubString_init(format_spec, NULL, 0);
647
648 /* Search for the field name. it's terminated by the end of
649 the string, or a ':' or '!' */
650 field_name->ptr = str->ptr;
651 while (str->ptr < str->end) {
652 switch (c = *(str->ptr++)) {
653 case ':':
654 case '!':
655 break;
656 default:
657 continue;
658 }
659 break;
660 }
661
662 if (c == '!' || c == ':') {
663 /* we have a format specifier and/or a conversion */
664 /* don't include the last character */
665 field_name->end = str->ptr-1;
666
667 /* the format specifier is the rest of the string */
668 format_spec->ptr = str->ptr;
669 format_spec->end = str->end;
670
671 /* see if there's a conversion specifier */
672 if (c == '!') {
673 /* there must be another character present */
674 if (format_spec->ptr >= format_spec->end) {
675 PyErr_SetString(PyExc_ValueError,
676 "end of format while looking for conversion "
677 "specifier");
678 return 0;
679 }
680 *conversion = *(format_spec->ptr++);
681
682 /* if there is another character, it must be a colon */
683 if (format_spec->ptr < format_spec->end) {
684 c = *(format_spec->ptr++);
685 if (c != ':') {
686 PyErr_SetString(PyExc_ValueError,
687 "expected ':' after format specifier");
688 return 0;
689 }
690 }
691 }
692 }
693 else
694 /* end of string, there's no format_spec or conversion */
695 field_name->end = str->ptr;
696
697 return 1;
698 }
699
700 /************************************************************************/
701 /******* Output string allocation and escape-to-markup processing ******/
702 /************************************************************************/
703
704 /* MarkupIterator breaks the string into pieces of either literal
705 text, or things inside {} that need to be marked up. it is
706 designed to make it easy to wrap a Python iterator around it, for
707 use with the Formatter class */
708
typedef struct {
    /* the part of the format string not yet returned;
       MarkupIterator_next advances str.ptr towards str.end */
    SubString str;
} MarkupIterator;
712
713 static int
MarkupIterator_init(MarkupIterator * self,STRINGLIB_CHAR * ptr,Py_ssize_t len)714 MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
715 {
716 SubString_init(&self->str, ptr, len);
717 return 1;
718 }
719
/* Return the next chunk of the format string: a run of literal text,
   optionally followed by one replacement field.

   returns 0 on error (ValueError set), 1 on non-error termination, and
   2 if it got a string (or something to be expanded).  When 2 is
   returned:
     literal        is the literal text (may be zero length)
     *field_present is 1 if a replacement field follows the literal
     field_name, format_spec, *conversion
                    are filled in by parse_field when a field is present
     *format_spec_needs_expanding
                    is 1 if a '{' was seen inside the field
*/
static int
MarkupIterator_next(MarkupIterator *self, SubString *literal,
                    int *field_present, SubString *field_name,
                    SubString *format_spec, STRINGLIB_CHAR *conversion,
                    int *format_spec_needs_expanding)
{
    int at_end;
    STRINGLIB_CHAR c = 0;
    STRINGLIB_CHAR *start;
    int count;
    Py_ssize_t len;
    int markup_follows = 0;

    /* initialize all of the output variables */
    SubString_init(literal, NULL, 0);
    SubString_init(field_name, NULL, 0);
    SubString_init(format_spec, NULL, 0);
    *conversion = '\0';
    *format_spec_needs_expanding = 0;
    *field_present = 0;

    /* No more input, end of iterator.  This is the normal exit
       path. */
    if (self->str.ptr >= self->str.end)
        return 1;

    start = self->str.ptr;

    /* First read any literal text.  Read until the end of string, an
       escaped '{' or '}', or an unescaped '{'.  In order to never
       allocate memory and so I can just pass pointers around, if
       there's an escaped '{' or '}' then we'll return the literal
       including the brace, but no format object.  The next time
       through, we'll return the rest of the literal, skipping past
       the second consecutive brace. */
    while (self->str.ptr < self->str.end) {
        switch (c = *(self->str.ptr++)) {
        case '{':
        case '}':
            markup_follows = 1;
            break;
        default:
            continue;
        }
        break;
    }

    /* c is the last character read; str.ptr is one past it, and len
       still counts c as part of the literal */
    at_end = self->str.ptr >= self->str.end;
    len = self->str.ptr - start;

    /* a '}' is only legal here as the first half of "}}" */
    if ((c == '}') && (at_end || (c != *self->str.ptr))) {
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
                        "in format string");
        return 0;
    }
    /* a '{' as the very last character can neither be an escape nor
       start a field */
    if (at_end && c == '{') {
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
                        "in format string");
        return 0;
    }
    if (!at_end) {
        if (c == *self->str.ptr) {
            /* escaped } or {, skip it in the input.  there is no
               markup object following us, just this literal text */
            self->str.ptr++;
            markup_follows = 0;
        }
        else
            /* an unescaped '{': it is not part of the literal */
            len--;
    }

    /* record the literal text */
    literal->ptr = start;
    literal->end = start + len;

    if (!markup_follows)
        return 2;

    /* this is markup, find the end of the string by counting nested
       braces.  note that this prohibits escaped braces, so that
       format_specs cannot have braces in them. */
    *field_present = 1;
    count = 1;

    /* start now marks the first character after the opening '{' */
    start = self->str.ptr;

    /* we know we can't have a zero length string, so don't worry
       about that case */
    while (self->str.ptr < self->str.end) {
        switch (c = *(self->str.ptr++)) {
        case '{':
            /* the format spec needs to be recursively expanded.
               this is an optimization, and not strictly needed */
            *format_spec_needs_expanding = 1;
            count++;
            break;
        case '}':
            count--;
            if (count <= 0) {
                /* we're done.  parse and get out */
                SubString s;

                /* s is the text between the outermost braces; the
                   "- 1" leaves out the closing '}' just consumed */
                SubString_init(&s, start, self->str.ptr - 1 - start);
                if (parse_field(&s, field_name, format_spec, conversion) == 0)
                    return 0;

                /* success */
                return 2;
            }
            break;
        }
    }

    /* end of string while searching for matching '}' */
    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
    return 0;
}
839
840
841 /* do the !r or !s conversion on obj */
842 static PyObject *
do_conversion(PyObject * obj,STRINGLIB_CHAR conversion)843 do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
844 {
845 /* XXX in pre-3.0, do we need to convert this to unicode, since it
846 might have returned a string? */
847 switch (conversion) {
848 case 'r':
849 return PyObject_Repr(obj);
850 case 's':
851 return STRINGLIB_TOSTR(obj);
852 default:
853 if (conversion > 32 && conversion < 127) {
854 /* It's the ASCII subrange; casting to char is safe
855 (assuming the execution character set is an ASCII
856 superset). */
857 PyErr_Format(PyExc_ValueError,
858 "Unknown conversion specifier %c",
859 (char)conversion);
860 } else
861 PyErr_Format(PyExc_ValueError,
862 "Unknown conversion specifier \\x%x",
863 (unsigned int)conversion);
864 return NULL;
865 }
866 }
867
868 /* given:
869
870 {field_name!conversion:format_spec}
871
872 compute the result and write it to output.
873 format_spec_needs_expanding is an optimization. if it's false,
874 just output the string directly, otherwise recursively expand the
875 format_spec string.
876
877 field_name is allowed to be zero length, in which case we
878 are doing auto field numbering.
879 */
880
881 static int
output_markup(SubString * field_name,SubString * format_spec,int format_spec_needs_expanding,STRINGLIB_CHAR conversion,OutputString * output,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)882 output_markup(SubString *field_name, SubString *format_spec,
883 int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
884 OutputString *output, PyObject *args, PyObject *kwargs,
885 int recursion_depth, AutoNumber *auto_number)
886 {
887 PyObject *tmp = NULL;
888 PyObject *fieldobj = NULL;
889 SubString expanded_format_spec;
890 SubString *actual_format_spec;
891 int result = 0;
892
893 /* convert field_name to an object */
894 fieldobj = get_field_object(field_name, args, kwargs, auto_number);
895 if (fieldobj == NULL)
896 goto done;
897
898 if (conversion != '\0') {
899 tmp = do_conversion(fieldobj, conversion);
900 if (tmp == NULL)
901 goto done;
902
903 /* do the assignment, transferring ownership: fieldobj = tmp */
904 Py_DECREF(fieldobj);
905 fieldobj = tmp;
906 tmp = NULL;
907 }
908
909 /* if needed, recurively compute the format_spec */
910 if (format_spec_needs_expanding) {
911 tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
912 auto_number);
913 if (tmp == NULL)
914 goto done;
915
916 /* note that in the case we're expanding the format string,
917 tmp must be kept around until after the call to
918 render_field. */
919 SubString_init(&expanded_format_spec,
920 STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
921 actual_format_spec = &expanded_format_spec;
922 }
923 else
924 actual_format_spec = format_spec;
925
926 if (render_field(fieldobj, actual_format_spec, output) == 0)
927 goto done;
928
929 result = 1;
930
931 done:
932 Py_XDECREF(fieldobj);
933 Py_XDECREF(tmp);
934
935 return result;
936 }
937
938 /*
939 do_markup is the top-level loop for the format() method. It
940 searches through the format string for escapes to markup codes, and
941 calls other functions to move non-markup text to the output,
942 and to perform the markup to the output.
943 */
944 static int
do_markup(SubString * input,PyObject * args,PyObject * kwargs,OutputString * output,int recursion_depth,AutoNumber * auto_number)945 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
946 OutputString *output, int recursion_depth, AutoNumber *auto_number)
947 {
948 MarkupIterator iter;
949 int format_spec_needs_expanding;
950 int result;
951 int field_present;
952 SubString literal;
953 SubString field_name;
954 SubString format_spec;
955 STRINGLIB_CHAR conversion;
956
957 MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
958 while ((result = MarkupIterator_next(&iter, &literal, &field_present,
959 &field_name, &format_spec,
960 &conversion,
961 &format_spec_needs_expanding)) == 2) {
962 if (!output_data(output, literal.ptr, literal.end - literal.ptr))
963 return 0;
964 if (field_present)
965 if (!output_markup(&field_name, &format_spec,
966 format_spec_needs_expanding, conversion, output,
967 args, kwargs, recursion_depth, auto_number))
968 return 0;
969 }
970 return result;
971 }
972
973
974 /*
975 build_string allocates the output string and then
976 calls do_markup to do the heavy lifting.
977 */
978 static PyObject *
build_string(SubString * input,PyObject * args,PyObject * kwargs,int recursion_depth,AutoNumber * auto_number)979 build_string(SubString *input, PyObject *args, PyObject *kwargs,
980 int recursion_depth, AutoNumber *auto_number)
981 {
982 OutputString output;
983 PyObject *result = NULL;
984 Py_ssize_t count;
985
986 output.obj = NULL; /* needed so cleanup code always works */
987
988 /* check the recursion level */
989 if (recursion_depth <= 0) {
990 PyErr_SetString(PyExc_ValueError,
991 "Max string recursion exceeded");
992 goto done;
993 }
994
995 /* initial size is the length of the format string, plus the size
996 increment. seems like a reasonable default */
997 if (!output_initialize(&output,
998 input->end - input->ptr +
999 INITIAL_SIZE_INCREMENT))
1000 goto done;
1001
1002 if (!do_markup(input, args, kwargs, &output, recursion_depth,
1003 auto_number)) {
1004 goto done;
1005 }
1006
1007 count = output.ptr - STRINGLIB_STR(output.obj);
1008 if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1009 goto done;
1010 }
1011
1012 /* transfer ownership to result */
1013 result = output.obj;
1014 output.obj = NULL;
1015
1016 done:
1017 Py_XDECREF(output.obj);
1018 return result;
1019 }
1020
1021 /************************************************************************/
1022 /*********** main routine ***********************************************/
1023 /************************************************************************/
1024
1025 /* this is the main entry point */
1026 static PyObject *
do_string_format(PyObject * self,PyObject * args,PyObject * kwargs)1027 do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1028 {
1029 SubString input;
1030
1031 /* PEP 3101 says only 2 levels, so that
1032 "{0:{1}}".format('abc', 's') # works
1033 "{0:{1:{2}}}".format('abc', 's', '') # fails
1034 */
1035 int recursion_depth = 2;
1036
1037 AutoNumber auto_number;
1038
1039 AutoNumber_Init(&auto_number);
1040 SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1041 return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1042 }
1043
1044
1045
1046 /************************************************************************/
1047 /*********** formatteriterator ******************************************/
1048 /************************************************************************/
1049
1050 /* This is used to implement string.Formatter.vparse(). It exists so
1051 Formatter can share code with the built in unicode.format() method.
1052 It's really just a wrapper around MarkupIterator that is callable
1053 from Python. */
1054
1055 typedef struct {
1056 PyObject_HEAD
1057
1058 STRINGLIB_OBJECT *str;
1059
1060 MarkupIterator it_markup;
1061 } formatteriterobject;
1062
1063 static void
formatteriter_dealloc(formatteriterobject * it)1064 formatteriter_dealloc(formatteriterobject *it)
1065 {
1066 Py_XDECREF(it->str);
1067 PyObject_FREE(it);
1068 }
1069
1070 /* returns a tuple:
1071 (literal, field_name, format_spec, conversion)
1072
1073 literal is any literal text to output. might be zero length
1074 field_name is the string before the ':'. might be None
1075 format_spec is the string after the ':'. mibht be None
1076 conversion is either None, or the string after the '!'
1077 */
1078 static PyObject *
formatteriter_next(formatteriterobject * it)1079 formatteriter_next(formatteriterobject *it)
1080 {
1081 SubString literal;
1082 SubString field_name;
1083 SubString format_spec;
1084 STRINGLIB_CHAR conversion;
1085 int format_spec_needs_expanding;
1086 int field_present;
1087 int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1088 &field_name, &format_spec, &conversion,
1089 &format_spec_needs_expanding);
1090
1091 /* all of the SubString objects point into it->str, so no
1092 memory management needs to be done on them */
1093 assert(0 <= result && result <= 2);
1094 if (result == 0 || result == 1)
1095 /* if 0, error has already been set, if 1, iterator is empty */
1096 return NULL;
1097 else {
1098 PyObject *literal_str = NULL;
1099 PyObject *field_name_str = NULL;
1100 PyObject *format_spec_str = NULL;
1101 PyObject *conversion_str = NULL;
1102 PyObject *tuple = NULL;
1103
1104 literal_str = SubString_new_object(&literal);
1105 if (literal_str == NULL)
1106 goto done;
1107
1108 field_name_str = SubString_new_object(&field_name);
1109 if (field_name_str == NULL)
1110 goto done;
1111
1112 /* if field_name is non-zero length, return a string for
1113 format_spec (even if zero length), else return None */
1114 format_spec_str = (field_present ?
1115 SubString_new_object_or_empty :
1116 SubString_new_object)(&format_spec);
1117 if (format_spec_str == NULL)
1118 goto done;
1119
1120 /* if the conversion is not specified, return a None,
1121 otherwise create a one length string with the conversion
1122 character */
1123 if (conversion == '\0') {
1124 conversion_str = Py_None;
1125 Py_INCREF(conversion_str);
1126 }
1127 else
1128 conversion_str = STRINGLIB_NEW(&conversion, 1);
1129 if (conversion_str == NULL)
1130 goto done;
1131
1132 tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1133 conversion_str);
1134 done:
1135 Py_XDECREF(literal_str);
1136 Py_XDECREF(field_name_str);
1137 Py_XDECREF(format_spec_str);
1138 Py_XDECREF(conversion_str);
1139 return tuple;
1140 }
1141 }
1142
1143 static PyMethodDef formatteriter_methods[] = {
1144 {NULL, NULL} /* sentinel */
1145 };
1146
1147 static PyTypeObject PyFormatterIter_Type = {
1148 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1149 "formatteriterator", /* tp_name */
1150 sizeof(formatteriterobject), /* tp_basicsize */
1151 0, /* tp_itemsize */
1152 /* methods */
1153 (destructor)formatteriter_dealloc, /* tp_dealloc */
1154 0, /* tp_print */
1155 0, /* tp_getattr */
1156 0, /* tp_setattr */
1157 0, /* tp_compare */
1158 0, /* tp_repr */
1159 0, /* tp_as_number */
1160 0, /* tp_as_sequence */
1161 0, /* tp_as_mapping */
1162 0, /* tp_hash */
1163 0, /* tp_call */
1164 0, /* tp_str */
1165 PyObject_GenericGetAttr, /* tp_getattro */
1166 0, /* tp_setattro */
1167 0, /* tp_as_buffer */
1168 Py_TPFLAGS_DEFAULT, /* tp_flags */
1169 0, /* tp_doc */
1170 0, /* tp_traverse */
1171 0, /* tp_clear */
1172 0, /* tp_richcompare */
1173 0, /* tp_weaklistoffset */
1174 PyObject_SelfIter, /* tp_iter */
1175 (iternextfunc)formatteriter_next, /* tp_iternext */
1176 formatteriter_methods, /* tp_methods */
1177 0,
1178 };
1179
1180 /* unicode_formatter_parser is used to implement
1181 string.Formatter.vformat. it parses a string and returns tuples
1182 describing the parsed elements. It's a wrapper around
1183 stringlib/string_format.h's MarkupIterator */
1184 static PyObject *
formatter_parser(STRINGLIB_OBJECT * self)1185 formatter_parser(STRINGLIB_OBJECT *self)
1186 {
1187 formatteriterobject *it;
1188
1189 it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1190 if (it == NULL)
1191 return NULL;
1192
1193 /* take ownership, give the object to the iterator */
1194 Py_INCREF(self);
1195 it->str = self;
1196
1197 /* initialize the contained MarkupIterator */
1198 MarkupIterator_init(&it->it_markup,
1199 STRINGLIB_STR(self),
1200 STRINGLIB_LEN(self));
1201
1202 return (PyObject *)it;
1203 }
1204
1205
1206 /************************************************************************/
1207 /*********** fieldnameiterator ******************************************/
1208 /************************************************************************/
1209
1210
1211 /* This is used to implement string.Formatter.vparse(). It parses the
1212 field name into attribute and item values. It's a Python-callable
1213 wrapper around FieldNameIterator */
1214
1215 typedef struct {
1216 PyObject_HEAD
1217
1218 STRINGLIB_OBJECT *str;
1219
1220 FieldNameIterator it_field;
1221 } fieldnameiterobject;
1222
1223 static void
fieldnameiter_dealloc(fieldnameiterobject * it)1224 fieldnameiter_dealloc(fieldnameiterobject *it)
1225 {
1226 Py_XDECREF(it->str);
1227 PyObject_FREE(it);
1228 }
1229
1230 /* returns a tuple:
1231 (is_attr, value)
1232 is_attr is true if we used attribute syntax (e.g., '.foo')
1233 false if we used index syntax (e.g., '[foo]')
1234 value is an integer or string
1235 */
1236 static PyObject *
fieldnameiter_next(fieldnameiterobject * it)1237 fieldnameiter_next(fieldnameiterobject *it)
1238 {
1239 int result;
1240 int is_attr;
1241 Py_ssize_t idx;
1242 SubString name;
1243
1244 result = FieldNameIterator_next(&it->it_field, &is_attr,
1245 &idx, &name);
1246 if (result == 0 || result == 1)
1247 /* if 0, error has already been set, if 1, iterator is empty */
1248 return NULL;
1249 else {
1250 PyObject* result = NULL;
1251 PyObject* is_attr_obj = NULL;
1252 PyObject* obj = NULL;
1253
1254 is_attr_obj = PyBool_FromLong(is_attr);
1255 if (is_attr_obj == NULL)
1256 goto done;
1257
1258 /* either an integer or a string */
1259 if (idx != -1)
1260 obj = PyLong_FromSsize_t(idx);
1261 else
1262 obj = SubString_new_object(&name);
1263 if (obj == NULL)
1264 goto done;
1265
1266 /* return a tuple of values */
1267 result = PyTuple_Pack(2, is_attr_obj, obj);
1268
1269 done:
1270 Py_XDECREF(is_attr_obj);
1271 Py_XDECREF(obj);
1272 return result;
1273 }
1274 }
1275
1276 static PyMethodDef fieldnameiter_methods[] = {
1277 {NULL, NULL} /* sentinel */
1278 };
1279
1280 static PyTypeObject PyFieldNameIter_Type = {
1281 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1282 "fieldnameiterator", /* tp_name */
1283 sizeof(fieldnameiterobject), /* tp_basicsize */
1284 0, /* tp_itemsize */
1285 /* methods */
1286 (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1287 0, /* tp_print */
1288 0, /* tp_getattr */
1289 0, /* tp_setattr */
1290 0, /* tp_compare */
1291 0, /* tp_repr */
1292 0, /* tp_as_number */
1293 0, /* tp_as_sequence */
1294 0, /* tp_as_mapping */
1295 0, /* tp_hash */
1296 0, /* tp_call */
1297 0, /* tp_str */
1298 PyObject_GenericGetAttr, /* tp_getattro */
1299 0, /* tp_setattro */
1300 0, /* tp_as_buffer */
1301 Py_TPFLAGS_DEFAULT, /* tp_flags */
1302 0, /* tp_doc */
1303 0, /* tp_traverse */
1304 0, /* tp_clear */
1305 0, /* tp_richcompare */
1306 0, /* tp_weaklistoffset */
1307 PyObject_SelfIter, /* tp_iter */
1308 (iternextfunc)fieldnameiter_next, /* tp_iternext */
1309 fieldnameiter_methods, /* tp_methods */
1310 0};
1311
1312 /* unicode_formatter_field_name_split is used to implement
1313 string.Formatter.vformat. it takes a PEP 3101 "field name", and
1314 returns a tuple of (first, rest): "first", the part before the
1315 first '.' or '['; and "rest", an iterator for the rest of the field
1316 name. it's a wrapper around stringlib/string_format.h's
1317 field_name_split. The iterator it returns is a
1318 FieldNameIterator */
1319 static PyObject *
formatter_field_name_split(STRINGLIB_OBJECT * self)1320 formatter_field_name_split(STRINGLIB_OBJECT *self)
1321 {
1322 SubString first;
1323 Py_ssize_t first_idx;
1324 fieldnameiterobject *it;
1325
1326 PyObject *first_obj = NULL;
1327 PyObject *result = NULL;
1328
1329 it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1330 if (it == NULL)
1331 return NULL;
1332
1333 /* take ownership, give the object to the iterator. this is
1334 just to keep the field_name alive */
1335 Py_INCREF(self);
1336 it->str = self;
1337
1338 /* Pass in auto_number = NULL. We'll return an empty string for
1339 first_obj in that case. */
1340 if (!field_name_split(STRINGLIB_STR(self),
1341 STRINGLIB_LEN(self),
1342 &first, &first_idx, &it->it_field, NULL))
1343 goto done;
1344
1345 /* first becomes an integer, if possible; else a string */
1346 if (first_idx != -1)
1347 first_obj = PyLong_FromSsize_t(first_idx);
1348 else
1349 /* convert "first" into a string object */
1350 first_obj = SubString_new_object(&first);
1351 if (first_obj == NULL)
1352 goto done;
1353
1354 /* return a tuple of values */
1355 result = PyTuple_Pack(2, first_obj, it);
1356
1357 done:
1358 Py_XDECREF(it);
1359 Py_XDECREF(first_obj);
1360 return result;
1361 }
1362