• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* strop module */
2 
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include <ctype.h>
6 
7 PyDoc_STRVAR(strop_module__doc__,
8 "Common string manipulations, optimized for speed.\n"
9 "\n"
10 "Always use \"import string\" rather than referencing\n"
11 "this module directly.");
12 
13 /* XXX This file assumes that the <ctype.h> is*() functions
14    XXX are defined for all 8-bit characters! */
15 
/* Issue the module-wide DeprecationWarning and make the enclosing function
   return NULL when PyErr_Warn() reports failure (non-zero result).

   The body is wrapped in do { ... } while (0) so that `WARN;` expands to
   exactly one statement: a bare `if` in a macro would silently capture an
   `else` that follows the macro invocation. */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
               "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
19 
/* do_strip() is the single implementation behind lstrip(), rstrip() and
   strip(); one of the selectors below is passed to it to say which end(s)
   of the string should be trimmed. */

enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
27 
28 
29 static PyObject *
split_whitespace(char * s,Py_ssize_t len,Py_ssize_t maxsplit)30 split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
31 {
32     Py_ssize_t i = 0, j;
33     int err;
34     Py_ssize_t countsplit = 0;
35     PyObject* item;
36     PyObject *list = PyList_New(0);
37 
38     if (list == NULL)
39         return NULL;
40 
41     while (i < len) {
42         while (i < len && isspace(Py_CHARMASK(s[i]))) {
43             i = i+1;
44         }
45         j = i;
46         while (i < len && !isspace(Py_CHARMASK(s[i]))) {
47             i = i+1;
48         }
49         if (j < i) {
50             item = PyString_FromStringAndSize(s+j, i-j);
51             if (item == NULL)
52                 goto finally;
53 
54             err = PyList_Append(list, item);
55             Py_DECREF(item);
56             if (err < 0)
57                 goto finally;
58 
59             countsplit++;
60             while (i < len && isspace(Py_CHARMASK(s[i]))) {
61                 i = i+1;
62             }
63             if (maxsplit && (countsplit >= maxsplit) && i < len) {
64                 item = PyString_FromStringAndSize(
65                     s+i, len - i);
66                 if (item == NULL)
67                     goto finally;
68 
69                 err = PyList_Append(list, item);
70                 Py_DECREF(item);
71                 if (err < 0)
72                     goto finally;
73 
74                 i = len;
75             }
76         }
77     }
78     return list;
79   finally:
80     Py_DECREF(list);
81     return NULL;
82 }
83 
84 
85 PyDoc_STRVAR(splitfields__doc__,
86 "split(s [,sep [,maxsplit]]) -> list of strings\n"
87 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
88 "\n"
89 "Return a list of the words in the string s, using sep as the\n"
90 "delimiter string.  If maxsplit is nonzero, splits into at most\n"
91 "maxsplit words.  If sep is not specified, any whitespace string\n"
92 "is a separator.  Maxsplit defaults to 0.\n"
93 "\n"
94 "(split and splitfields are synonymous)");
95 
96 static PyObject *
strop_splitfields(PyObject * self,PyObject * args)97 strop_splitfields(PyObject *self, PyObject *args)
98 {
99     Py_ssize_t len, n, i, j, err;
100     Py_ssize_t splitcount, maxsplit;
101     char *s, *sub;
102     PyObject *list, *item;
103 
104     WARN;
105     sub = NULL;
106     n = 0;
107     splitcount = 0;
108     maxsplit = 0;
109     if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
110         return NULL;
111     if (sub == NULL)
112         return split_whitespace(s, len, maxsplit);
113     if (n == 0) {
114         PyErr_SetString(PyExc_ValueError, "empty separator");
115         return NULL;
116     }
117 
118     list = PyList_New(0);
119     if (list == NULL)
120         return NULL;
121 
122     i = j = 0;
123     while (i+n <= len) {
124         if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
125             item = PyString_FromStringAndSize(s+j, i-j);
126             if (item == NULL)
127                 goto fail;
128             err = PyList_Append(list, item);
129             Py_DECREF(item);
130             if (err < 0)
131                 goto fail;
132             i = j = i + n;
133             splitcount++;
134             if (maxsplit && (splitcount >= maxsplit))
135                 break;
136         }
137         else
138             i++;
139     }
140     item = PyString_FromStringAndSize(s+j, len-j);
141     if (item == NULL)
142         goto fail;
143     err = PyList_Append(list, item);
144     Py_DECREF(item);
145     if (err < 0)
146         goto fail;
147 
148     return list;
149 
150  fail:
151     Py_DECREF(list);
152     return NULL;
153 }
154 
155 
156 PyDoc_STRVAR(joinfields__doc__,
157 "join(list [,sep]) -> string\n"
158 "joinfields(list [,sep]) -> string\n"
159 "\n"
160 "Return a string composed of the words in list, with\n"
161 "intervening occurrences of sep.  Sep defaults to a single\n"
162 "space.\n"
163 "\n"
164 "(join and joinfields are synonymous)");
165 
166 static PyObject *
strop_joinfields(PyObject * self,PyObject * args)167 strop_joinfields(PyObject *self, PyObject *args)
168 {
169     PyObject *seq;
170     char *sep = NULL;
171     Py_ssize_t seqlen, seplen = 0;
172     Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
173     PyObject *res = NULL;
174     char* p = NULL;
175     ssizeargfunc getitemfunc;
176 
177     WARN;
178     if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
179         return NULL;
180     if (sep == NULL) {
181         sep = " ";
182         seplen = 1;
183     }
184 
185     seqlen = PySequence_Size(seq);
186     if (seqlen < 0 && PyErr_Occurred())
187         return NULL;
188 
189     if (seqlen == 1) {
190         /* Optimization if there's only one item */
191         PyObject *item = PySequence_GetItem(seq, 0);
192         if (item && !PyString_Check(item)) {
193             PyErr_SetString(PyExc_TypeError,
194                      "first argument must be sequence of strings");
195             Py_DECREF(item);
196             return NULL;
197         }
198         return item;
199     }
200 
201     if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
202         return NULL;
203     p = PyString_AsString(res);
204 
205     /* optimize for lists, since it's the most common case.  all others
206      * (tuples and arbitrary sequences) just use the sequence abstract
207      * interface.
208      */
209     if (PyList_Check(seq)) {
210         for (i = 0; i < seqlen; i++) {
211             PyObject *item = PyList_GET_ITEM(seq, i);
212             if (!PyString_Check(item)) {
213                 PyErr_SetString(PyExc_TypeError,
214                 "first argument must be sequence of strings");
215                 Py_DECREF(res);
216                 return NULL;
217             }
218             slen = PyString_GET_SIZE(item);
219             if (slen > PY_SSIZE_T_MAX - reslen ||
220                 seplen > PY_SSIZE_T_MAX - reslen - seplen) {
221                 PyErr_SetString(PyExc_OverflowError,
222                                 "input too long");
223                 Py_DECREF(res);
224                 return NULL;
225             }
226             while (reslen + slen + seplen >= sz) {
227                 if (_PyString_Resize(&res, sz * 2) < 0)
228                     return NULL;
229                 sz *= 2;
230                 p = PyString_AsString(res) + reslen;
231             }
232             if (i > 0) {
233                 memcpy(p, sep, seplen);
234                 p += seplen;
235                 reslen += seplen;
236             }
237             memcpy(p, PyString_AS_STRING(item), slen);
238             p += slen;
239             reslen += slen;
240         }
241         _PyString_Resize(&res, reslen);
242         return res;
243     }
244 
245     if (seq->ob_type->tp_as_sequence == NULL ||
246              (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
247     {
248         PyErr_SetString(PyExc_TypeError,
249                         "first argument must be a sequence");
250         return NULL;
251     }
252     /* This is now type safe */
253     for (i = 0; i < seqlen; i++) {
254         PyObject *item = getitemfunc(seq, i);
255         if (!item || !PyString_Check(item)) {
256             PyErr_SetString(PyExc_TypeError,
257                      "first argument must be sequence of strings");
258             Py_DECREF(res);
259             Py_XDECREF(item);
260             return NULL;
261         }
262         slen = PyString_GET_SIZE(item);
263         if (slen > PY_SSIZE_T_MAX - reslen ||
264             seplen > PY_SSIZE_T_MAX - reslen - seplen) {
265             PyErr_SetString(PyExc_OverflowError,
266                             "input too long");
267             Py_DECREF(res);
268             Py_XDECREF(item);
269             return NULL;
270         }
271         while (reslen + slen + seplen >= sz) {
272             if (_PyString_Resize(&res, sz * 2) < 0) {
273                 Py_DECREF(item);
274                 return NULL;
275             }
276             sz *= 2;
277             p = PyString_AsString(res) + reslen;
278         }
279         if (i > 0) {
280             memcpy(p, sep, seplen);
281             p += seplen;
282             reslen += seplen;
283         }
284         memcpy(p, PyString_AS_STRING(item), slen);
285         p += slen;
286         reslen += slen;
287         Py_DECREF(item);
288     }
289     _PyString_Resize(&res, reslen);
290     return res;
291 }
292 
293 
294 PyDoc_STRVAR(find__doc__,
295 "find(s, sub [,start [,end]]) -> in\n"
296 "\n"
297 "Return the lowest index in s where substring sub is found,\n"
298 "such that sub is contained within s[start,end].  Optional\n"
299 "arguments start and end are interpreted as in slice notation.\n"
300 "\n"
301 "Return -1 on failure.");
302 
303 static PyObject *
strop_find(PyObject * self,PyObject * args)304 strop_find(PyObject *self, PyObject *args)
305 {
306     char *s, *sub;
307     Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
308 
309     WARN;
310     if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
311         return NULL;
312 
313     if (last > len)
314         last = len;
315     if (last < 0)
316         last += len;
317     if (last < 0)
318         last = 0;
319     if (i < 0)
320         i += len;
321     if (i < 0)
322         i = 0;
323 
324     if (n == 0 && i <= last)
325         return PyInt_FromLong((long)i);
326 
327     last -= n;
328     for (; i <= last; ++i)
329         if (s[i] == sub[0] &&
330             (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
331             return PyInt_FromLong((long)i);
332 
333     return PyInt_FromLong(-1L);
334 }
335 
336 
337 PyDoc_STRVAR(rfind__doc__,
338 "rfind(s, sub [,start [,end]]) -> int\n"
339 "\n"
340 "Return the highest index in s where substring sub is found,\n"
341 "such that sub is contained within s[start,end].  Optional\n"
342 "arguments start and end are interpreted as in slice notation.\n"
343 "\n"
344 "Return -1 on failure.");
345 
346 static PyObject *
strop_rfind(PyObject * self,PyObject * args)347 strop_rfind(PyObject *self, PyObject *args)
348 {
349     char *s, *sub;
350     Py_ssize_t len, n, j;
351     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
352 
353     WARN;
354     if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
355         return NULL;
356 
357     if (last > len)
358         last = len;
359     if (last < 0)
360         last += len;
361     if (last < 0)
362         last = 0;
363     if (i < 0)
364         i += len;
365     if (i < 0)
366         i = 0;
367 
368     if (n == 0 && i <= last)
369         return PyInt_FromLong((long)last);
370 
371     for (j = last-n; j >= i; --j)
372         if (s[j] == sub[0] &&
373             (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
374             return PyInt_FromLong((long)j);
375 
376     return PyInt_FromLong(-1L);
377 }
378 
379 
380 static PyObject *
do_strip(PyObject * args,int striptype)381 do_strip(PyObject *args, int striptype)
382 {
383     char *s;
384     Py_ssize_t len, i, j;
385 
386 
387     if (PyString_AsStringAndSize(args, &s, &len))
388         return NULL;
389 
390     i = 0;
391     if (striptype != RIGHTSTRIP) {
392         while (i < len && isspace(Py_CHARMASK(s[i]))) {
393             i++;
394         }
395     }
396 
397     j = len;
398     if (striptype != LEFTSTRIP) {
399         do {
400             j--;
401         } while (j >= i && isspace(Py_CHARMASK(s[j])));
402         j++;
403     }
404 
405     if (i == 0 && j == len) {
406         Py_INCREF(args);
407         return args;
408     }
409     else
410         return PyString_FromStringAndSize(s+i, j-i);
411 }
412 
413 
414 PyDoc_STRVAR(strip__doc__,
415 "strip(s) -> string\n"
416 "\n"
417 "Return a copy of the string s with leading and trailing\n"
418 "whitespace removed.");
419 
420 static PyObject *
strop_strip(PyObject * self,PyObject * args)421 strop_strip(PyObject *self, PyObject *args)
422 {
423     WARN;
424     return do_strip(args, BOTHSTRIP);
425 }
426 
427 
428 PyDoc_STRVAR(lstrip__doc__,
429 "lstrip(s) -> string\n"
430 "\n"
431 "Return a copy of the string s with leading whitespace removed.");
432 
433 static PyObject *
strop_lstrip(PyObject * self,PyObject * args)434 strop_lstrip(PyObject *self, PyObject *args)
435 {
436     WARN;
437     return do_strip(args, LEFTSTRIP);
438 }
439 
440 
441 PyDoc_STRVAR(rstrip__doc__,
442 "rstrip(s) -> string\n"
443 "\n"
444 "Return a copy of the string s with trailing whitespace removed.");
445 
446 static PyObject *
strop_rstrip(PyObject * self,PyObject * args)447 strop_rstrip(PyObject *self, PyObject *args)
448 {
449     WARN;
450     return do_strip(args, RIGHTSTRIP);
451 }
452 
453 
454 PyDoc_STRVAR(lower__doc__,
455 "lower(s) -> string\n"
456 "\n"
457 "Return a copy of the string s converted to lowercase.");
458 
459 static PyObject *
strop_lower(PyObject * self,PyObject * args)460 strop_lower(PyObject *self, PyObject *args)
461 {
462     char *s, *s_new;
463     Py_ssize_t i, n;
464     PyObject *newstr;
465     int changed;
466 
467     WARN;
468     if (PyString_AsStringAndSize(args, &s, &n))
469         return NULL;
470     newstr = PyString_FromStringAndSize(NULL, n);
471     if (newstr == NULL)
472         return NULL;
473     s_new = PyString_AsString(newstr);
474     changed = 0;
475     for (i = 0; i < n; i++) {
476         int c = Py_CHARMASK(*s++);
477         if (isupper(c)) {
478             changed = 1;
479             *s_new = tolower(c);
480         } else
481             *s_new = c;
482         s_new++;
483     }
484     if (!changed) {
485         Py_DECREF(newstr);
486         Py_INCREF(args);
487         return args;
488     }
489     return newstr;
490 }
491 
492 
493 PyDoc_STRVAR(upper__doc__,
494 "upper(s) -> string\n"
495 "\n"
496 "Return a copy of the string s converted to uppercase.");
497 
498 static PyObject *
strop_upper(PyObject * self,PyObject * args)499 strop_upper(PyObject *self, PyObject *args)
500 {
501     char *s, *s_new;
502     Py_ssize_t i, n;
503     PyObject *newstr;
504     int changed;
505 
506     WARN;
507     if (PyString_AsStringAndSize(args, &s, &n))
508         return NULL;
509     newstr = PyString_FromStringAndSize(NULL, n);
510     if (newstr == NULL)
511         return NULL;
512     s_new = PyString_AsString(newstr);
513     changed = 0;
514     for (i = 0; i < n; i++) {
515         int c = Py_CHARMASK(*s++);
516         if (islower(c)) {
517             changed = 1;
518             *s_new = toupper(c);
519         } else
520             *s_new = c;
521         s_new++;
522     }
523     if (!changed) {
524         Py_DECREF(newstr);
525         Py_INCREF(args);
526         return args;
527     }
528     return newstr;
529 }
530 
531 
532 PyDoc_STRVAR(capitalize__doc__,
533 "capitalize(s) -> string\n"
534 "\n"
535 "Return a copy of the string s with only its first character\n"
536 "capitalized.");
537 
538 static PyObject *
strop_capitalize(PyObject * self,PyObject * args)539 strop_capitalize(PyObject *self, PyObject *args)
540 {
541     char *s, *s_new;
542     Py_ssize_t i, n;
543     PyObject *newstr;
544     int changed;
545 
546     WARN;
547     if (PyString_AsStringAndSize(args, &s, &n))
548         return NULL;
549     newstr = PyString_FromStringAndSize(NULL, n);
550     if (newstr == NULL)
551         return NULL;
552     s_new = PyString_AsString(newstr);
553     changed = 0;
554     if (0 < n) {
555         int c = Py_CHARMASK(*s++);
556         if (islower(c)) {
557             changed = 1;
558             *s_new = toupper(c);
559         } else
560             *s_new = c;
561         s_new++;
562     }
563     for (i = 1; i < n; i++) {
564         int c = Py_CHARMASK(*s++);
565         if (isupper(c)) {
566             changed = 1;
567             *s_new = tolower(c);
568         } else
569             *s_new = c;
570         s_new++;
571     }
572     if (!changed) {
573         Py_DECREF(newstr);
574         Py_INCREF(args);
575         return args;
576     }
577     return newstr;
578 }
579 
580 
581 PyDoc_STRVAR(expandtabs__doc__,
582 "expandtabs(string, [tabsize]) -> string\n"
583 "\n"
584 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
585 "depending on the current column and the given tab size (default 8).\n"
586 "The column number is reset to zero after each newline occurring in the\n"
587 "string.  This doesn't understand other non-printing characters.");
588 
589 static PyObject *
strop_expandtabs(PyObject * self,PyObject * args)590 strop_expandtabs(PyObject *self, PyObject *args)
591 {
592     /* Original by Fredrik Lundh */
593     char* e;
594     char* p;
595     char* q;
596     Py_ssize_t i, j;
597     PyObject* out;
598     char* string;
599     Py_ssize_t stringlen;
600     int tabsize = 8;
601 
602     WARN;
603     /* Get arguments */
604     if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
605         return NULL;
606     if (tabsize < 1) {
607         PyErr_SetString(PyExc_ValueError,
608                         "tabsize must be at least 1");
609         return NULL;
610     }
611 
612     /* First pass: determine size of output string */
613     i = j = 0; /* j: current column; i: total of previous lines */
614     e = string + stringlen;
615     for (p = string; p < e; p++) {
616         if (*p == '\t') {
617             Py_ssize_t incr = tabsize - (j%tabsize);
618             if (j > PY_SSIZE_T_MAX - incr)
619                 goto overflow;
620             j += incr;
621         } else {
622             if (j > PY_SSIZE_T_MAX - 1)
623                 goto overflow;
624             j++;
625             if (*p == '\n') {
626                 if (i > PY_SSIZE_T_MAX - j)
627                     goto overflow;
628                 i += j;
629                 j = 0;
630             }
631         }
632     }
633 
634     if (i > PY_SSIZE_T_MAX - j)
635         goto overflow;
636 
637     /* Second pass: create output string and fill it */
638     out = PyString_FromStringAndSize(NULL, i+j);
639     if (out == NULL)
640         return NULL;
641 
642     i = 0;
643     q = PyString_AS_STRING(out);
644 
645     for (p = string; p < e; p++) {
646         if (*p == '\t') {
647             j = tabsize - (i%tabsize);
648             i += j;
649             while (j-- > 0)
650                 *q++ = ' ';
651         } else {
652             *q++ = *p;
653             i++;
654             if (*p == '\n')
655                 i = 0;
656         }
657     }
658 
659     return out;
660   overflow:
661     PyErr_SetString(PyExc_OverflowError, "result is too long");
662     return NULL;
663 }
664 
665 
666 PyDoc_STRVAR(count__doc__,
667 "count(s, sub[, start[, end]]) -> int\n"
668 "\n"
669 "Return the number of occurrences of substring sub in string\n"
670 "s[start:end].  Optional arguments start and end are\n"
671 "interpreted as in slice notation.");
672 
673 static PyObject *
strop_count(PyObject * self,PyObject * args)674 strop_count(PyObject *self, PyObject *args)
675 {
676     char *s, *sub;
677     Py_ssize_t len, n;
678     Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
679     Py_ssize_t m, r;
680 
681     WARN;
682     if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
683         return NULL;
684     if (last > len)
685         last = len;
686     if (last < 0)
687         last += len;
688     if (last < 0)
689         last = 0;
690     if (i < 0)
691         i += len;
692     if (i < 0)
693         i = 0;
694     m = last + 1 - n;
695     if (n == 0)
696         return PyInt_FromLong((long) (m-i));
697 
698     r = 0;
699     while (i < m) {
700         if (!memcmp(s+i, sub, n)) {
701             r++;
702             i += n;
703         } else {
704             i++;
705         }
706     }
707     return PyInt_FromLong((long) r);
708 }
709 
710 
711 PyDoc_STRVAR(swapcase__doc__,
712 "swapcase(s) -> string\n"
713 "\n"
714 "Return a copy of the string s with upper case characters\n"
715 "converted to lowercase and vice versa.");
716 
717 static PyObject *
strop_swapcase(PyObject * self,PyObject * args)718 strop_swapcase(PyObject *self, PyObject *args)
719 {
720     char *s, *s_new;
721     Py_ssize_t i, n;
722     PyObject *newstr;
723     int changed;
724 
725     WARN;
726     if (PyString_AsStringAndSize(args, &s, &n))
727         return NULL;
728     newstr = PyString_FromStringAndSize(NULL, n);
729     if (newstr == NULL)
730         return NULL;
731     s_new = PyString_AsString(newstr);
732     changed = 0;
733     for (i = 0; i < n; i++) {
734         int c = Py_CHARMASK(*s++);
735         if (islower(c)) {
736             changed = 1;
737             *s_new = toupper(c);
738         }
739         else if (isupper(c)) {
740             changed = 1;
741             *s_new = tolower(c);
742         }
743         else
744             *s_new = c;
745         s_new++;
746     }
747     if (!changed) {
748         Py_DECREF(newstr);
749         Py_INCREF(args);
750         return args;
751     }
752     return newstr;
753 }
754 
755 
756 PyDoc_STRVAR(atoi__doc__,
757 "atoi(s [,base]) -> int\n"
758 "\n"
759 "Return the integer represented by the string s in the given\n"
760 "base, which defaults to 10.  The string s must consist of one\n"
761 "or more digits, possibly preceded by a sign.  If base is 0, it\n"
762 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
763 "0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
764 "accepted.");
765 
766 static PyObject *
strop_atoi(PyObject * self,PyObject * args)767 strop_atoi(PyObject *self, PyObject *args)
768 {
769     char *s, *end;
770     int base = 10;
771     long x;
772     char buffer[256]; /* For errors */
773 
774     WARN;
775     if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
776         return NULL;
777 
778     if ((base != 0 && base < 2) || base > 36) {
779         PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
780         return NULL;
781     }
782 
783     while (*s && isspace(Py_CHARMASK(*s)))
784         s++;
785     errno = 0;
786     if (base == 0 && s[0] == '0')
787         x = (long) PyOS_strtoul(s, &end, base);
788     else
789         x = PyOS_strtol(s, &end, base);
790     if (end == s || !isalnum(Py_CHARMASK(end[-1])))
791         goto bad;
792     while (*end && isspace(Py_CHARMASK(*end)))
793         end++;
794     if (*end != '\0') {
795   bad:
796         PyOS_snprintf(buffer, sizeof(buffer),
797                       "invalid literal for atoi(): %.200s", s);
798         PyErr_SetString(PyExc_ValueError, buffer);
799         return NULL;
800     }
801     else if (errno != 0) {
802         PyOS_snprintf(buffer, sizeof(buffer),
803                       "atoi() literal too large: %.200s", s);
804         PyErr_SetString(PyExc_ValueError, buffer);
805         return NULL;
806     }
807     return PyInt_FromLong(x);
808 }
809 
810 
811 PyDoc_STRVAR(atol__doc__,
812 "atol(s [,base]) -> long\n"
813 "\n"
814 "Return the long integer represented by the string s in the\n"
815 "given base, which defaults to 10.  The string s must consist\n"
816 "of one or more digits, possibly preceded by a sign.  If base\n"
817 "is 0, it is chosen from the leading characters of s, 0 for\n"
818 "octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
819 "0x or 0X is accepted.  A trailing L or l is not accepted,\n"
820 "unless base is 0.");
821 
822 static PyObject *
strop_atol(PyObject * self,PyObject * args)823 strop_atol(PyObject *self, PyObject *args)
824 {
825     char *s, *end;
826     int base = 10;
827     PyObject *x;
828     char buffer[256]; /* For errors */
829 
830     WARN;
831     if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
832         return NULL;
833 
834     if ((base != 0 && base < 2) || base > 36) {
835         PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
836         return NULL;
837     }
838 
839     while (*s && isspace(Py_CHARMASK(*s)))
840         s++;
841     if (s[0] == '\0') {
842         PyErr_SetString(PyExc_ValueError, "empty string for atol()");
843         return NULL;
844     }
845     x = PyLong_FromString(s, &end, base);
846     if (x == NULL)
847         return NULL;
848     if (base == 0 && (*end == 'l' || *end == 'L'))
849         end++;
850     while (*end && isspace(Py_CHARMASK(*end)))
851         end++;
852     if (*end != '\0') {
853         PyOS_snprintf(buffer, sizeof(buffer),
854                       "invalid literal for atol(): %.200s", s);
855         PyErr_SetString(PyExc_ValueError, buffer);
856         Py_DECREF(x);
857         return NULL;
858     }
859     return x;
860 }
861 
862 
863 PyDoc_STRVAR(atof__doc__,
864 "atof(s) -> float\n"
865 "\n"
866 "Return the floating point number represented by the string s.");
867 
868 static PyObject *
strop_atof(PyObject * self,PyObject * args)869 strop_atof(PyObject *self, PyObject *args)
870 {
871     char *s, *end;
872     double x;
873     char buffer[256]; /* For errors */
874 
875     WARN;
876     if (!PyArg_ParseTuple(args, "s:atof", &s))
877         return NULL;
878     while (*s && isspace(Py_CHARMASK(*s)))
879         s++;
880     if (s[0] == '\0') {
881         PyErr_SetString(PyExc_ValueError, "empty string for atof()");
882         return NULL;
883     }
884 
885     PyFPE_START_PROTECT("strop_atof", return 0)
886     x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
887     PyFPE_END_PROTECT(x)
888     if (x == -1 && PyErr_Occurred())
889         return NULL;
890     while (*end && isspace(Py_CHARMASK(*end)))
891         end++;
892     if (*end != '\0') {
893         PyOS_snprintf(buffer, sizeof(buffer),
894                       "invalid literal for atof(): %.200s", s);
895         PyErr_SetString(PyExc_ValueError, buffer);
896         return NULL;
897     }
898     return PyFloat_FromDouble(x);
899 }
900 
901 
902 PyDoc_STRVAR(maketrans__doc__,
903 "maketrans(frm, to) -> string\n"
904 "\n"
905 "Return a translation table (a string of 256 bytes long)\n"
906 "suitable for use in string.translate.  The strings frm and to\n"
907 "must be of the same length.");
908 
909 static PyObject *
strop_maketrans(PyObject * self,PyObject * args)910 strop_maketrans(PyObject *self, PyObject *args)
911 {
912     unsigned char *c, *from=NULL, *to=NULL;
913     Py_ssize_t i, fromlen=0, tolen=0;
914     PyObject *result;
915 
916     if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
917         return NULL;
918 
919     if (fromlen != tolen) {
920         PyErr_SetString(PyExc_ValueError,
921                         "maketrans arguments must have same length");
922         return NULL;
923     }
924 
925     result = PyString_FromStringAndSize((char *)NULL, 256);
926     if (result == NULL)
927         return NULL;
928     c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
929     for (i = 0; i < 256; i++)
930         c[i]=(unsigned char)i;
931     for (i = 0; i < fromlen; i++)
932         c[from[i]]=to[i];
933 
934     return result;
935 }
936 
937 
938 PyDoc_STRVAR(translate__doc__,
939 "translate(s,table [,deletechars]) -> string\n"
940 "\n"
941 "Return a copy of the string s, where all characters occurring\n"
942 "in the optional argument deletechars are removed, and the\n"
943 "remaining characters have been mapped through the given\n"
944 "translation table, which must be a string of length 256.");
945 
946 static PyObject *
strop_translate(PyObject * self,PyObject * args)947 strop_translate(PyObject *self, PyObject *args)
948 {
949     register char *input, *table, *output;
950     Py_ssize_t i;
951     int c, changed = 0;
952     PyObject *input_obj;
953     char *table1, *output_start, *del_table=NULL;
954     Py_ssize_t inlen, tablen, dellen = 0;
955     PyObject *result;
956     int trans_table[256];
957 
958     WARN;
959     if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
960                           &table1, &tablen, &del_table, &dellen))
961         return NULL;
962     if (tablen != 256) {
963         PyErr_SetString(PyExc_ValueError,
964                       "translation table must be 256 characters long");
965         return NULL;
966     }
967 
968     table = table1;
969     inlen = PyString_GET_SIZE(input_obj);
970     result = PyString_FromStringAndSize((char *)NULL, inlen);
971     if (result == NULL)
972         return NULL;
973     output_start = output = PyString_AsString(result);
974     input = PyString_AsString(input_obj);
975 
976     if (dellen == 0) {
977         /* If no deletions are required, use faster code */
978         for (i = inlen; --i >= 0; ) {
979             c = Py_CHARMASK(*input++);
980             if (Py_CHARMASK((*output++ = table[c])) != c)
981                 changed = 1;
982         }
983         if (changed)
984             return result;
985         Py_DECREF(result);
986         Py_INCREF(input_obj);
987         return input_obj;
988     }
989 
990     for (i = 0; i < 256; i++)
991         trans_table[i] = Py_CHARMASK(table[i]);
992 
993     for (i = 0; i < dellen; i++)
994         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
995 
996     for (i = inlen; --i >= 0; ) {
997         c = Py_CHARMASK(*input++);
998         if (trans_table[c] != -1)
999             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1000                 continue;
1001         changed = 1;
1002     }
1003     if (!changed) {
1004         Py_DECREF(result);
1005         Py_INCREF(input_obj);
1006         return input_obj;
1007     }
1008     /* Fix the size of the resulting string */
1009     if (inlen > 0)
1010         _PyString_Resize(&result, output - output_start);
1011     return result;
1012 }
1013 
1014 
1015 /* What follows is used for implementing replace().  Perry Stoll. */
1016 
1017 /*
1018   mymemfind
1019 
1020   strstr replacement for arbitrary blocks of memory.
1021 
1022   Locates the first occurrence in the memory pointed to by MEM of the
1023   contents of memory pointed to by PAT.  Returns the index into MEM if
1024   found, or -1 if not found.  If len of PAT is greater than length of
1025   MEM, the function returns -1.
1026 */
1027 static Py_ssize_t
mymemfind(const char * mem,Py_ssize_t len,const char * pat,Py_ssize_t pat_len)1028 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1029 {
1030     register Py_ssize_t ii;
1031 
1032     /* pattern can not occur in the last pat_len-1 chars */
1033     len -= pat_len;
1034 
1035     for (ii = 0; ii <= len; ii++) {
1036         if (mem[ii] == pat[0] &&
1037             (pat_len == 1 ||
1038              memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1039             return ii;
1040         }
1041     }
1042     return -1;
1043 }
1044 
1045 /*
1046   mymemcnt
1047 
1048    Return the number of distinct times PAT is found in MEM.
1049    meaning mem=1111 and pat==11 returns 2.
1050        mem=11111 and pat==11 also return 2.
1051  */
1052 static Py_ssize_t
mymemcnt(const char * mem,Py_ssize_t len,const char * pat,Py_ssize_t pat_len)1053 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1054 {
1055     register Py_ssize_t offset = 0;
1056     Py_ssize_t nfound = 0;
1057 
1058     while (len >= 0) {
1059         offset = mymemfind(mem, len, pat, pat_len);
1060         if (offset == -1)
1061             break;
1062         mem += offset + pat_len;
1063         len -= offset + pat_len;
1064         nfound++;
1065     }
1066     return nfound;
1067 }
1068 
1069 /*
1070    mymemreplace
1071 
1072    Return a string in which all occurrences of PAT in memory STR are
1073    replaced with SUB.
1074 
1075    If length of PAT is less than length of STR or there are no occurrences
1076    of PAT in STR, then the original string is returned. Otherwise, a new
1077    string is allocated here and returned.
1078 
1079    on return, out_len is:
1080        the length of output string, or
1081        -1 if the input string is returned, or
1082        unchanged if an error occurs (no memory).
1083 
1084    return value is:
1085        the new string allocated locally, or
1086        NULL if an error occurred.
1087 */
1088 static char *
mymemreplace(const char * str,Py_ssize_t len,const char * pat,Py_ssize_t pat_len,const char * sub,Py_ssize_t sub_len,Py_ssize_t count,Py_ssize_t * out_len)1089 mymemreplace(const char *str, Py_ssize_t len,           /* input string */
1090          const char *pat, Py_ssize_t pat_len,           /* pattern string to find */
1091          const char *sub, Py_ssize_t sub_len,           /* substitution string */
1092          Py_ssize_t count,                              /* number of replacements */
1093          Py_ssize_t *out_len)
1094 {
1095     char *out_s;
1096     char *new_s;
1097     Py_ssize_t nfound, offset, new_len, delta_len, abs_delta;
1098 
1099     if (len == 0 || pat_len > len)
1100         goto return_same;
1101 
1102     /* find length of output string */
1103     nfound = mymemcnt(str, len, pat, pat_len);
1104     if (count < 0)
1105         count = PY_SSIZE_T_MAX;
1106     else if (nfound > count)
1107         nfound = count;
1108     if (nfound == 0)
1109         goto return_same;
1110 
1111     delta_len = sub_len - pat_len;
1112     abs_delta = (delta_len < 0) ? -delta_len : delta_len;
1113     if (PY_SSIZE_T_MAX/nfound < abs_delta)
1114         return NULL;
1115     delta_len *= nfound;
1116     if (PY_SSIZE_T_MAX - len < delta_len)
1117         return NULL;
1118     new_len = len + delta_len;
1119     if (new_len == 0) {
1120         /* Have to allocate something for the caller to free(). */
1121         out_s = (char *)PyMem_MALLOC(1);
1122         if (out_s == NULL)
1123             return NULL;
1124         out_s[0] = '\0';
1125     }
1126     else {
1127         assert(new_len > 0);
1128         new_s = (char *)PyMem_MALLOC(new_len);
1129         if (new_s == NULL)
1130             return NULL;
1131         out_s = new_s;
1132 
1133         for (; count > 0 && len > 0; --count) {
1134             /* find index of next instance of pattern */
1135             offset = mymemfind(str, len, pat, pat_len);
1136             if (offset == -1)
1137                 break;
1138 
1139             /* copy non matching part of input string */
1140             memcpy(new_s, str, offset);
1141             str += offset + pat_len;
1142             len -= offset + pat_len;
1143 
1144             /* copy substitute into the output string */
1145             new_s += offset;
1146             memcpy(new_s, sub, sub_len);
1147             new_s += sub_len;
1148         }
1149         /* copy any remaining values into output string */
1150         if (len > 0)
1151             memcpy(new_s, str, len);
1152     }
1153     *out_len = new_len;
1154     return out_s;
1155 
1156   return_same:
1157     *out_len = -1;
1158     return (char *)str; /* cast away const */
1159 }
1160 
1161 
1162 PyDoc_STRVAR(replace__doc__,
1163 "replace (str, old, new[, maxsplit]) -> string\n"
1164 "\n"
1165 "Return a copy of string str with all occurrences of substring\n"
1166 "old replaced by new. If the optional argument maxsplit is\n"
1167 "given, only the first maxsplit occurrences are replaced.");
1168 
1169 static PyObject *
strop_replace(PyObject * self,PyObject * args)1170 strop_replace(PyObject *self, PyObject *args)
1171 {
1172     char *str, *pat,*sub,*new_s;
1173     Py_ssize_t len,pat_len,sub_len,out_len;
1174     Py_ssize_t count = -1;
1175     PyObject *newstr;
1176 
1177     WARN;
1178     if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
1179                           &str, &len, &pat, &pat_len, &sub, &sub_len,
1180                           &count))
1181         return NULL;
1182     if (pat_len <= 0) {
1183         PyErr_SetString(PyExc_ValueError, "empty pattern string");
1184         return NULL;
1185     }
1186     /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
1187      * current (2.1) string.py and string methods.  Preserve this for
1188      * ... well, hard to say for what <wink>.
1189      */
1190     if (count == 0)
1191         count = -1;
1192     new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1193     if (new_s == NULL) {
1194         PyErr_NoMemory();
1195         return NULL;
1196     }
1197     if (out_len == -1) {
1198         /* we're returning another reference to the input string */
1199         newstr = PyTuple_GetItem(args, 0);
1200         Py_XINCREF(newstr);
1201     }
1202     else {
1203         newstr = PyString_FromStringAndSize(new_s, out_len);
1204         PyMem_FREE(new_s);
1205     }
1206     return newstr;
1207 }
1208 
1209 
1210 /* List of functions defined in the module */
1211 
1212 static PyMethodDef
1213 strop_methods[] = {
1214     {"atof",            strop_atof,        METH_VARARGS, atof__doc__},
1215     {"atoi",            strop_atoi,        METH_VARARGS, atoi__doc__},
1216     {"atol",            strop_atol,        METH_VARARGS, atol__doc__},
1217     {"capitalize",      strop_capitalize,  METH_O,       capitalize__doc__},
1218     {"count",           strop_count,       METH_VARARGS, count__doc__},
1219     {"expandtabs",      strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
1220     {"find",            strop_find,        METH_VARARGS, find__doc__},
1221     {"join",            strop_joinfields,  METH_VARARGS, joinfields__doc__},
1222     {"joinfields",      strop_joinfields,  METH_VARARGS, joinfields__doc__},
1223     {"lstrip",          strop_lstrip,      METH_O,       lstrip__doc__},
1224     {"lower",           strop_lower,       METH_O,       lower__doc__},
1225     {"maketrans",       strop_maketrans,   METH_VARARGS, maketrans__doc__},
1226     {"replace",         strop_replace,     METH_VARARGS, replace__doc__},
1227     {"rfind",           strop_rfind,       METH_VARARGS, rfind__doc__},
1228     {"rstrip",          strop_rstrip,      METH_O,       rstrip__doc__},
1229     {"split",           strop_splitfields, METH_VARARGS, splitfields__doc__},
1230     {"splitfields",     strop_splitfields, METH_VARARGS, splitfields__doc__},
1231     {"strip",           strop_strip,       METH_O,       strip__doc__},
1232     {"swapcase",        strop_swapcase,    METH_O,       swapcase__doc__},
1233     {"translate",       strop_translate,   METH_VARARGS, translate__doc__},
1234     {"upper",           strop_upper,       METH_O,       upper__doc__},
1235     {NULL,              NULL}   /* sentinel */
1236 };
1237 
1238 
1239 PyMODINIT_FUNC
initstrop(void)1240 initstrop(void)
1241 {
1242     PyObject *m, *s;
1243     char buf[256];
1244     int c, n;
1245     m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1246                        (PyObject*)NULL, PYTHON_API_VERSION);
1247     if (m == NULL)
1248         return;
1249 
1250     /* Create 'whitespace' object */
1251     n = 0;
1252     for (c = 0; c < 256; c++) {
1253         if (isspace(c))
1254             buf[n++] = c;
1255     }
1256     s = PyString_FromStringAndSize(buf, n);
1257     if (s)
1258         PyModule_AddObject(m, "whitespace", s);
1259 
1260     /* Create 'lowercase' object */
1261     n = 0;
1262     for (c = 0; c < 256; c++) {
1263         if (islower(c))
1264             buf[n++] = c;
1265     }
1266     s = PyString_FromStringAndSize(buf, n);
1267     if (s)
1268         PyModule_AddObject(m, "lowercase", s);
1269 
1270     /* Create 'uppercase' object */
1271     n = 0;
1272     for (c = 0; c < 256; c++) {
1273         if (isupper(c))
1274             buf[n++] = c;
1275     }
1276     s = PyString_FromStringAndSize(buf, n);
1277     if (s)
1278         PyModule_AddObject(m, "uppercase", s);
1279 }
1280