• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
/*
 * Compatibility shim: when building against an interpreter whose
 * PY_VERSION_HEX is below 2.5 and which has not already supplied
 * PY_SSIZE_T_MIN, provide Py_ssize_t as a plain int together with its
 * limits.
 */
#if PY_VERSION_HEX < 0x02050000
#ifndef PY_SSIZE_T_MIN
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#endif /* !PY_SSIZE_T_MIN */
#endif /* PY_VERSION_HEX < 0x02050000 */
7 
8 static Py_ssize_t
9 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
10 static PyObject *
11 ascii_escape_unicode(PyObject *pystr);
12 static PyObject *
13 ascii_escape_str(PyObject *pystr);
14 static PyObject *
15 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
16 void init_speedups(void);
17 
/*
 * True when c may be copied to the output verbatim: a printable ASCII
 * character (' ' through '~') other than backslash, forward slash and
 * double quote.
 *
 * The parameter is parenthesized so that arguments containing operators
 * expand correctly. It is still evaluated several times, so do not pass an
 * expression with side effects.
 */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '/' && (c) != '"')
19 
/* Output bytes produced by the longest single escape, "\uXXXX". */
#define MIN_EXPANSION 6

/*
 * Worst-case output bytes for one input element. When Py_UNICODE_WIDE is
 * defined, ascii_escape_char may emit two consecutive \uXXXX escapes for a
 * value >= 0x10000, so the bound doubles.
 */
#if !defined(Py_UNICODE_WIDE)
#define MAX_EXPANSION MIN_EXPANSION
#else
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#endif
26 
27 static Py_ssize_t
ascii_escape_char(Py_UNICODE c,char * output,Py_ssize_t chars)28 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
29     Py_UNICODE x;
30     output[chars++] = '\\';
31     switch (c) {
32         case '/': output[chars++] = (char)c; break;
33         case '\\': output[chars++] = (char)c; break;
34         case '"': output[chars++] = (char)c; break;
35         case '\b': output[chars++] = 'b'; break;
36         case '\f': output[chars++] = 'f'; break;
37         case '\n': output[chars++] = 'n'; break;
38         case '\r': output[chars++] = 'r'; break;
39         case '\t': output[chars++] = 't'; break;
40         default:
41 #ifdef Py_UNICODE_WIDE
42             if (c >= 0x10000) {
43                 /* UTF-16 surrogate pair */
44                 Py_UNICODE v = c - 0x10000;
45                 c = 0xd800 | ((v >> 10) & 0x3ff);
46                 output[chars++] = 'u';
47                 x = (c & 0xf000) >> 12;
48                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
49                 x = (c & 0x0f00) >> 8;
50                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
51                 x = (c & 0x00f0) >> 4;
52                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
53                 x = (c & 0x000f);
54                 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
55                 c = 0xdc00 | (v & 0x3ff);
56                 output[chars++] = '\\';
57             }
58 #endif
59             output[chars++] = 'u';
60             x = (c & 0xf000) >> 12;
61             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
62             x = (c & 0x0f00) >> 8;
63             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
64             x = (c & 0x00f0) >> 4;
65             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
66             x = (c & 0x000f);
67             output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
68     }
69     return chars;
70 }
71 
72 static PyObject *
ascii_escape_unicode(PyObject * pystr)73 ascii_escape_unicode(PyObject *pystr) {
74     Py_ssize_t i;
75     Py_ssize_t input_chars;
76     Py_ssize_t output_size;
77     Py_ssize_t chars;
78     PyObject *rval;
79     char *output;
80     Py_UNICODE *input_unicode;
81 
82     input_chars = PyUnicode_GET_SIZE(pystr);
83     input_unicode = PyUnicode_AS_UNICODE(pystr);
84     /* One char input can be up to 6 chars output, estimate 4 of these */
85     output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
86     rval = PyString_FromStringAndSize(NULL, output_size);
87     if (rval == NULL) {
88         return NULL;
89     }
90     output = PyString_AS_STRING(rval);
91     chars = 0;
92     output[chars++] = '"';
93     for (i = 0; i < input_chars; i++) {
94         Py_UNICODE c = input_unicode[i];
95         if (S_CHAR(c)) {
96             output[chars++] = (char)c;
97         } else {
98             chars = ascii_escape_char(c, output, chars);
99         }
100         if (output_size - chars < (1 + MAX_EXPANSION)) {
101             /* There's more than four, so let's resize by a lot */
102             output_size *= 2;
103             /* This is an upper bound */
104             if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
105                 output_size = 2 + (input_chars * MAX_EXPANSION);
106             }
107             if (_PyString_Resize(&rval, output_size) == -1) {
108                 return NULL;
109             }
110             output = PyString_AS_STRING(rval);
111         }
112     }
113     output[chars++] = '"';
114     if (_PyString_Resize(&rval, chars) == -1) {
115         return NULL;
116     }
117     return rval;
118 }
119 
120 static PyObject *
ascii_escape_str(PyObject * pystr)121 ascii_escape_str(PyObject *pystr) {
122     Py_ssize_t i;
123     Py_ssize_t input_chars;
124     Py_ssize_t output_size;
125     Py_ssize_t chars;
126     PyObject *rval;
127     char *output;
128     char *input_str;
129 
130     input_chars = PyString_GET_SIZE(pystr);
131     input_str = PyString_AS_STRING(pystr);
132     /* One char input can be up to 6 chars output, estimate 4 of these */
133     output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
134     rval = PyString_FromStringAndSize(NULL, output_size);
135     if (rval == NULL) {
136         return NULL;
137     }
138     output = PyString_AS_STRING(rval);
139     chars = 0;
140     output[chars++] = '"';
141     for (i = 0; i < input_chars; i++) {
142         Py_UNICODE c = (Py_UNICODE)input_str[i];
143         if (S_CHAR(c)) {
144             output[chars++] = (char)c;
145         } else if (c > 0x7F) {
146             /* We hit a non-ASCII character, bail to unicode mode */
147             PyObject *uni;
148             Py_DECREF(rval);
149             uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
150             if (uni == NULL) {
151                 return NULL;
152             }
153             rval = ascii_escape_unicode(uni);
154             Py_DECREF(uni);
155             return rval;
156         } else {
157             chars = ascii_escape_char(c, output, chars);
158         }
159         /* An ASCII char can't possibly expand to a surrogate! */
160         if (output_size - chars < (1 + MIN_EXPANSION)) {
161             /* There's more than four, so let's resize by a lot */
162             output_size *= 2;
163             if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
164                 output_size = 2 + (input_chars * MIN_EXPANSION);
165             }
166             if (_PyString_Resize(&rval, output_size) == -1) {
167                 return NULL;
168             }
169             output = PyString_AS_STRING(rval);
170         }
171     }
172     output[chars++] = '"';
173     if (_PyString_Resize(&rval, chars) == -1) {
174         return NULL;
175     }
176     return rval;
177 }
178 
179 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
180     "encode_basestring_ascii(basestring) -> str\n"
181     "\n"
182     "..."
183 );
184 
185 static PyObject *
py_encode_basestring_ascii(PyObject * self,PyObject * pystr)186 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
187     /* METH_O */
188     if (PyString_Check(pystr)) {
189         return ascii_escape_str(pystr);
190     } else if (PyUnicode_Check(pystr)) {
191         return ascii_escape_unicode(pystr);
192     }
193     PyErr_SetString(PyExc_TypeError, "first argument must be a string");
194     return NULL;
195 }
196 
197 #define DEFN(n, k) \
198     {  \
199         #n, \
200         (PyCFunction)py_ ##n, \
201         k, \
202         pydoc_ ##n \
203     }
204 static PyMethodDef speedups_methods[] = {
205     DEFN(encode_basestring_ascii, METH_O),
206     {}
207 };
208 #undef DEFN
209 
210 void
init_speedups(void)211 init_speedups(void)
212 {
213     PyObject *m;
214     m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
215 }
216