1 #include "Python.h"
/*
 * Compatibility shim: when building against a Python older than 2.5 that
 * does not already provide PY_SSIZE_T_MIN, define Py_ssize_t as a plain int
 * together with its limits.
 */
#if !defined(PY_SSIZE_T_MIN) && PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define PY_SSIZE_T_MIN INT_MIN
#define PY_SSIZE_T_MAX INT_MAX
#endif
7
8 static Py_ssize_t
9 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
10 static PyObject *
11 ascii_escape_unicode(PyObject *pystr);
12 static PyObject *
13 ascii_escape_str(PyObject *pystr);
14 static PyObject *
15 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
16 void init_speedups(void);
17
/*
 * True when c can be copied to the output verbatim: printable ASCII
 * (' ' through '~') other than backslash, forward slash and double quote.
 * The argument is parenthesized so compound expressions expand correctly;
 * it is still evaluated more than once, so it must have no side effects.
 */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '/' && (c) != '"')
19
/* Output bytes produced by one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/*
 * Worst-case output bytes for a single input Py_UNICODE: wide builds may
 * need two "\uXXXX" escapes (a surrogate pair), narrow builds only one.
 */
#ifndef Py_UNICODE_WIDE
#define MAX_EXPANSION MIN_EXPANSION
#else
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#endif
26
27 static Py_ssize_t
ascii_escape_char(Py_UNICODE c,char * output,Py_ssize_t chars)28 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
29 Py_UNICODE x;
30 output[chars++] = '\\';
31 switch (c) {
32 case '/': output[chars++] = (char)c; break;
33 case '\\': output[chars++] = (char)c; break;
34 case '"': output[chars++] = (char)c; break;
35 case '\b': output[chars++] = 'b'; break;
36 case '\f': output[chars++] = 'f'; break;
37 case '\n': output[chars++] = 'n'; break;
38 case '\r': output[chars++] = 'r'; break;
39 case '\t': output[chars++] = 't'; break;
40 default:
41 #ifdef Py_UNICODE_WIDE
42 if (c >= 0x10000) {
43 /* UTF-16 surrogate pair */
44 Py_UNICODE v = c - 0x10000;
45 c = 0xd800 | ((v >> 10) & 0x3ff);
46 output[chars++] = 'u';
47 x = (c & 0xf000) >> 12;
48 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
49 x = (c & 0x0f00) >> 8;
50 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
51 x = (c & 0x00f0) >> 4;
52 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
53 x = (c & 0x000f);
54 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
55 c = 0xdc00 | (v & 0x3ff);
56 output[chars++] = '\\';
57 }
58 #endif
59 output[chars++] = 'u';
60 x = (c & 0xf000) >> 12;
61 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
62 x = (c & 0x0f00) >> 8;
63 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
64 x = (c & 0x00f0) >> 4;
65 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
66 x = (c & 0x000f);
67 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
68 }
69 return chars;
70 }
71
72 static PyObject *
ascii_escape_unicode(PyObject * pystr)73 ascii_escape_unicode(PyObject *pystr) {
74 Py_ssize_t i;
75 Py_ssize_t input_chars;
76 Py_ssize_t output_size;
77 Py_ssize_t chars;
78 PyObject *rval;
79 char *output;
80 Py_UNICODE *input_unicode;
81
82 input_chars = PyUnicode_GET_SIZE(pystr);
83 input_unicode = PyUnicode_AS_UNICODE(pystr);
84 /* One char input can be up to 6 chars output, estimate 4 of these */
85 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
86 rval = PyString_FromStringAndSize(NULL, output_size);
87 if (rval == NULL) {
88 return NULL;
89 }
90 output = PyString_AS_STRING(rval);
91 chars = 0;
92 output[chars++] = '"';
93 for (i = 0; i < input_chars; i++) {
94 Py_UNICODE c = input_unicode[i];
95 if (S_CHAR(c)) {
96 output[chars++] = (char)c;
97 } else {
98 chars = ascii_escape_char(c, output, chars);
99 }
100 if (output_size - chars < (1 + MAX_EXPANSION)) {
101 /* There's more than four, so let's resize by a lot */
102 output_size *= 2;
103 /* This is an upper bound */
104 if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
105 output_size = 2 + (input_chars * MAX_EXPANSION);
106 }
107 if (_PyString_Resize(&rval, output_size) == -1) {
108 return NULL;
109 }
110 output = PyString_AS_STRING(rval);
111 }
112 }
113 output[chars++] = '"';
114 if (_PyString_Resize(&rval, chars) == -1) {
115 return NULL;
116 }
117 return rval;
118 }
119
120 static PyObject *
ascii_escape_str(PyObject * pystr)121 ascii_escape_str(PyObject *pystr) {
122 Py_ssize_t i;
123 Py_ssize_t input_chars;
124 Py_ssize_t output_size;
125 Py_ssize_t chars;
126 PyObject *rval;
127 char *output;
128 char *input_str;
129
130 input_chars = PyString_GET_SIZE(pystr);
131 input_str = PyString_AS_STRING(pystr);
132 /* One char input can be up to 6 chars output, estimate 4 of these */
133 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
134 rval = PyString_FromStringAndSize(NULL, output_size);
135 if (rval == NULL) {
136 return NULL;
137 }
138 output = PyString_AS_STRING(rval);
139 chars = 0;
140 output[chars++] = '"';
141 for (i = 0; i < input_chars; i++) {
142 Py_UNICODE c = (Py_UNICODE)input_str[i];
143 if (S_CHAR(c)) {
144 output[chars++] = (char)c;
145 } else if (c > 0x7F) {
146 /* We hit a non-ASCII character, bail to unicode mode */
147 PyObject *uni;
148 Py_DECREF(rval);
149 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
150 if (uni == NULL) {
151 return NULL;
152 }
153 rval = ascii_escape_unicode(uni);
154 Py_DECREF(uni);
155 return rval;
156 } else {
157 chars = ascii_escape_char(c, output, chars);
158 }
159 /* An ASCII char can't possibly expand to a surrogate! */
160 if (output_size - chars < (1 + MIN_EXPANSION)) {
161 /* There's more than four, so let's resize by a lot */
162 output_size *= 2;
163 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
164 output_size = 2 + (input_chars * MIN_EXPANSION);
165 }
166 if (_PyString_Resize(&rval, output_size) == -1) {
167 return NULL;
168 }
169 output = PyString_AS_STRING(rval);
170 }
171 }
172 output[chars++] = '"';
173 if (_PyString_Resize(&rval, chars) == -1) {
174 return NULL;
175 }
176 return rval;
177 }
178
179 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
180 "encode_basestring_ascii(basestring) -> str\n"
181 "\n"
182 "..."
183 );
184
185 static PyObject *
py_encode_basestring_ascii(PyObject * self,PyObject * pystr)186 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
187 /* METH_O */
188 if (PyString_Check(pystr)) {
189 return ascii_escape_str(pystr);
190 } else if (PyUnicode_Check(pystr)) {
191 return ascii_escape_unicode(pystr);
192 }
193 PyErr_SetString(PyExc_TypeError, "first argument must be a string");
194 return NULL;
195 }
196
197 #define DEFN(n, k) \
198 { \
199 #n, \
200 (PyCFunction)py_ ##n, \
201 k, \
202 pydoc_ ##n \
203 }
204 static PyMethodDef speedups_methods[] = {
205 DEFN(encode_basestring_ascii, METH_O),
206 {}
207 };
208 #undef DEFN
209
210 void
init_speedups(void)211 init_speedups(void)
212 {
213 PyObject *m;
214 m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
215 }
216