1 #include <Python.h>
2
3 static PyObject* markup;
4
5 static int
init_constants(void)6 init_constants(void)
7 {
8 PyObject *module;
9
10 /* import markup type so that we can mark the return value */
11 module = PyImport_ImportModule("markupsafe");
12 if (!module)
13 return 0;
14 markup = PyObject_GetAttrString(module, "Markup");
15 Py_DECREF(module);
16
17 return 1;
18 }
19
/*
 * GET_DELTA(inp, inp_end, delta)
 *
 * Walk the code units in [inp, inp_end) and add to `delta` the number of
 * extra units the escaped form needs: 4 for each double quote, single quote
 * or ampersand (one unit becomes a five-unit entity) and 3 for each '<' or
 * '>' (one unit becomes a four-unit entity).
 *
 * Side effects: `inp` is advanced to `inp_end`, and `delta` is accumulated
 * (not reset), so the caller initialises it. All arguments are evaluated
 * more than once and `inp` / `delta` must be modifiable lvalues.
 *
 * The body is wrapped in do/while(0) so the macro behaves as one statement,
 * including as the unbraced body of an if/else.
 */
#define GET_DELTA(inp, inp_end, delta) \
    do { \
        while ((inp) < (inp_end)) { \
            switch (*(inp)++) { \
            case '"': \
            case '\'': \
            case '&': \
                (delta) += 4; \
                break; \
            case '<': \
            case '>': \
                (delta) += 3; \
                break; \
            default: \
                break; \
            } \
        } \
    } while (0)
34
/*
 * DO_ESCAPE(inp, inp_end, outp)
 *
 * Copy the code units in [inp, inp_end) to `outp`, replacing each special
 * character with its HTML entity:
 *
 *     double quote -> &#34;     single quote -> &#39;     & -> &amp;
 *     <            -> &lt;      >            -> &gt;
 *
 * Runs of ordinary units are not copied one by one: `ncopy` counts the
 * pending run and it is flushed with a single memcpy when a special
 * character (or the end of the input) is reached.
 *
 * Requirements and side effects:
 *   - `inp` and `outp` must point to elements of the same size; the copy
 *     length is computed from sizeof(*outp).
 *   - The destination must have room for the input length plus the value
 *     GET_DELTA computes for the same input.
 *   - `inp` is advanced to `inp_end`. `outp` is advanced past everything
 *     written except the final flushed run.
 *   - All arguments are evaluated more than once and `inp` / `outp` must be
 *     modifiable lvalues.
 *
 * The body is wrapped in do/while(0) so the macro behaves as one statement,
 * including as the unbraced body of an if/else.
 */
#define DO_ESCAPE(inp, inp_end, outp) \
    do { \
        Py_ssize_t ncopy = 0; \
        while ((inp) < (inp_end)) { \
            switch (*(inp)) { \
            case '"': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = '#'; \
                *(outp)++ = '3'; \
                *(outp)++ = '4'; \
                *(outp)++ = ';'; \
                break; \
            case '\'': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = '#'; \
                *(outp)++ = '3'; \
                *(outp)++ = '9'; \
                *(outp)++ = ';'; \
                break; \
            case '&': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = 'a'; \
                *(outp)++ = 'm'; \
                *(outp)++ = 'p'; \
                *(outp)++ = ';'; \
                break; \
            case '<': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = 'l'; \
                *(outp)++ = 't'; \
                *(outp)++ = ';'; \
                break; \
            case '>': \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; ncopy = 0; \
                *(outp)++ = '&'; \
                *(outp)++ = 'g'; \
                *(outp)++ = 't'; \
                *(outp)++ = ';'; \
                break; \
            default: \
                ncopy++; \
                break; \
            } \
            (inp)++; \
        } \
        memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
    } while (0)
90
91 static PyObject*
escape_unicode_kind1(PyUnicodeObject * in)92 escape_unicode_kind1(PyUnicodeObject *in)
93 {
94 Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
95 Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
96 Py_UCS1 *outp;
97 PyObject *out;
98 Py_ssize_t delta = 0;
99
100 GET_DELTA(inp, inp_end, delta);
101 if (!delta) {
102 Py_INCREF(in);
103 return (PyObject*)in;
104 }
105
106 out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
107 PyUnicode_IS_ASCII(in) ? 127 : 255);
108 if (!out)
109 return NULL;
110
111 inp = PyUnicode_1BYTE_DATA(in);
112 outp = PyUnicode_1BYTE_DATA(out);
113 DO_ESCAPE(inp, inp_end, outp);
114 return out;
115 }
116
117 static PyObject*
escape_unicode_kind2(PyUnicodeObject * in)118 escape_unicode_kind2(PyUnicodeObject *in)
119 {
120 Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
121 Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
122 Py_UCS2 *outp;
123 PyObject *out;
124 Py_ssize_t delta = 0;
125
126 GET_DELTA(inp, inp_end, delta);
127 if (!delta) {
128 Py_INCREF(in);
129 return (PyObject*)in;
130 }
131
132 out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535);
133 if (!out)
134 return NULL;
135
136 inp = PyUnicode_2BYTE_DATA(in);
137 outp = PyUnicode_2BYTE_DATA(out);
138 DO_ESCAPE(inp, inp_end, outp);
139 return out;
140 }
141
142
143 static PyObject*
escape_unicode_kind4(PyUnicodeObject * in)144 escape_unicode_kind4(PyUnicodeObject *in)
145 {
146 Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
147 Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
148 Py_UCS4 *outp;
149 PyObject *out;
150 Py_ssize_t delta = 0;
151
152 GET_DELTA(inp, inp_end, delta);
153 if (!delta) {
154 Py_INCREF(in);
155 return (PyObject*)in;
156 }
157
158 out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111);
159 if (!out)
160 return NULL;
161
162 inp = PyUnicode_4BYTE_DATA(in);
163 outp = PyUnicode_4BYTE_DATA(out);
164 DO_ESCAPE(inp, inp_end, outp);
165 return out;
166 }
167
168 static PyObject*
escape_unicode(PyUnicodeObject * in)169 escape_unicode(PyUnicodeObject *in)
170 {
171 if (PyUnicode_READY(in))
172 return NULL;
173
174 switch (PyUnicode_KIND(in)) {
175 case PyUnicode_1BYTE_KIND:
176 return escape_unicode_kind1(in);
177 case PyUnicode_2BYTE_KIND:
178 return escape_unicode_kind2(in);
179 case PyUnicode_4BYTE_KIND:
180 return escape_unicode_kind4(in);
181 }
182 assert(0); /* shouldn't happen */
183 return NULL;
184 }
185
186 static PyObject*
escape(PyObject * self,PyObject * text)187 escape(PyObject *self, PyObject *text)
188 {
189 static PyObject *id_html;
190 PyObject *s = NULL, *rv = NULL, *html;
191
192 if (id_html == NULL) {
193 id_html = PyUnicode_InternFromString("__html__");
194 if (id_html == NULL) {
195 return NULL;
196 }
197 }
198
199 /* we don't have to escape integers, bools or floats */
200 if (PyLong_CheckExact(text) ||
201 PyFloat_CheckExact(text) || PyBool_Check(text) ||
202 text == Py_None)
203 return PyObject_CallFunctionObjArgs(markup, text, NULL);
204
205 /* if the object has an __html__ method that performs the escaping */
206 html = PyObject_GetAttr(text ,id_html);
207 if (html) {
208 s = PyObject_CallObject(html, NULL);
209 Py_DECREF(html);
210 if (s == NULL) {
211 return NULL;
212 }
213 /* Convert to Markup object */
214 rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
215 Py_DECREF(s);
216 return rv;
217 }
218
219 /* otherwise make the object unicode if it isn't, then escape */
220 PyErr_Clear();
221 if (!PyUnicode_Check(text)) {
222 PyObject *unicode = PyObject_Str(text);
223 if (!unicode)
224 return NULL;
225 s = escape_unicode((PyUnicodeObject*)unicode);
226 Py_DECREF(unicode);
227 }
228 else
229 s = escape_unicode((PyUnicodeObject*)text);
230
231 /* convert the unicode string into a markup object. */
232 rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
233 Py_DECREF(s);
234 return rv;
235 }
236
237
238 static PyObject*
escape_silent(PyObject * self,PyObject * text)239 escape_silent(PyObject *self, PyObject *text)
240 {
241 if (text != Py_None)
242 return escape(self, text);
243 return PyObject_CallFunctionObjArgs(markup, NULL);
244 }
245
246
247 static PyObject*
soft_str(PyObject * self,PyObject * s)248 soft_str(PyObject *self, PyObject *s)
249 {
250 if (!PyUnicode_Check(s))
251 return PyObject_Str(s);
252 Py_INCREF(s);
253 return s;
254 }
255
256
257 static PyObject*
soft_unicode(PyObject * self,PyObject * s)258 soft_unicode(PyObject *self, PyObject *s)
259 {
260 PyErr_WarnEx(
261 PyExc_DeprecationWarning,
262 "'soft_unicode' has been renamed to 'soft_str'. The old name"
263 " will be removed in MarkupSafe 2.1.",
264 2
265 );
266 return soft_str(self, s);
267 }
268
269
270 static PyMethodDef module_methods[] = {
271 {
272 "escape",
273 (PyCFunction)escape,
274 METH_O,
275 "Replace the characters ``&``, ``<``, ``>``, ``'``, and ``\"`` in"
276 " the string with HTML-safe sequences. Use this if you need to display"
277 " text that might contain such characters in HTML.\n\n"
278 "If the object has an ``__html__`` method, it is called and the"
279 " return value is assumed to already be safe for HTML.\n\n"
280 ":param s: An object to be converted to a string and escaped.\n"
281 ":return: A :class:`Markup` string with the escaped text.\n"
282 },
283 {
284 "escape_silent",
285 (PyCFunction)escape_silent,
286 METH_O,
287 "Like :func:`escape` but treats ``None`` as the empty string."
288 " Useful with optional values, as otherwise you get the string"
289 " ``'None'`` when the value is ``None``.\n\n"
290 ">>> escape(None)\n"
291 "Markup('None')\n"
292 ">>> escape_silent(None)\n"
293 "Markup('')\n"
294 },
295 {
296 "soft_str",
297 (PyCFunction)soft_str,
298 METH_O,
299 "Convert an object to a string if it isn't already. This preserves"
300 " a :class:`Markup` string rather than converting it back to a basic"
301 " string, so it will still be marked as safe and won't be escaped"
302 " again.\n\n"
303 ">>> value = escape(\"<User 1>\")\n"
304 ">>> value\n"
305 "Markup('<User 1>')\n"
306 ">>> escape(str(value))\n"
307 "Markup('&lt;User 1&gt;')\n"
308 ">>> escape(soft_str(value))\n"
309 "Markup('<User 1>')\n"
310 },
311 {
312 "soft_unicode",
313 (PyCFunction)soft_unicode,
314 METH_O,
315 ""
316 },
317 {NULL, NULL, 0, NULL} /* Sentinel */
318 };
319
320 static struct PyModuleDef module_definition = {
321 PyModuleDef_HEAD_INIT,
322 "markupsafe._speedups",
323 NULL,
324 -1,
325 module_methods,
326 NULL,
327 NULL,
328 NULL,
329 NULL
330 };
331
332 PyMODINIT_FUNC
PyInit__speedups(void)333 PyInit__speedups(void)
334 {
335 if (!init_constants())
336 return NULL;
337
338 return PyModule_Create(&module_definition);
339 }
340