1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 // clinic/_csv.c.h uses internal pycore_modsupport.h API
12 #ifndef Py_BUILD_CORE_BUILTIN
13 # define Py_BUILD_CORE_MODULE 1
14 #endif
15
16 #include "Python.h"
17 #include "pycore_pyatomic_ft_wrappers.h"
18
19 #include <stddef.h> // offsetof()
20 #include <stdbool.h>
21
22 /*[clinic input]
23 module _csv
24 [clinic start generated code]*/
25 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
26
27 #include "clinic/_csv.c.h"
/* Sentinel Py_UCS4 values that cannot collide with a real code point.
 * NOT_SET marks a dialect character that has no value (get_char_or_None()
 * reports it as None).  EOL is the pseudo-character Reader_iternext() feeds
 * to parse_process_char() after the last real character of every line. */
#define NOT_SET ((Py_UCS4)-1)
#define EOL ((Py_UCS4)-2)
30
31
/* Per-module state of the _csv module, obtained via PyModule_GetState(). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;    /* Dialect registry */
    PyTypeObject *dialect_type;  /* type called by _call_dialect() */
    PyTypeObject *reader_type;   /* type instantiated by csv_reader() */
    PyTypeObject *writer_type;   /* presumably the writer counterpart; its use
                                    is outside this part of the file */
    Py_ssize_t field_limit; /* max parsed field size; parse_add_char() reads
                               it with a relaxed atomic load */
    PyObject *str_write;    /* cleared in _csv_clear(); NOTE(review): looks
                               like a cached "write" name string - confirm */
} _csvstate;
41
/* Declared ahead of use so that _csv_state_from_type() can pass its address
 * to PyType_GetModuleByDef(); the initialized definition is expected further
 * down the file (not visible from here). */
static struct PyModuleDef _csvmodule;
43
44 static inline _csvstate*
get_csv_state(PyObject * module)45 get_csv_state(PyObject *module)
46 {
47 void *state = PyModule_GetState(module);
48 assert(state != NULL);
49 return (_csvstate *)state;
50 }
51
52 static int
_csv_clear(PyObject * module)53 _csv_clear(PyObject *module)
54 {
55 _csvstate *module_state = PyModule_GetState(module);
56 Py_CLEAR(module_state->error_obj);
57 Py_CLEAR(module_state->dialects);
58 Py_CLEAR(module_state->dialect_type);
59 Py_CLEAR(module_state->reader_type);
60 Py_CLEAR(module_state->writer_type);
61 Py_CLEAR(module_state->str_write);
62 return 0;
63 }
64
65 static int
_csv_traverse(PyObject * module,visitproc visit,void * arg)66 _csv_traverse(PyObject *module, visitproc visit, void *arg)
67 {
68 _csvstate *module_state = PyModule_GetState(module);
69 Py_VISIT(module_state->error_obj);
70 Py_VISIT(module_state->dialects);
71 Py_VISIT(module_state->dialect_type);
72 Py_VISIT(module_state->reader_type);
73 Py_VISIT(module_state->writer_type);
74 return 0;
75 }
76
77 static void
_csv_free(void * module)78 _csv_free(void *module)
79 {
80 _csv_clear((PyObject *)module);
81 }
82
/* States of the reader's character-driven parser (see parse_process_char()).
 *
 * START_RECORD            nothing of the current record consumed yet
 * START_FIELD             expecting the first character of a field
 * ESCAPED_CHAR            escapechar seen outside a quoted field
 * IN_FIELD                inside an unquoted field
 * IN_QUOTED_FIELD         inside a quoted field
 * ESCAPE_IN_QUOTED_FIELD  escapechar seen inside a quoted field
 * QUOTE_IN_QUOTED_FIELD   quotechar seen inside a quoted field (doublequote)
 * EAT_CRNL                line terminator seen; only CR, LF or EOL may follow
 * AFTER_ESCAPED_CRNL      an escaped CR or LF has just been stored
 */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
88
/* Quoting styles.  The enumerator order fixes the integer values that
 * DialectObj.quoting stores and that dialect_check_quoting() validates, so
 * the order must not change.
 *
 * On the reader side (parse_save_field()): with QUOTE_NOTNULL or
 * QUOTE_STRINGS an empty unquoted field becomes None; with QUOTE_NONNUMERIC
 * or QUOTE_STRINGS a non-empty unquoted field is converted to float.
 * With QUOTE_NONE the parser does not treat quotechar as special. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE,
    QUOTE_STRINGS, QUOTE_NOTNULL
} QuoteStyle;
93
/* Pairs a QuoteStyle value with its symbolic name; element type of the
 * quote_styles[] table. */
typedef struct {
    QuoteStyle style;   /* numeric quoting style */
    const char *name;   /* its name; NULL in the table's terminating entry */
} StyleDesc;
98
/* Table of all valid quoting styles.  The final zero-initialized entry has a
 * NULL name, which is what terminates the scan in dialect_check_quoting(). */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { QUOTE_STRINGS,    "QUOTE_STRINGS" },
    { QUOTE_NOTNULL,    "QUOTE_NOTNULL" },
    { 0 }
};
108
/* Instance layout of _csv.Dialect.  The three char flags are exposed through
 * Dialect_memberlist via offsetof(); the remaining fields are exposed through
 * the getters in Dialect_getsetlist.  Character fields hold NOT_SET when no
 * character is configured. */
typedef struct {
    PyObject_HEAD

    char doublequote;           /* is " represented by ""? */
    char skipinitialspace;      /* ignore spaces following delimiter? */
    char strict;                /* raise exception on bad CSV */
    int quoting;                /* style of quoting to write */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character */
    Py_UCS4 escapechar;         /* escape character */
    PyObject *lineterminator;   /* string to write between records */

} DialectObj;
122
/* Instance layout of _csv.reader.  `field` is a growable Py_UCS4 buffer
 * managed by parse_grow_buff() and released in Reader_dealloc(). */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer */
    Py_ssize_t field_size;  /* size of allocated buffer */
    Py_ssize_t field_len;   /* length of current field */
    bool unquoted_field;    /* true if no quotes around the current field */
    unsigned long line_num; /* Source-file line number; incremented once per
                               string pulled from input_iter */
} ReaderObj;
138
/* Instance layout of the CSV writer object.  `rec` accumulates the output
 * record as Py_UCS4 characters; join_reset() empties it logically by zeroing
 * rec_len and num_fields. */
typedef struct {
    PyObject_HEAD

    PyObject *write;        /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;           /* buffer for parser.join */
    Py_ssize_t rec_size;    /* size of allocated record */
    Py_ssize_t rec_len;     /* length of record */
    int num_fields;         /* number of fields in record */

    PyObject *error_obj;    /* cached error object; used as the exception
                               type raised by join_append_data() */
} WriterObj;
153
154 /*
155 * DIALECT class
156 */
157
158 static PyObject *
get_dialect_from_registry(PyObject * name_obj,_csvstate * module_state)159 get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
160 {
161 PyObject *dialect_obj;
162 if (PyDict_GetItemRef(module_state->dialects, name_obj, &dialect_obj) == 0) {
163 PyErr_SetString(module_state->error_obj, "unknown dialect");
164 }
165 return dialect_obj;
166 }
167
168 static PyObject *
get_char_or_None(Py_UCS4 c)169 get_char_or_None(Py_UCS4 c)
170 {
171 if (c == NOT_SET) {
172 Py_RETURN_NONE;
173 }
174 else
175 return PyUnicode_FromOrdinal(c);
176 }
177
178 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))179 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
180 {
181 return Py_XNewRef(self->lineterminator);
182 }
183
184 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))185 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
186 {
187 return get_char_or_None(self->delimiter);
188 }
189
190 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))191 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
192 {
193 return get_char_or_None(self->escapechar);
194 }
195
196 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))197 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
198 {
199 return get_char_or_None(self->quotechar);
200 }
201
202 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))203 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
204 {
205 return PyLong_FromLong(self->quoting);
206 }
207
208 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)209 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
210 {
211 if (src == NULL)
212 *target = dflt;
213 else {
214 int b = PyObject_IsTrue(src);
215 if (b < 0)
216 return -1;
217 *target = (char)b;
218 }
219 return 0;
220 }
221
222 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)223 _set_int(const char *name, int *target, PyObject *src, int dflt)
224 {
225 if (src == NULL)
226 *target = dflt;
227 else {
228 int value;
229 if (!PyLong_CheckExact(src)) {
230 PyErr_Format(PyExc_TypeError,
231 "\"%s\" must be an integer", name);
232 return -1;
233 }
234 value = PyLong_AsInt(src);
235 if (value == -1 && PyErr_Occurred()) {
236 return -1;
237 }
238 *target = value;
239 }
240 return 0;
241 }
242
243 static int
_set_char_or_none(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)244 _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
245 {
246 if (src == NULL) {
247 *target = dflt;
248 }
249 else {
250 *target = NOT_SET;
251 if (src != Py_None) {
252 if (!PyUnicode_Check(src)) {
253 PyErr_Format(PyExc_TypeError,
254 "\"%s\" must be string or None, not %.200s", name,
255 Py_TYPE(src)->tp_name);
256 return -1;
257 }
258 Py_ssize_t len = PyUnicode_GetLength(src);
259 if (len < 0) {
260 return -1;
261 }
262 if (len != 1) {
263 PyErr_Format(PyExc_TypeError,
264 "\"%s\" must be a 1-character string",
265 name);
266 return -1;
267 }
268 *target = PyUnicode_READ_CHAR(src, 0);
269 }
270 }
271 return 0;
272 }
273
274 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)275 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
276 {
277 if (src == NULL) {
278 *target = dflt;
279 }
280 else {
281 if (!PyUnicode_Check(src)) {
282 PyErr_Format(PyExc_TypeError,
283 "\"%s\" must be string, not %.200s", name,
284 Py_TYPE(src)->tp_name);
285 return -1;
286 }
287 Py_ssize_t len = PyUnicode_GetLength(src);
288 if (len < 0) {
289 return -1;
290 }
291 if (len != 1) {
292 PyErr_Format(PyExc_TypeError,
293 "\"%s\" must be a 1-character string",
294 name);
295 return -1;
296 }
297 *target = PyUnicode_READ_CHAR(src, 0);
298 }
299 return 0;
300 }
301
302 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)303 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
304 {
305 if (src == NULL)
306 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
307 else {
308 if (src == Py_None)
309 *target = NULL;
310 else if (!PyUnicode_Check(src)) {
311 PyErr_Format(PyExc_TypeError,
312 "\"%s\" must be a string", name);
313 return -1;
314 }
315 else {
316 Py_XSETREF(*target, Py_NewRef(src));
317 }
318 }
319 return 0;
320 }
321
322 static int
dialect_check_quoting(int quoting)323 dialect_check_quoting(int quoting)
324 {
325 const StyleDesc *qs;
326
327 for (qs = quote_styles; qs->name; qs++) {
328 if ((int)qs->style == quoting)
329 return 0;
330 }
331 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
332 return -1;
333 }
334
335 static int
dialect_check_char(const char * name,Py_UCS4 c,DialectObj * dialect,bool allowspace)336 dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect, bool allowspace)
337 {
338 if (c == '\r' || c == '\n' || (c == ' ' && !allowspace)) {
339 PyErr_Format(PyExc_ValueError, "bad %s value", name);
340 return -1;
341 }
342 if (PyUnicode_FindChar(
343 dialect->lineterminator, c, 0,
344 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
345 {
346 PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
347 return -1;
348 }
349 return 0;
350 }
351
352 static int
dialect_check_chars(const char * name1,const char * name2,Py_UCS4 c1,Py_UCS4 c2)353 dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
354 {
355 if (c1 == c2 && c1 != NOT_SET) {
356 PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
357 return -1;
358 }
359 return 0;
360 }
361
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only boolean attributes of Dialect that map directly onto the char
 * flag fields of DialectObj. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   Py_T_BOOL, D_OFF(skipinitialspace), Py_READONLY },
    { "doublequote",        Py_T_BOOL, D_OFF(doublequote), Py_READONLY },
    { "strict",             Py_T_BOOL, D_OFF(strict), Py_READONLY },
    { NULL }
};
370
/* Computed attributes of Dialect.  Only getters are supplied, so no setter
 * is installed for any of these entries. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
379
380 static void
Dialect_dealloc(DialectObj * self)381 Dialect_dealloc(DialectObj *self)
382 {
383 PyTypeObject *tp = Py_TYPE(self);
384 PyObject_GC_UnTrack(self);
385 tp->tp_clear((PyObject *)self);
386 PyObject_GC_Del(self);
387 Py_DECREF(tp);
388 }
389
/* Keyword names accepted by dialect_new().  The order must match the order
 * of the output pointers passed to PyArg_ParseTupleAndKeywords() there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
402
403 static _csvstate *
_csv_state_from_type(PyTypeObject * type,const char * name)404 _csv_state_from_type(PyTypeObject *type, const char *name)
405 {
406 PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
407 if (module == NULL) {
408 return NULL;
409 }
410 _csvstate *module_state = PyModule_GetState(module);
411 if (module_state == NULL) {
412 PyErr_Format(PyExc_SystemError,
413 "%s: No _csv module state found", name);
414 return NULL;
415 }
416 return module_state;
417 }
418
419 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)420 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
421 {
422 DialectObj *self;
423 PyObject *ret = NULL;
424 PyObject *dialect = NULL;
425 PyObject *delimiter = NULL;
426 PyObject *doublequote = NULL;
427 PyObject *escapechar = NULL;
428 PyObject *lineterminator = NULL;
429 PyObject *quotechar = NULL;
430 PyObject *quoting = NULL;
431 PyObject *skipinitialspace = NULL;
432 PyObject *strict = NULL;
433
434 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
435 "|OOOOOOOOO", dialect_kws,
436 &dialect,
437 &delimiter,
438 &doublequote,
439 &escapechar,
440 &lineterminator,
441 "echar,
442 "ing,
443 &skipinitialspace,
444 &strict))
445 return NULL;
446
447 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
448 if (module_state == NULL) {
449 return NULL;
450 }
451
452 if (dialect != NULL) {
453 if (PyUnicode_Check(dialect)) {
454 dialect = get_dialect_from_registry(dialect, module_state);
455 if (dialect == NULL)
456 return NULL;
457 }
458 else
459 Py_INCREF(dialect);
460 /* Can we reuse this instance? */
461 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
462 delimiter == NULL &&
463 doublequote == NULL &&
464 escapechar == NULL &&
465 lineterminator == NULL &&
466 quotechar == NULL &&
467 quoting == NULL &&
468 skipinitialspace == NULL &&
469 strict == NULL)
470 return dialect;
471 }
472
473 self = (DialectObj *)type->tp_alloc(type, 0);
474 if (self == NULL) {
475 Py_CLEAR(dialect);
476 return NULL;
477 }
478 self->lineterminator = NULL;
479
480 Py_XINCREF(delimiter);
481 Py_XINCREF(doublequote);
482 Py_XINCREF(escapechar);
483 Py_XINCREF(lineterminator);
484 Py_XINCREF(quotechar);
485 Py_XINCREF(quoting);
486 Py_XINCREF(skipinitialspace);
487 Py_XINCREF(strict);
488 if (dialect != NULL) {
489 #define DIALECT_GETATTR(v, n) \
490 do { \
491 if (v == NULL) { \
492 v = PyObject_GetAttrString(dialect, n); \
493 if (v == NULL) \
494 PyErr_Clear(); \
495 } \
496 } while (0)
497 DIALECT_GETATTR(delimiter, "delimiter");
498 DIALECT_GETATTR(doublequote, "doublequote");
499 DIALECT_GETATTR(escapechar, "escapechar");
500 DIALECT_GETATTR(lineterminator, "lineterminator");
501 DIALECT_GETATTR(quotechar, "quotechar");
502 DIALECT_GETATTR(quoting, "quoting");
503 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
504 DIALECT_GETATTR(strict, "strict");
505 }
506
507 /* check types and convert to C values */
508 #define DIASET(meth, name, target, src, dflt) \
509 if (meth(name, target, src, dflt)) \
510 goto err
511 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
512 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
513 DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
514 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
515 DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
516 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
517 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
518 DIASET(_set_bool, "strict", &self->strict, strict, false);
519
520 /* validate options */
521 if (dialect_check_quoting(self->quoting))
522 goto err;
523 if (self->delimiter == NOT_SET) {
524 PyErr_SetString(PyExc_TypeError,
525 "\"delimiter\" must be a 1-character string");
526 goto err;
527 }
528 if (quotechar == Py_None && quoting == NULL)
529 self->quoting = QUOTE_NONE;
530 if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
531 PyErr_SetString(PyExc_TypeError,
532 "quotechar must be set if quoting enabled");
533 goto err;
534 }
535 if (self->lineterminator == NULL) {
536 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
537 goto err;
538 }
539 if (dialect_check_char("delimiter", self->delimiter, self, true) ||
540 dialect_check_char("escapechar", self->escapechar, self,
541 !self->skipinitialspace) ||
542 dialect_check_char("quotechar", self->quotechar, self,
543 !self->skipinitialspace) ||
544 dialect_check_chars("delimiter", "escapechar",
545 self->delimiter, self->escapechar) ||
546 dialect_check_chars("delimiter", "quotechar",
547 self->delimiter, self->quotechar) ||
548 dialect_check_chars("escapechar", "quotechar",
549 self->escapechar, self->quotechar))
550 {
551 goto err;
552 }
553
554 ret = Py_NewRef(self);
555 err:
556 Py_CLEAR(self);
557 Py_CLEAR(dialect);
558 Py_CLEAR(delimiter);
559 Py_CLEAR(doublequote);
560 Py_CLEAR(escapechar);
561 Py_CLEAR(lineterminator);
562 Py_CLEAR(quotechar);
563 Py_CLEAR(quoting);
564 Py_CLEAR(skipinitialspace);
565 Py_CLEAR(strict);
566 return ret;
567 }
568
569 /* Since dialect is now a heap type, it inherits pickling method for
570 * protocol 0 and 1 from object, therefore it needs to be overridden */
571
572 PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
573
574 static PyObject *
Dialect_reduce(PyObject * self,PyObject * args)575 Dialect_reduce(PyObject *self, PyObject *args) {
576 PyErr_Format(PyExc_TypeError,
577 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
578 return NULL;
579 }
580
/* Method table for Dialect: both pickling entry points are routed to
 * Dialect_reduce(), which raises TypeError. */
static struct PyMethodDef dialect_methods[] = {
    {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {NULL, NULL}
};
586
/* Docstring installed as the Py_tp_doc slot of the Dialect type. */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");
591
592 static int
Dialect_clear(DialectObj * self)593 Dialect_clear(DialectObj *self)
594 {
595 Py_CLEAR(self->lineterminator);
596 return 0;
597 }
598
599 static int
Dialect_traverse(DialectObj * self,visitproc visit,void * arg)600 Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
601 {
602 Py_VISIT(self->lineterminator);
603 Py_VISIT(Py_TYPE(self));
604 return 0;
605 }
606
/* Slot table wiring the Dialect implementation functions and tables into the
 * type described by Dialect_Type_spec. */
static PyType_Slot Dialect_Type_slots[] = {
    {Py_tp_doc, (char*)Dialect_Type_doc},
    {Py_tp_members, Dialect_memberlist},
    {Py_tp_getset, Dialect_getsetlist},
    {Py_tp_new, dialect_new},
    {Py_tp_methods, dialect_methods},
    {Py_tp_dealloc, Dialect_dealloc},
    {Py_tp_clear, Dialect_clear},
    {Py_tp_traverse, Dialect_traverse},
    {0, NULL}
};
618
/* Type specification for "_csv.Dialect": a GC-enabled, subclassable,
 * immutable type.  Note that this object is not declared static. */
PyType_Spec Dialect_Type_spec = {
    .name = "_csv.Dialect",
    .basicsize = sizeof(DialectObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Dialect_Type_slots,
};
626
627
628 /*
629 * Return an instance of the dialect type, given a Python instance or kwarg
630 * description of the dialect
631 */
632 static PyObject *
_call_dialect(_csvstate * module_state,PyObject * dialect_inst,PyObject * kwargs)633 _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
634 {
635 PyObject *type = (PyObject *)module_state->dialect_type;
636 if (dialect_inst) {
637 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
638 }
639 else {
640 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
641 }
642 }
643
644 /*
645 * READER
646 */
647 static int
parse_save_field(ReaderObj * self)648 parse_save_field(ReaderObj *self)
649 {
650 int quoting = self->dialect->quoting;
651 PyObject *field;
652
653 if (self->unquoted_field &&
654 self->field_len == 0 &&
655 (quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
656 {
657 field = Py_NewRef(Py_None);
658 }
659 else {
660 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
661 (void *) self->field, self->field_len);
662 if (field == NULL) {
663 return -1;
664 }
665 if (self->unquoted_field &&
666 self->field_len != 0 &&
667 (quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
668 {
669 PyObject *tmp = PyNumber_Float(field);
670 Py_DECREF(field);
671 if (tmp == NULL) {
672 return -1;
673 }
674 field = tmp;
675 }
676 self->field_len = 0;
677 }
678 if (PyList_Append(self->fields, field) < 0) {
679 Py_DECREF(field);
680 return -1;
681 }
682 Py_DECREF(field);
683 return 0;
684 }
685
686 static int
parse_grow_buff(ReaderObj * self)687 parse_grow_buff(ReaderObj *self)
688 {
689 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
690
691 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
692 Py_UCS4 *field_new = self->field;
693 PyMem_Resize(field_new, Py_UCS4, field_size_new);
694 if (field_new == NULL) {
695 PyErr_NoMemory();
696 return 0;
697 }
698 self->field = field_new;
699 self->field_size = field_size_new;
700 return 1;
701 }
702
703 static int
parse_add_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)704 parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
705 {
706 Py_ssize_t field_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit);
707 if (self->field_len >= field_limit) {
708 PyErr_Format(module_state->error_obj,
709 "field larger than field limit (%zd)",
710 field_limit);
711 return -1;
712 }
713 if (self->field_len == self->field_size && !parse_grow_buff(self))
714 return -1;
715 self->field[self->field_len++] = c;
716 return 0;
717 }
718
/*
 * Advance the reader's state machine by one input character.
 *
 * `c` is either a character of the current line or the EOL pseudo-character
 * that the caller passes once the line is exhausted.  Depending on
 * self->state the character is stored in the current field, completes a
 * field (parse_save_field()), or only changes the state.  Ending in
 * START_RECORD after EOL is what signals a complete record to the caller.
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == EOL)
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        self->unquoted_field = true;
        if (c == '\n' || c == '\r' || c == EOL) {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->unquoted_field = false;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore spaces at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field; the state stays START_FIELD */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* character following escapechar outside a quoted field */
        if (c == '\n' || c=='\r') {
            /* escaped line break: keep it and remember that it was escaped */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escapechar was the last character of the line: store a newline */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* EOL right after an escaped line break does not end the field;
           any other character is handled as in IN_FIELD */
        if (c == EOL)
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == EOL)
            /* end of line inside quotes: stay in the quoted field */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* character following escapechar inside a quoted field; an EOL is
           stored as a newline */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the stray character and continue unquoted */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line terminator only CR, LF or EOL are acceptable */
        if (c == '\n' || c == '\r')
            ;
        else if (c == EOL)
            self->state = START_RECORD;
        else {
            PyErr_Format(module_state->error_obj,
                         "new-line character seen in unquoted field - "
                         "do you need to open the file with newline=''?");
            return -1;
        }
        break;

    }
    return 0;
}
901
902 static int
parse_reset(ReaderObj * self)903 parse_reset(ReaderObj *self)
904 {
905 Py_XSETREF(self->fields, PyList_New(0));
906 if (self->fields == NULL)
907 return -1;
908 self->field_len = 0;
909 self->state = START_RECORD;
910 self->unquoted_field = false;
911 return 0;
912 }
913
914 static PyObject *
Reader_iternext(ReaderObj * self)915 Reader_iternext(ReaderObj *self)
916 {
917 PyObject *fields = NULL;
918 Py_UCS4 c;
919 Py_ssize_t pos, linelen;
920 int kind;
921 const void *data;
922 PyObject *lineobj;
923
924 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
925 "Reader.__next__");
926 if (module_state == NULL) {
927 return NULL;
928 }
929
930 if (parse_reset(self) < 0)
931 return NULL;
932 do {
933 lineobj = PyIter_Next(self->input_iter);
934 if (lineobj == NULL) {
935 /* End of input OR exception */
936 if (!PyErr_Occurred() && (self->field_len != 0 ||
937 self->state == IN_QUOTED_FIELD)) {
938 if (self->dialect->strict)
939 PyErr_SetString(module_state->error_obj,
940 "unexpected end of data");
941 else if (parse_save_field(self) >= 0)
942 break;
943 }
944 return NULL;
945 }
946 if (!PyUnicode_Check(lineobj)) {
947 PyErr_Format(module_state->error_obj,
948 "iterator should return strings, "
949 "not %.200s "
950 "(the file should be opened in text mode)",
951 Py_TYPE(lineobj)->tp_name
952 );
953 Py_DECREF(lineobj);
954 return NULL;
955 }
956 ++self->line_num;
957 kind = PyUnicode_KIND(lineobj);
958 data = PyUnicode_DATA(lineobj);
959 pos = 0;
960 linelen = PyUnicode_GET_LENGTH(lineobj);
961 while (linelen--) {
962 c = PyUnicode_READ(kind, data, pos);
963 if (parse_process_char(self, module_state, c) < 0) {
964 Py_DECREF(lineobj);
965 goto err;
966 }
967 pos++;
968 }
969 Py_DECREF(lineobj);
970 if (parse_process_char(self, module_state, EOL) < 0)
971 goto err;
972 } while (self->state != START_RECORD);
973
974 fields = self->fields;
975 self->fields = NULL;
976 err:
977 return fields;
978 }
979
980 static void
Reader_dealloc(ReaderObj * self)981 Reader_dealloc(ReaderObj *self)
982 {
983 PyTypeObject *tp = Py_TYPE(self);
984 PyObject_GC_UnTrack(self);
985 tp->tp_clear((PyObject *)self);
986 if (self->field != NULL) {
987 PyMem_Free(self->field);
988 self->field = NULL;
989 }
990 PyObject_GC_Del(self);
991 Py_DECREF(tp);
992 }
993
994 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)995 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
996 {
997 Py_VISIT(self->dialect);
998 Py_VISIT(self->input_iter);
999 Py_VISIT(self->fields);
1000 Py_VISIT(Py_TYPE(self));
1001 return 0;
1002 }
1003
1004 static int
Reader_clear(ReaderObj * self)1005 Reader_clear(ReaderObj *self)
1006 {
1007 Py_CLEAR(self->dialect);
1008 Py_CLEAR(self->input_iter);
1009 Py_CLEAR(self->fields);
1010 return 0;
1011 }
1012
/* Docstring installed as the Py_tp_doc slot of the reader type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
1019
/* The reader type defines no methods of its own; the table holds only the
 * terminating sentinel. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes of the reader that map directly onto ReaderObj
 * fields. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", _Py_T_OBJECT, R_OFF(dialect), Py_READONLY },
    { "line_num", Py_T_ULONG, R_OFF(line_num), Py_READONLY },
    { NULL }
};
1030
1031
/* Slot table for the reader type.  tp_iter returns the object itself
 * (PyObject_SelfIter) and tp_iternext produces one record per call. */
static PyType_Slot Reader_Type_slots[] = {
    {Py_tp_doc, (char*)Reader_Type_doc},
    {Py_tp_traverse, Reader_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, Reader_iternext},
    {Py_tp_methods, Reader_methods},
    {Py_tp_members, Reader_memberlist},
    {Py_tp_clear, Reader_clear},
    {Py_tp_dealloc, Reader_dealloc},
    {0, NULL}
};
1043
/* Type specification for "_csv.reader".  Py_TPFLAGS_DISALLOW_INSTANTIATION
 * is set, so instances are not created by calling the type; csv_reader()
 * allocates them directly.  Note that this object is not declared static. */
PyType_Spec Reader_Type_spec = {
    .name = "_csv.reader",
    .basicsize = sizeof(ReaderObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Reader_Type_slots
};
1051
1052
1053 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)1054 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1055 {
1056 PyObject * iterator, * dialect = NULL;
1057 _csvstate *module_state = get_csv_state(module);
1058 ReaderObj * self = PyObject_GC_New(
1059 ReaderObj,
1060 module_state->reader_type);
1061
1062 if (!self)
1063 return NULL;
1064
1065 self->dialect = NULL;
1066 self->fields = NULL;
1067 self->input_iter = NULL;
1068 self->field = NULL;
1069 self->field_size = 0;
1070 self->line_num = 0;
1071
1072 if (parse_reset(self) < 0) {
1073 Py_DECREF(self);
1074 return NULL;
1075 }
1076
1077 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1078 Py_DECREF(self);
1079 return NULL;
1080 }
1081 self->input_iter = PyObject_GetIter(iterator);
1082 if (self->input_iter == NULL) {
1083 Py_DECREF(self);
1084 return NULL;
1085 }
1086 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1087 keyword_args);
1088 if (self->dialect == NULL) {
1089 Py_DECREF(self);
1090 return NULL;
1091 }
1092
1093 PyObject_GC_Track(self);
1094 return (PyObject *)self;
1095 }
1096
1097 /*
1098 * WRITER
1099 */
1100 /* ---------------------------------------------------------------- */
1101 static void
join_reset(WriterObj * self)1102 join_reset(WriterObj *self)
1103 {
1104 self->rec_len = 0;
1105 self->num_fields = 0;
1106 }
1107
/* Granularity, in characters, to which join_check_rec_size() rounds the
 * writer's record buffer size up when it has to grow. */
#define MEM_INCR 32768
1109
1110 /* Calculate new record length or append field to record. Return new
1111 * record length.
1112 */
1113 static Py_ssize_t
join_append_data(WriterObj * self,int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1114 join_append_data(WriterObj *self, int field_kind, const void *field_data,
1115 Py_ssize_t field_len, int *quoted,
1116 int copy_phase)
1117 {
1118 DialectObj *dialect = self->dialect;
1119 int i;
1120 Py_ssize_t rec_len;
1121
1122 #define INCLEN \
1123 do {\
1124 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1125 goto overflow; \
1126 } \
1127 rec_len++; \
1128 } while(0)
1129
1130 #define ADDCH(c) \
1131 do {\
1132 if (copy_phase) \
1133 self->rec[rec_len] = c;\
1134 INCLEN;\
1135 } while(0)
1136
1137 rec_len = self->rec_len;
1138
1139 /* If this is not the first field we need a field separator */
1140 if (self->num_fields > 0)
1141 ADDCH(dialect->delimiter);
1142
1143 /* Handle preceding quote */
1144 if (copy_phase && *quoted)
1145 ADDCH(dialect->quotechar);
1146
1147 /* Copy/count field data */
1148 /* If field is null just pass over */
1149 for (i = 0; field_data && (i < field_len); i++) {
1150 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1151 int want_escape = 0;
1152
1153 if (c == dialect->delimiter ||
1154 c == dialect->escapechar ||
1155 c == dialect->quotechar ||
1156 c == '\n' ||
1157 c == '\r' ||
1158 PyUnicode_FindChar(
1159 dialect->lineterminator, c, 0,
1160 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1161 if (dialect->quoting == QUOTE_NONE)
1162 want_escape = 1;
1163 else {
1164 if (c == dialect->quotechar) {
1165 if (dialect->doublequote)
1166 ADDCH(dialect->quotechar);
1167 else
1168 want_escape = 1;
1169 }
1170 else if (c == dialect->escapechar) {
1171 want_escape = 1;
1172 }
1173 if (!want_escape)
1174 *quoted = 1;
1175 }
1176 if (want_escape) {
1177 if (dialect->escapechar == NOT_SET) {
1178 PyErr_Format(self->error_obj,
1179 "need to escape, but no escapechar set");
1180 return -1;
1181 }
1182 ADDCH(dialect->escapechar);
1183 }
1184 }
1185 /* Copy field character into record buffer.
1186 */
1187 ADDCH(c);
1188 }
1189
1190 if (*quoted) {
1191 if (copy_phase)
1192 ADDCH(dialect->quotechar);
1193 else {
1194 INCLEN; /* starting quote */
1195 INCLEN; /* ending quote */
1196 }
1197 }
1198 return rec_len;
1199
1200 overflow:
1201 PyErr_NoMemory();
1202 return -1;
1203 #undef ADDCH
1204 #undef INCLEN
1205 }
1206
1207 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1208 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1209 {
1210 assert(rec_len >= 0);
1211
1212 if (rec_len > self->rec_size) {
1213 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1214 Py_UCS4 *rec_new = self->rec;
1215 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1216 if (rec_new == NULL) {
1217 PyErr_NoMemory();
1218 return 0;
1219 }
1220 self->rec = rec_new;
1221 self->rec_size = (Py_ssize_t)rec_size_new;
1222 }
1223 return 1;
1224 }
1225
1226 static int
join_append(WriterObj * self,PyObject * field,int quoted)1227 join_append(WriterObj *self, PyObject *field, int quoted)
1228 {
1229 DialectObj *dialect = self->dialect;
1230 int field_kind = -1;
1231 const void *field_data = NULL;
1232 Py_ssize_t field_len = 0;
1233 Py_ssize_t rec_len;
1234
1235 if (field != NULL) {
1236 field_kind = PyUnicode_KIND(field);
1237 field_data = PyUnicode_DATA(field);
1238 field_len = PyUnicode_GET_LENGTH(field);
1239 }
1240 if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
1241 if (dialect->quoting == QUOTE_NONE ||
1242 (field == NULL &&
1243 (dialect->quoting == QUOTE_STRINGS ||
1244 dialect->quoting == QUOTE_NOTNULL)))
1245 {
1246 PyErr_Format(self->error_obj,
1247 "empty field must be quoted if delimiter is a space "
1248 "and skipinitialspace is true");
1249 return 0;
1250 }
1251 quoted = 1;
1252 }
1253 rec_len = join_append_data(self, field_kind, field_data, field_len,
1254 "ed, 0);
1255 if (rec_len < 0)
1256 return 0;
1257
1258 /* grow record buffer if necessary */
1259 if (!join_check_rec_size(self, rec_len))
1260 return 0;
1261
1262 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1263 "ed, 1);
1264 self->num_fields++;
1265
1266 return 1;
1267 }
1268
1269 static int
join_append_lineterminator(WriterObj * self)1270 join_append_lineterminator(WriterObj *self)
1271 {
1272 Py_ssize_t terminator_len, i;
1273 int term_kind;
1274 const void *term_data;
1275
1276 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1277 if (terminator_len == -1)
1278 return 0;
1279
1280 /* grow record buffer if necessary */
1281 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1282 return 0;
1283
1284 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1285 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1286 for (i = 0; i < terminator_len; i++)
1287 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1288 self->rec_len += terminator_len;
1289
1290 return 1;
1291 }
1292
1293 PyDoc_STRVAR(csv_writerow_doc,
1294 "writerow(iterable)\n"
1295 "\n"
1296 "Construct and write a CSV record from an iterable of fields. Non-string\n"
1297 "elements will be converted to string.");
1298
1299 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1300 csv_writerow(WriterObj *self, PyObject *seq)
1301 {
1302 DialectObj *dialect = self->dialect;
1303 PyObject *iter, *field, *line, *result;
1304 bool null_field = false;
1305
1306 iter = PyObject_GetIter(seq);
1307 if (iter == NULL) {
1308 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1309 PyErr_Format(self->error_obj,
1310 "iterable expected, not %.200s",
1311 Py_TYPE(seq)->tp_name);
1312 }
1313 return NULL;
1314 }
1315
1316 /* Join all fields in internal buffer.
1317 */
1318 join_reset(self);
1319 while ((field = PyIter_Next(iter))) {
1320 int append_ok;
1321 int quoted;
1322
1323 switch (dialect->quoting) {
1324 case QUOTE_NONNUMERIC:
1325 quoted = !PyNumber_Check(field);
1326 break;
1327 case QUOTE_ALL:
1328 quoted = 1;
1329 break;
1330 case QUOTE_STRINGS:
1331 quoted = PyUnicode_Check(field);
1332 break;
1333 case QUOTE_NOTNULL:
1334 quoted = field != Py_None;
1335 break;
1336 default:
1337 quoted = 0;
1338 break;
1339 }
1340
1341 null_field = (field == Py_None);
1342 if (PyUnicode_Check(field)) {
1343 append_ok = join_append(self, field, quoted);
1344 Py_DECREF(field);
1345 }
1346 else if (null_field) {
1347 append_ok = join_append(self, NULL, quoted);
1348 Py_DECREF(field);
1349 }
1350 else {
1351 PyObject *str;
1352
1353 str = PyObject_Str(field);
1354 Py_DECREF(field);
1355 if (str == NULL) {
1356 Py_DECREF(iter);
1357 return NULL;
1358 }
1359 append_ok = join_append(self, str, quoted);
1360 Py_DECREF(str);
1361 }
1362 if (!append_ok) {
1363 Py_DECREF(iter);
1364 return NULL;
1365 }
1366 }
1367 Py_DECREF(iter);
1368 if (PyErr_Occurred())
1369 return NULL;
1370
1371 if (self->num_fields > 0 && self->rec_len == 0) {
1372 if (dialect->quoting == QUOTE_NONE ||
1373 (null_field &&
1374 (dialect->quoting == QUOTE_STRINGS ||
1375 dialect->quoting == QUOTE_NOTNULL)))
1376 {
1377 PyErr_Format(self->error_obj,
1378 "single empty field record must be quoted");
1379 return NULL;
1380 }
1381 self->num_fields--;
1382 if (!join_append(self, NULL, 1))
1383 return NULL;
1384 }
1385
1386 /* Add line terminator.
1387 */
1388 if (!join_append_lineterminator(self)) {
1389 return NULL;
1390 }
1391
1392 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1393 (void *) self->rec, self->rec_len);
1394 if (line == NULL) {
1395 return NULL;
1396 }
1397 result = PyObject_CallOneArg(self->write, line);
1398 Py_DECREF(line);
1399 return result;
1400 }
1401
1402 PyDoc_STRVAR(csv_writerows_doc,
1403 "writerows(iterable of iterables)\n"
1404 "\n"
1405 "Construct and write a series of iterables to a csv file. Non-string\n"
1406 "elements will be converted to string.");
1407
1408 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1409 csv_writerows(WriterObj *self, PyObject *seqseq)
1410 {
1411 PyObject *row_iter, *row_obj, *result;
1412
1413 row_iter = PyObject_GetIter(seqseq);
1414 if (row_iter == NULL) {
1415 return NULL;
1416 }
1417 while ((row_obj = PyIter_Next(row_iter))) {
1418 result = csv_writerow(self, row_obj);
1419 Py_DECREF(row_obj);
1420 if (!result) {
1421 Py_DECREF(row_iter);
1422 return NULL;
1423 }
1424 else
1425 Py_DECREF(result);
1426 }
1427 Py_DECREF(row_iter);
1428 if (PyErr_Occurred())
1429 return NULL;
1430 Py_RETURN_NONE;
1431 }
1432
/* Methods of writer objects.  Both are METH_O, i.e. they receive exactly
 * one positional argument; the table ends with a NULL sentinel entry.
 */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1438
/* Byte offset of field `x` inside WriterObj, for the member table below. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Attributes of writer objects exposed directly from the C struct:
 * `dialect` is a read-only object reference.
 */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", _Py_T_OBJECT, W_OFF(dialect), Py_READONLY },
    { NULL }
};
1445
/* tp_traverse slot: report every object reference a writer holds (its
 * dialect, the write callable, the error type) plus the writer's own type
 * to the visit callback.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1455
/* tp_clear slot: drop the writer's object references (dialect, write
 * callable, error type) and reset the fields to NULL.  The record buffer is
 * not an object reference and is released in Writer_dealloc().
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
1464
1465 static void
Writer_dealloc(WriterObj * self)1466 Writer_dealloc(WriterObj *self)
1467 {
1468 PyTypeObject *tp = Py_TYPE(self);
1469 PyObject_GC_UnTrack(self);
1470 tp->tp_clear((PyObject *)self);
1471 if (self->rec != NULL) {
1472 PyMem_Free(self->rec);
1473 }
1474 PyObject_GC_Del(self);
1475 Py_DECREF(tp);
1476 }
1477
/* Docstring of the writer type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);

/* Slots of the writer type: docstring, GC support (traverse/clear),
 * deallocation, and the method and member tables.
 */
static PyType_Slot Writer_Type_slots[] = {
    {Py_tp_doc, (char*)Writer_Type_doc},
    {Py_tp_traverse, Writer_traverse},
    {Py_tp_clear, Writer_clear},
    {Py_tp_dealloc, Writer_dealloc},
    {Py_tp_methods, Writer_methods},
    {Py_tp_members, Writer_memberlist},
    {0, NULL}
};

/* Spec from which csv_exec() creates the "_csv.writer" heap type.  The type
 * is GC-enabled and immutable, and Py_TPFLAGS_DISALLOW_INSTANTIATION is set;
 * instances are created by csv_writer() via PyObject_GC_New().
 */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Writer_Type_slots,
};
1502
1503
1504 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1505 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1506 {
1507 PyObject * output_file, * dialect = NULL;
1508 _csvstate *module_state = get_csv_state(module);
1509 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1510
1511 if (!self)
1512 return NULL;
1513
1514 self->dialect = NULL;
1515 self->write = NULL;
1516
1517 self->rec = NULL;
1518 self->rec_size = 0;
1519 self->rec_len = 0;
1520 self->num_fields = 0;
1521
1522 self->error_obj = Py_NewRef(module_state->error_obj);
1523
1524 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1525 Py_DECREF(self);
1526 return NULL;
1527 }
1528 if (PyObject_GetOptionalAttr(output_file,
1529 module_state->str_write,
1530 &self->write) < 0) {
1531 Py_DECREF(self);
1532 return NULL;
1533 }
1534 if (self->write == NULL || !PyCallable_Check(self->write)) {
1535 PyErr_SetString(PyExc_TypeError,
1536 "argument 1 must have a \"write\" method");
1537 Py_DECREF(self);
1538 return NULL;
1539 }
1540 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1541 keyword_args);
1542 if (self->dialect == NULL) {
1543 Py_DECREF(self);
1544 return NULL;
1545 }
1546 PyObject_GC_Track(self);
1547 return (PyObject *)self;
1548 }
1549
1550 /*
1551 * DIALECT REGISTRY
1552 */
1553
1554 /*[clinic input]
1555 _csv.list_dialects
1556
1557 Return a list of all known dialect names.
1558
1559 names = csv.list_dialects()
1560 [clinic start generated code]*/
1561
1562 static PyObject *
_csv_list_dialects_impl(PyObject * module)1563 _csv_list_dialects_impl(PyObject *module)
1564 /*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
1565 {
1566 return PyDict_Keys(get_csv_state(module)->dialects);
1567 }
1568
1569 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1570 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1571 {
1572 PyObject *name_obj, *dialect_obj = NULL;
1573 _csvstate *module_state = get_csv_state(module);
1574 PyObject *dialect;
1575
1576 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1577 return NULL;
1578 if (!PyUnicode_Check(name_obj)) {
1579 PyErr_SetString(PyExc_TypeError,
1580 "dialect name must be a string");
1581 return NULL;
1582 }
1583 dialect = _call_dialect(module_state, dialect_obj, kwargs);
1584 if (dialect == NULL)
1585 return NULL;
1586 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1587 Py_DECREF(dialect);
1588 return NULL;
1589 }
1590 Py_DECREF(dialect);
1591 Py_RETURN_NONE;
1592 }
1593
1594
1595 /*[clinic input]
1596 _csv.unregister_dialect
1597
1598 name: object
1599
1600 Delete the name/dialect mapping associated with a string name.
1601
1602 csv.unregister_dialect(name)
1603 [clinic start generated code]*/
1604
1605 static PyObject *
_csv_unregister_dialect_impl(PyObject * module,PyObject * name)1606 _csv_unregister_dialect_impl(PyObject *module, PyObject *name)
1607 /*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
1608 {
1609 _csvstate *module_state = get_csv_state(module);
1610 int rc = PyDict_Pop(module_state->dialects, name, NULL);
1611 if (rc < 0) {
1612 return NULL;
1613 }
1614 if (rc == 0) {
1615 PyErr_Format(module_state->error_obj, "unknown dialect");
1616 return NULL;
1617 }
1618 Py_RETURN_NONE;
1619 }
1620
1621 /*[clinic input]
1622 _csv.get_dialect
1623
1624 name: object
1625
1626 Return the dialect instance associated with name.
1627
1628 dialect = csv.get_dialect(name)
1629 [clinic start generated code]*/
1630
1631 static PyObject *
_csv_get_dialect_impl(PyObject * module,PyObject * name)1632 _csv_get_dialect_impl(PyObject *module, PyObject *name)
1633 /*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
1634 {
1635 return get_dialect_from_registry(name, get_csv_state(module));
1636 }
1637
1638 /*[clinic input]
1639 _csv.field_size_limit
1640
1641 new_limit: object = NULL
1642
1643 Sets an upper limit on parsed fields.
1644
1645 csv.field_size_limit([limit])
1646
1647 Returns old limit. If limit is not given, no new limit is set and
1648 the old limit is returned
1649 [clinic start generated code]*/
1650
1651 static PyObject *
_csv_field_size_limit_impl(PyObject * module,PyObject * new_limit)1652 _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
1653 /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
1654 {
1655 _csvstate *module_state = get_csv_state(module);
1656 Py_ssize_t old_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit);
1657 if (new_limit != NULL) {
1658 if (!PyLong_CheckExact(new_limit)) {
1659 PyErr_Format(PyExc_TypeError,
1660 "limit must be an integer");
1661 return NULL;
1662 }
1663 Py_ssize_t new_limit_value = PyLong_AsSsize_t(new_limit);
1664 if (new_limit_value == -1 && PyErr_Occurred()) {
1665 return NULL;
1666 }
1667 FT_ATOMIC_STORE_SSIZE_RELAXED(module_state->field_limit, new_limit_value);
1668 }
1669 return PyLong_FromSsize_t(old_limit);
1670 }
1671
/* The exception type defines no slots of its own. */
static PyType_Slot error_slots[] = {
    {0, NULL},
};

/* Spec of the "_csv.Error" exception type; csv_exec() instantiates it with
 * Exception as its only base class.
 */
PyType_Spec error_spec = {
    .name = "_csv.Error",
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .slots = error_slots,
};
1681
1682 /*
1683 * MODULE
1684 */
1685
/* Docstring of the _csv module itself. */
PyDoc_STRVAR(csv_module_doc, "CSV parsing and writing.\n");

/* Docstring of the module-level reader() function. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");

/* Docstring of the module-level writer() function. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");

/* Docstring of the module-level register_dialect() function. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1720
/* Module-level functions.  reader(), writer() and register_dialect() are
 * declared by hand and accept positional plus keyword arguments; the
 * remaining entries come from the Argument Clinic generated *_METHODDEF
 * macros.  The table ends with a NULL sentinel entry.
 */
static struct PyMethodDef csv_methods[] = {
    { "reader", _PyCFunction_CAST(csv_reader),
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", _PyCFunction_CAST(csv_writer),
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    _CSV_LIST_DIALECTS_METHODDEF
    _CSV_UNREGISTER_DIALECT_METHODDEF
    _CSV_GET_DIALECT_METHODDEF
    _CSV_FIELD_SIZE_LIMIT_METHODDEF
    { NULL, NULL }
};
1734
1735 static int
csv_exec(PyObject * module)1736 csv_exec(PyObject *module) {
1737 const StyleDesc *style;
1738 PyObject *temp;
1739 _csvstate *module_state = get_csv_state(module);
1740
1741 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1742 module_state->dialect_type = (PyTypeObject *)temp;
1743 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1744 return -1;
1745 }
1746
1747 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1748 module_state->reader_type = (PyTypeObject *)temp;
1749 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1750 return -1;
1751 }
1752
1753 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1754 module_state->writer_type = (PyTypeObject *)temp;
1755 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1756 return -1;
1757 }
1758
1759 /* Set the field limit */
1760 module_state->field_limit = 128 * 1024;
1761
1762 /* Add _dialects dictionary */
1763 module_state->dialects = PyDict_New();
1764 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1765 return -1;
1766 }
1767
1768 /* Add quote styles into dictionary */
1769 for (style = quote_styles; style->name; style++) {
1770 if (PyModule_AddIntConstant(module, style->name,
1771 style->style) == -1)
1772 return -1;
1773 }
1774
1775 /* Add the CSV exception object to the module. */
1776 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1777 if (bases == NULL) {
1778 return -1;
1779 }
1780 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1781 bases);
1782 Py_DECREF(bases);
1783 if (module_state->error_obj == NULL) {
1784 return -1;
1785 }
1786 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1787 return -1;
1788 }
1789
1790 module_state->str_write = PyUnicode_InternFromString("write");
1791 if (module_state->str_write == NULL) {
1792 return -1;
1793 }
1794 return 0;
1795 }
1796
/* Module slots: csv_exec() runs at module execution; the module declares
 * support for per-interpreter GILs and that it does not need the GIL.
 */
static PyModuleDef_Slot csv_slots[] = {
    {Py_mod_exec, csv_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
    {0, NULL}
};
1803
1804 static struct PyModuleDef _csvmodule = {
1805 PyModuleDef_HEAD_INIT,
1806 "_csv",
1807 csv_module_doc,
1808 sizeof(_csvstate),
1809 csv_methods,
1810 csv_slots,
1811 _csv_traverse,
1812 _csv_clear,
1813 _csv_free
1814 };
1815
/* Import entry point: hand the module definition to PyModuleDef_Init();
 * the module contents are filled in by the Py_mod_exec slot (csv_exec).
 */
PyMODINIT_FUNC
PyInit__csv(void)
{
    return PyModuleDef_Init(&_csvmodule);
}
1821