• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 */
10 
11 // clinic/_csv.c.h uses internal pycore_modsupport.h API
12 #ifndef Py_BUILD_CORE_BUILTIN
13 #  define Py_BUILD_CORE_MODULE 1
14 #endif
15 
16 #include "Python.h"
17 #include "pycore_pyatomic_ft_wrappers.h"
18 
19 #include <stddef.h>               // offsetof()
20 #include <stdbool.h>
21 
22 /*[clinic input]
23 module _csv
24 [clinic start generated code]*/
25 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
26 
27 #include "clinic/_csv.c.h"
/* Sentinel values stored in Py_UCS4 slots in place of a real character. */
#define NOT_SET ((Py_UCS4)-1)   /* dialect character option has no value */
#define EOL ((Py_UCS4)-2)       /* pseudo-character the parser treats as end of line */
30 
31 
32 typedef struct {
33     PyObject *error_obj;   /* CSV exception */
34     PyObject *dialects;   /* Dialect registry */
35     PyTypeObject *dialect_type;
36     PyTypeObject *reader_type;
37     PyTypeObject *writer_type;
38     Py_ssize_t field_limit;   /* max parsed field size */
39     PyObject *str_write;
40 } _csvstate;
41 
42 static struct PyModuleDef _csvmodule;
43 
44 static inline _csvstate*
get_csv_state(PyObject * module)45 get_csv_state(PyObject *module)
46 {
47     void *state = PyModule_GetState(module);
48     assert(state != NULL);
49     return (_csvstate *)state;
50 }
51 
52 static int
_csv_clear(PyObject * module)53 _csv_clear(PyObject *module)
54 {
55     _csvstate *module_state = PyModule_GetState(module);
56     Py_CLEAR(module_state->error_obj);
57     Py_CLEAR(module_state->dialects);
58     Py_CLEAR(module_state->dialect_type);
59     Py_CLEAR(module_state->reader_type);
60     Py_CLEAR(module_state->writer_type);
61     Py_CLEAR(module_state->str_write);
62     return 0;
63 }
64 
65 static int
_csv_traverse(PyObject * module,visitproc visit,void * arg)66 _csv_traverse(PyObject *module, visitproc visit, void *arg)
67 {
68     _csvstate *module_state = PyModule_GetState(module);
69     Py_VISIT(module_state->error_obj);
70     Py_VISIT(module_state->dialects);
71     Py_VISIT(module_state->dialect_type);
72     Py_VISIT(module_state->reader_type);
73     Py_VISIT(module_state->writer_type);
74     return 0;
75 }
76 
77 static void
_csv_free(void * module)78 _csv_free(void *module)
79 {
80    _csv_clear((PyObject *)module);
81 }
82 
/* States of the CSV reader's character-by-character parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
88 
/* Quoting modes a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE,
    QUOTE_STRINGS,
    QUOTE_NOTNULL
} QuoteStyle;
93 
94 typedef struct {
95     QuoteStyle style;
96     const char *name;
97 } StyleDesc;
98 
99 static const StyleDesc quote_styles[] = {
100     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
101     { QUOTE_ALL,        "QUOTE_ALL" },
102     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
103     { QUOTE_NONE,       "QUOTE_NONE" },
104     { QUOTE_STRINGS,    "QUOTE_STRINGS" },
105     { QUOTE_NOTNULL,    "QUOTE_NOTNULL" },
106     { 0 }
107 };
108 
109 typedef struct {
110     PyObject_HEAD
111 
112     char doublequote;           /* is " represented by ""? */
113     char skipinitialspace;      /* ignore spaces following delimiter? */
114     char strict;                /* raise exception on bad CSV */
115     int quoting;                /* style of quoting to write */
116     Py_UCS4 delimiter;          /* field separator */
117     Py_UCS4 quotechar;          /* quote character */
118     Py_UCS4 escapechar;         /* escape character */
119     PyObject *lineterminator;   /* string to write between records */
120 
121 } DialectObj;
122 
123 typedef struct {
124     PyObject_HEAD
125 
126     PyObject *input_iter;   /* iterate over this for input lines */
127 
128     DialectObj *dialect;    /* parsing dialect */
129 
130     PyObject *fields;           /* field list for current record */
131     ParserState state;          /* current CSV parse state */
132     Py_UCS4 *field;             /* temporary buffer */
133     Py_ssize_t field_size;      /* size of allocated buffer */
134     Py_ssize_t field_len;       /* length of current field */
135     bool unquoted_field;        /* true if no quotes around the current field */
136     unsigned long line_num;     /* Source-file line number */
137 } ReaderObj;
138 
139 typedef struct {
140     PyObject_HEAD
141 
142     PyObject *write;    /* write output lines to this file */
143 
144     DialectObj *dialect;    /* parsing dialect */
145 
146     Py_UCS4 *rec;            /* buffer for parser.join */
147     Py_ssize_t rec_size;        /* size of allocated record */
148     Py_ssize_t rec_len;         /* length of record */
149     int num_fields;             /* number of fields in record */
150 
151     PyObject *error_obj;       /* cached error object */
152 } WriterObj;
153 
154 /*
155  * DIALECT class
156  */
157 
158 static PyObject *
get_dialect_from_registry(PyObject * name_obj,_csvstate * module_state)159 get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
160 {
161     PyObject *dialect_obj;
162     if (PyDict_GetItemRef(module_state->dialects, name_obj, &dialect_obj) == 0) {
163         PyErr_SetString(module_state->error_obj, "unknown dialect");
164     }
165     return dialect_obj;
166 }
167 
168 static PyObject *
get_char_or_None(Py_UCS4 c)169 get_char_or_None(Py_UCS4 c)
170 {
171     if (c == NOT_SET) {
172         Py_RETURN_NONE;
173     }
174     else
175         return PyUnicode_FromOrdinal(c);
176 }
177 
178 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))179 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
180 {
181     return Py_XNewRef(self->lineterminator);
182 }
183 
184 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))185 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
186 {
187     return get_char_or_None(self->delimiter);
188 }
189 
190 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))191 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
192 {
193     return get_char_or_None(self->escapechar);
194 }
195 
196 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))197 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
198 {
199     return get_char_or_None(self->quotechar);
200 }
201 
202 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))203 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
204 {
205     return PyLong_FromLong(self->quoting);
206 }
207 
208 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)209 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
210 {
211     if (src == NULL)
212         *target = dflt;
213     else {
214         int b = PyObject_IsTrue(src);
215         if (b < 0)
216             return -1;
217         *target = (char)b;
218     }
219     return 0;
220 }
221 
222 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)223 _set_int(const char *name, int *target, PyObject *src, int dflt)
224 {
225     if (src == NULL)
226         *target = dflt;
227     else {
228         int value;
229         if (!PyLong_CheckExact(src)) {
230             PyErr_Format(PyExc_TypeError,
231                          "\"%s\" must be an integer", name);
232             return -1;
233         }
234         value = PyLong_AsInt(src);
235         if (value == -1 && PyErr_Occurred()) {
236             return -1;
237         }
238         *target = value;
239     }
240     return 0;
241 }
242 
243 static int
_set_char_or_none(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)244 _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
245 {
246     if (src == NULL) {
247         *target = dflt;
248     }
249     else {
250         *target = NOT_SET;
251         if (src != Py_None) {
252             if (!PyUnicode_Check(src)) {
253                 PyErr_Format(PyExc_TypeError,
254                     "\"%s\" must be string or None, not %.200s", name,
255                     Py_TYPE(src)->tp_name);
256                 return -1;
257             }
258             Py_ssize_t len = PyUnicode_GetLength(src);
259             if (len < 0) {
260                 return -1;
261             }
262             if (len != 1) {
263                 PyErr_Format(PyExc_TypeError,
264                     "\"%s\" must be a 1-character string",
265                     name);
266                 return -1;
267             }
268             *target = PyUnicode_READ_CHAR(src, 0);
269         }
270     }
271     return 0;
272 }
273 
274 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)275 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
276 {
277     if (src == NULL) {
278         *target = dflt;
279     }
280     else {
281         if (!PyUnicode_Check(src)) {
282             PyErr_Format(PyExc_TypeError,
283                          "\"%s\" must be string, not %.200s", name,
284                          Py_TYPE(src)->tp_name);
285                 return -1;
286         }
287         Py_ssize_t len = PyUnicode_GetLength(src);
288         if (len < 0) {
289             return -1;
290         }
291         if (len != 1) {
292             PyErr_Format(PyExc_TypeError,
293                          "\"%s\" must be a 1-character string",
294                          name);
295             return -1;
296         }
297         *target = PyUnicode_READ_CHAR(src, 0);
298     }
299     return 0;
300 }
301 
302 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)303 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
304 {
305     if (src == NULL)
306         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
307     else {
308         if (src == Py_None)
309             *target = NULL;
310         else if (!PyUnicode_Check(src)) {
311             PyErr_Format(PyExc_TypeError,
312                          "\"%s\" must be a string", name);
313             return -1;
314         }
315         else {
316             Py_XSETREF(*target, Py_NewRef(src));
317         }
318     }
319     return 0;
320 }
321 
322 static int
dialect_check_quoting(int quoting)323 dialect_check_quoting(int quoting)
324 {
325     const StyleDesc *qs;
326 
327     for (qs = quote_styles; qs->name; qs++) {
328         if ((int)qs->style == quoting)
329             return 0;
330     }
331     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
332     return -1;
333 }
334 
335 static int
dialect_check_char(const char * name,Py_UCS4 c,DialectObj * dialect,bool allowspace)336 dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect, bool allowspace)
337 {
338     if (c == '\r' || c == '\n' || (c == ' ' && !allowspace)) {
339         PyErr_Format(PyExc_ValueError, "bad %s value", name);
340         return -1;
341     }
342     if (PyUnicode_FindChar(
343         dialect->lineterminator, c, 0,
344         PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
345     {
346         PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
347         return -1;
348     }
349     return 0;
350 }
351 
352  static int
dialect_check_chars(const char * name1,const char * name2,Py_UCS4 c1,Py_UCS4 c2)353 dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
354 {
355     if (c1 == c2 && c1 != NOT_SET) {
356         PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
357         return -1;
358     }
359     return 0;
360 }
361 
362 #define D_OFF(x) offsetof(DialectObj, x)
363 
364 static struct PyMemberDef Dialect_memberlist[] = {
365     { "skipinitialspace",   Py_T_BOOL, D_OFF(skipinitialspace), Py_READONLY },
366     { "doublequote",        Py_T_BOOL, D_OFF(doublequote), Py_READONLY },
367     { "strict",             Py_T_BOOL, D_OFF(strict), Py_READONLY },
368     { NULL }
369 };
370 
371 static PyGetSetDef Dialect_getsetlist[] = {
372     { "delimiter",          (getter)Dialect_get_delimiter},
373     { "escapechar",             (getter)Dialect_get_escapechar},
374     { "lineterminator",         (getter)Dialect_get_lineterminator},
375     { "quotechar",              (getter)Dialect_get_quotechar},
376     { "quoting",                (getter)Dialect_get_quoting},
377     {NULL},
378 };
379 
380 static void
Dialect_dealloc(DialectObj * self)381 Dialect_dealloc(DialectObj *self)
382 {
383     PyTypeObject *tp = Py_TYPE(self);
384     PyObject_GC_UnTrack(self);
385     tp->tp_clear((PyObject *)self);
386     PyObject_GC_Del(self);
387     Py_DECREF(tp);
388 }
389 
/* Keyword names accepted by dialect_new(), in the order its argument
 * parsing expects them; NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
402 
403 static _csvstate *
_csv_state_from_type(PyTypeObject * type,const char * name)404 _csv_state_from_type(PyTypeObject *type, const char *name)
405 {
406     PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
407     if (module == NULL) {
408         return NULL;
409     }
410     _csvstate *module_state = PyModule_GetState(module);
411     if (module_state == NULL) {
412         PyErr_Format(PyExc_SystemError,
413                      "%s: No _csv module state found", name);
414         return NULL;
415     }
416     return module_state;
417 }
418 
419 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)420 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
421 {
422     DialectObj *self;
423     PyObject *ret = NULL;
424     PyObject *dialect = NULL;
425     PyObject *delimiter = NULL;
426     PyObject *doublequote = NULL;
427     PyObject *escapechar = NULL;
428     PyObject *lineterminator = NULL;
429     PyObject *quotechar = NULL;
430     PyObject *quoting = NULL;
431     PyObject *skipinitialspace = NULL;
432     PyObject *strict = NULL;
433 
434     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
435                                      "|OOOOOOOOO", dialect_kws,
436                                      &dialect,
437                                      &delimiter,
438                                      &doublequote,
439                                      &escapechar,
440                                      &lineterminator,
441                                      &quotechar,
442                                      &quoting,
443                                      &skipinitialspace,
444                                      &strict))
445         return NULL;
446 
447     _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
448     if (module_state == NULL) {
449         return NULL;
450     }
451 
452     if (dialect != NULL) {
453         if (PyUnicode_Check(dialect)) {
454             dialect = get_dialect_from_registry(dialect, module_state);
455             if (dialect == NULL)
456                 return NULL;
457         }
458         else
459             Py_INCREF(dialect);
460         /* Can we reuse this instance? */
461         if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
462             delimiter == NULL &&
463             doublequote == NULL &&
464             escapechar == NULL &&
465             lineterminator == NULL &&
466             quotechar == NULL &&
467             quoting == NULL &&
468             skipinitialspace == NULL &&
469             strict == NULL)
470             return dialect;
471     }
472 
473     self = (DialectObj *)type->tp_alloc(type, 0);
474     if (self == NULL) {
475         Py_CLEAR(dialect);
476         return NULL;
477     }
478     self->lineterminator = NULL;
479 
480     Py_XINCREF(delimiter);
481     Py_XINCREF(doublequote);
482     Py_XINCREF(escapechar);
483     Py_XINCREF(lineterminator);
484     Py_XINCREF(quotechar);
485     Py_XINCREF(quoting);
486     Py_XINCREF(skipinitialspace);
487     Py_XINCREF(strict);
488     if (dialect != NULL) {
489 #define DIALECT_GETATTR(v, n)                            \
490         do {                                             \
491             if (v == NULL) {                             \
492                 v = PyObject_GetAttrString(dialect, n);  \
493                 if (v == NULL)                           \
494                     PyErr_Clear();                       \
495             }                                            \
496         } while (0)
497         DIALECT_GETATTR(delimiter, "delimiter");
498         DIALECT_GETATTR(doublequote, "doublequote");
499         DIALECT_GETATTR(escapechar, "escapechar");
500         DIALECT_GETATTR(lineterminator, "lineterminator");
501         DIALECT_GETATTR(quotechar, "quotechar");
502         DIALECT_GETATTR(quoting, "quoting");
503         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
504         DIALECT_GETATTR(strict, "strict");
505     }
506 
507     /* check types and convert to C values */
508 #define DIASET(meth, name, target, src, dflt) \
509     if (meth(name, target, src, dflt)) \
510         goto err
511     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
512     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
513     DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
514     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
515     DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
516     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
517     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
518     DIASET(_set_bool, "strict", &self->strict, strict, false);
519 
520     /* validate options */
521     if (dialect_check_quoting(self->quoting))
522         goto err;
523     if (self->delimiter == NOT_SET) {
524         PyErr_SetString(PyExc_TypeError,
525                         "\"delimiter\" must be a 1-character string");
526         goto err;
527     }
528     if (quotechar == Py_None && quoting == NULL)
529         self->quoting = QUOTE_NONE;
530     if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
531         PyErr_SetString(PyExc_TypeError,
532                         "quotechar must be set if quoting enabled");
533         goto err;
534     }
535     if (self->lineterminator == NULL) {
536         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
537         goto err;
538     }
539     if (dialect_check_char("delimiter", self->delimiter, self, true) ||
540         dialect_check_char("escapechar", self->escapechar, self,
541                            !self->skipinitialspace) ||
542         dialect_check_char("quotechar", self->quotechar, self,
543                            !self->skipinitialspace) ||
544         dialect_check_chars("delimiter", "escapechar",
545                             self->delimiter, self->escapechar) ||
546         dialect_check_chars("delimiter", "quotechar",
547                             self->delimiter, self->quotechar) ||
548         dialect_check_chars("escapechar", "quotechar",
549                             self->escapechar, self->quotechar))
550     {
551         goto err;
552     }
553 
554     ret = Py_NewRef(self);
555 err:
556     Py_CLEAR(self);
557     Py_CLEAR(dialect);
558     Py_CLEAR(delimiter);
559     Py_CLEAR(doublequote);
560     Py_CLEAR(escapechar);
561     Py_CLEAR(lineterminator);
562     Py_CLEAR(quotechar);
563     Py_CLEAR(quoting);
564     Py_CLEAR(skipinitialspace);
565     Py_CLEAR(strict);
566     return ret;
567 }
568 
569 /* Since dialect is now a heap type, it inherits pickling method for
570  * protocol 0 and 1 from object, therefore it needs to be overridden */
571 
572 PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
573 
574 static PyObject *
Dialect_reduce(PyObject * self,PyObject * args)575 Dialect_reduce(PyObject *self, PyObject *args) {
576     PyErr_Format(PyExc_TypeError,
577         "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
578     return NULL;
579 }
580 
581 static struct PyMethodDef dialect_methods[] = {
582     {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
583     {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
584     {NULL, NULL}
585 };
586 
587 PyDoc_STRVAR(Dialect_Type_doc,
588 "CSV dialect\n"
589 "\n"
590 "The Dialect type records CSV parsing and generation options.\n");
591 
592 static int
Dialect_clear(DialectObj * self)593 Dialect_clear(DialectObj *self)
594 {
595     Py_CLEAR(self->lineterminator);
596     return 0;
597 }
598 
599 static int
Dialect_traverse(DialectObj * self,visitproc visit,void * arg)600 Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
601 {
602     Py_VISIT(self->lineterminator);
603     Py_VISIT(Py_TYPE(self));
604     return 0;
605 }
606 
607 static PyType_Slot Dialect_Type_slots[] = {
608     {Py_tp_doc, (char*)Dialect_Type_doc},
609     {Py_tp_members, Dialect_memberlist},
610     {Py_tp_getset, Dialect_getsetlist},
611     {Py_tp_new, dialect_new},
612     {Py_tp_methods, dialect_methods},
613     {Py_tp_dealloc, Dialect_dealloc},
614     {Py_tp_clear, Dialect_clear},
615     {Py_tp_traverse, Dialect_traverse},
616     {0, NULL}
617 };
618 
619 PyType_Spec Dialect_Type_spec = {
620     .name = "_csv.Dialect",
621     .basicsize = sizeof(DialectObj),
622     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
623               Py_TPFLAGS_IMMUTABLETYPE),
624     .slots = Dialect_Type_slots,
625 };
626 
627 
628 /*
629  * Return an instance of the dialect type, given a Python instance or kwarg
630  * description of the dialect
631  */
632 static PyObject *
_call_dialect(_csvstate * module_state,PyObject * dialect_inst,PyObject * kwargs)633 _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
634 {
635     PyObject *type = (PyObject *)module_state->dialect_type;
636     if (dialect_inst) {
637         return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
638     }
639     else {
640         return PyObject_VectorcallDict(type, NULL, 0, kwargs);
641     }
642 }
643 
644 /*
645  * READER
646  */
647 static int
parse_save_field(ReaderObj * self)648 parse_save_field(ReaderObj *self)
649 {
650     int quoting = self->dialect->quoting;
651     PyObject *field;
652 
653     if (self->unquoted_field &&
654         self->field_len == 0 &&
655         (quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
656     {
657         field = Py_NewRef(Py_None);
658     }
659     else {
660         field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
661                                         (void *) self->field, self->field_len);
662         if (field == NULL) {
663             return -1;
664         }
665         if (self->unquoted_field &&
666             self->field_len != 0 &&
667             (quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
668         {
669             PyObject *tmp = PyNumber_Float(field);
670             Py_DECREF(field);
671             if (tmp == NULL) {
672                 return -1;
673             }
674             field = tmp;
675         }
676         self->field_len = 0;
677     }
678     if (PyList_Append(self->fields, field) < 0) {
679         Py_DECREF(field);
680         return -1;
681     }
682     Py_DECREF(field);
683     return 0;
684 }
685 
686 static int
parse_grow_buff(ReaderObj * self)687 parse_grow_buff(ReaderObj *self)
688 {
689     assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
690 
691     Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
692     Py_UCS4 *field_new = self->field;
693     PyMem_Resize(field_new, Py_UCS4, field_size_new);
694     if (field_new == NULL) {
695         PyErr_NoMemory();
696         return 0;
697     }
698     self->field = field_new;
699     self->field_size = field_size_new;
700     return 1;
701 }
702 
703 static int
parse_add_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)704 parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
705 {
706     Py_ssize_t field_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit);
707     if (self->field_len >= field_limit) {
708         PyErr_Format(module_state->error_obj,
709                      "field larger than field limit (%zd)",
710                      field_limit);
711         return -1;
712     }
713     if (self->field_len == self->field_size && !parse_grow_buff(self))
714         return -1;
715     self->field[self->field_len++] = c;
716     return 0;
717 }
718 
719 static int
parse_process_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)720 parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
721 {
722     DialectObj *dialect = self->dialect;
723 
724     switch (self->state) {
725     case START_RECORD:
726         /* start of record */
727         if (c == EOL)
728             /* empty line - return [] */
729             break;
730         else if (c == '\n' || c == '\r') {
731             self->state = EAT_CRNL;
732             break;
733         }
734         /* normal character - handle as START_FIELD */
735         self->state = START_FIELD;
736         /* fallthru */
737     case START_FIELD:
738         /* expecting field */
739         self->unquoted_field = true;
740         if (c == '\n' || c == '\r' || c == EOL) {
741             /* save empty field - return [fields] */
742             if (parse_save_field(self) < 0)
743                 return -1;
744             self->state = (c == EOL ? START_RECORD : EAT_CRNL);
745         }
746         else if (c == dialect->quotechar &&
747                  dialect->quoting != QUOTE_NONE) {
748             /* start quoted field */
749             self->unquoted_field = false;
750             self->state = IN_QUOTED_FIELD;
751         }
752         else if (c == dialect->escapechar) {
753             /* possible escaped character */
754             self->state = ESCAPED_CHAR;
755         }
756         else if (c == ' ' && dialect->skipinitialspace)
757             /* ignore spaces at start of field */
758             ;
759         else if (c == dialect->delimiter) {
760             /* save empty field */
761             if (parse_save_field(self) < 0)
762                 return -1;
763         }
764         else {
765             /* begin new unquoted field */
766             if (parse_add_char(self, module_state, c) < 0)
767                 return -1;
768             self->state = IN_FIELD;
769         }
770         break;
771 
772     case ESCAPED_CHAR:
773         if (c == '\n' || c=='\r') {
774             if (parse_add_char(self, module_state, c) < 0)
775                 return -1;
776             self->state = AFTER_ESCAPED_CRNL;
777             break;
778         }
779         if (c == EOL)
780             c = '\n';
781         if (parse_add_char(self, module_state, c) < 0)
782             return -1;
783         self->state = IN_FIELD;
784         break;
785 
786     case AFTER_ESCAPED_CRNL:
787         if (c == EOL)
788             break;
789         /*fallthru*/
790 
791     case IN_FIELD:
792         /* in unquoted field */
793         if (c == '\n' || c == '\r' || c == EOL) {
794             /* end of line - return [fields] */
795             if (parse_save_field(self) < 0)
796                 return -1;
797             self->state = (c == EOL ? START_RECORD : EAT_CRNL);
798         }
799         else if (c == dialect->escapechar) {
800             /* possible escaped character */
801             self->state = ESCAPED_CHAR;
802         }
803         else if (c == dialect->delimiter) {
804             /* save field - wait for new field */
805             if (parse_save_field(self) < 0)
806                 return -1;
807             self->state = START_FIELD;
808         }
809         else {
810             /* normal character - save in field */
811             if (parse_add_char(self, module_state, c) < 0)
812                 return -1;
813         }
814         break;
815 
816     case IN_QUOTED_FIELD:
817         /* in quoted field */
818         if (c == EOL)
819             ;
820         else if (c == dialect->escapechar) {
821             /* Possible escape character */
822             self->state = ESCAPE_IN_QUOTED_FIELD;
823         }
824         else if (c == dialect->quotechar &&
825                  dialect->quoting != QUOTE_NONE) {
826             if (dialect->doublequote) {
827                 /* doublequote; " represented by "" */
828                 self->state = QUOTE_IN_QUOTED_FIELD;
829             }
830             else {
831                 /* end of quote part of field */
832                 self->state = IN_FIELD;
833             }
834         }
835         else {
836             /* normal character - save in field */
837             if (parse_add_char(self, module_state, c) < 0)
838                 return -1;
839         }
840         break;
841 
842     case ESCAPE_IN_QUOTED_FIELD:
843         if (c == EOL)
844             c = '\n';
845         if (parse_add_char(self, module_state, c) < 0)
846             return -1;
847         self->state = IN_QUOTED_FIELD;
848         break;
849 
850     case QUOTE_IN_QUOTED_FIELD:
851         /* doublequote - seen a quote in a quoted field */
852         if (dialect->quoting != QUOTE_NONE &&
853             c == dialect->quotechar) {
854             /* save "" as " */
855             if (parse_add_char(self, module_state, c) < 0)
856                 return -1;
857             self->state = IN_QUOTED_FIELD;
858         }
859         else if (c == dialect->delimiter) {
860             /* save field - wait for new field */
861             if (parse_save_field(self) < 0)
862                 return -1;
863             self->state = START_FIELD;
864         }
865         else if (c == '\n' || c == '\r' || c == EOL) {
866             /* end of line - return [fields] */
867             if (parse_save_field(self) < 0)
868                 return -1;
869             self->state = (c == EOL ? START_RECORD : EAT_CRNL);
870         }
871         else if (!dialect->strict) {
872             if (parse_add_char(self, module_state, c) < 0)
873                 return -1;
874             self->state = IN_FIELD;
875         }
876         else {
877             /* illegal */
878             PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
879                             dialect->delimiter,
880                             dialect->quotechar);
881             return -1;
882         }
883         break;
884 
885     case EAT_CRNL:
886         if (c == '\n' || c == '\r')
887             ;
888         else if (c == EOL)
889             self->state = START_RECORD;
890         else {
891             PyErr_Format(module_state->error_obj,
892                          "new-line character seen in unquoted field - "
893                          "do you need to open the file with newline=''?");
894             return -1;
895         }
896         break;
897 
898     }
899     return 0;
900 }
901 
902 static int
parse_reset(ReaderObj * self)903 parse_reset(ReaderObj *self)
904 {
905     Py_XSETREF(self->fields, PyList_New(0));
906     if (self->fields == NULL)
907         return -1;
908     self->field_len = 0;
909     self->state = START_RECORD;
910     self->unquoted_field = false;
911     return 0;
912 }
913 
914 static PyObject *
Reader_iternext(ReaderObj * self)915 Reader_iternext(ReaderObj *self)
916 {
917     PyObject *fields = NULL;
918     Py_UCS4 c;
919     Py_ssize_t pos, linelen;
920     int kind;
921     const void *data;
922     PyObject *lineobj;
923 
924     _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
925                                                    "Reader.__next__");
926     if (module_state == NULL) {
927         return NULL;
928     }
929 
930     if (parse_reset(self) < 0)
931         return NULL;
932     do {
933         lineobj = PyIter_Next(self->input_iter);
934         if (lineobj == NULL) {
935             /* End of input OR exception */
936             if (!PyErr_Occurred() && (self->field_len != 0 ||
937                                       self->state == IN_QUOTED_FIELD)) {
938                 if (self->dialect->strict)
939                     PyErr_SetString(module_state->error_obj,
940                                     "unexpected end of data");
941                 else if (parse_save_field(self) >= 0)
942                     break;
943             }
944             return NULL;
945         }
946         if (!PyUnicode_Check(lineobj)) {
947             PyErr_Format(module_state->error_obj,
948                          "iterator should return strings, "
949                          "not %.200s "
950                          "(the file should be opened in text mode)",
951                          Py_TYPE(lineobj)->tp_name
952                 );
953             Py_DECREF(lineobj);
954             return NULL;
955         }
956         ++self->line_num;
957         kind = PyUnicode_KIND(lineobj);
958         data = PyUnicode_DATA(lineobj);
959         pos = 0;
960         linelen = PyUnicode_GET_LENGTH(lineobj);
961         while (linelen--) {
962             c = PyUnicode_READ(kind, data, pos);
963             if (parse_process_char(self, module_state, c) < 0) {
964                 Py_DECREF(lineobj);
965                 goto err;
966             }
967             pos++;
968         }
969         Py_DECREF(lineobj);
970         if (parse_process_char(self, module_state, EOL) < 0)
971             goto err;
972     } while (self->state != START_RECORD);
973 
974     fields = self->fields;
975     self->fields = NULL;
976 err:
977     return fields;
978 }
979 
980 static void
Reader_dealloc(ReaderObj * self)981 Reader_dealloc(ReaderObj *self)
982 {
983     PyTypeObject *tp = Py_TYPE(self);
984     PyObject_GC_UnTrack(self);
985     tp->tp_clear((PyObject *)self);
986     if (self->field != NULL) {
987         PyMem_Free(self->field);
988         self->field = NULL;
989     }
990     PyObject_GC_Del(self);
991     Py_DECREF(tp);
992 }
993 
994 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)995 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
996 {
997     Py_VISIT(self->dialect);
998     Py_VISIT(self->input_iter);
999     Py_VISIT(self->fields);
1000     Py_VISIT(Py_TYPE(self));
1001     return 0;
1002 }
1003 
1004 static int
Reader_clear(ReaderObj * self)1005 Reader_clear(ReaderObj *self)
1006 {
1007     Py_CLEAR(self->dialect);
1008     Py_CLEAR(self->input_iter);
1009     Py_CLEAR(self->fields);
1010     return 0;
1011 }
1012 
1013 PyDoc_STRVAR(Reader_Type_doc,
1014 "CSV reader\n"
1015 "\n"
1016 "Reader objects are responsible for reading and parsing tabular data\n"
1017 "in CSV format.\n"
1018 );
1019 
1020 static struct PyMethodDef Reader_methods[] = {
1021     { NULL, NULL }
1022 };
1023 #define R_OFF(x) offsetof(ReaderObj, x)
1024 
1025 static struct PyMemberDef Reader_memberlist[] = {
1026     { "dialect", _Py_T_OBJECT, R_OFF(dialect), Py_READONLY },
1027     { "line_num", Py_T_ULONG, R_OFF(line_num), Py_READONLY },
1028     { NULL }
1029 };
1030 
1031 
1032 static PyType_Slot Reader_Type_slots[] = {
1033     {Py_tp_doc, (char*)Reader_Type_doc},
1034     {Py_tp_traverse, Reader_traverse},
1035     {Py_tp_iter, PyObject_SelfIter},
1036     {Py_tp_iternext, Reader_iternext},
1037     {Py_tp_methods, Reader_methods},
1038     {Py_tp_members, Reader_memberlist},
1039     {Py_tp_clear, Reader_clear},
1040     {Py_tp_dealloc, Reader_dealloc},
1041     {0, NULL}
1042 };
1043 
1044 PyType_Spec Reader_Type_spec = {
1045     .name = "_csv.reader",
1046     .basicsize = sizeof(ReaderObj),
1047     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1048               Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
1049     .slots = Reader_Type_slots
1050 };
1051 
1052 
1053 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)1054 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1055 {
1056     PyObject * iterator, * dialect = NULL;
1057     _csvstate *module_state = get_csv_state(module);
1058     ReaderObj * self = PyObject_GC_New(
1059         ReaderObj,
1060         module_state->reader_type);
1061 
1062     if (!self)
1063         return NULL;
1064 
1065     self->dialect = NULL;
1066     self->fields = NULL;
1067     self->input_iter = NULL;
1068     self->field = NULL;
1069     self->field_size = 0;
1070     self->line_num = 0;
1071 
1072     if (parse_reset(self) < 0) {
1073         Py_DECREF(self);
1074         return NULL;
1075     }
1076 
1077     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1078         Py_DECREF(self);
1079         return NULL;
1080     }
1081     self->input_iter = PyObject_GetIter(iterator);
1082     if (self->input_iter == NULL) {
1083         Py_DECREF(self);
1084         return NULL;
1085     }
1086     self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1087                                                 keyword_args);
1088     if (self->dialect == NULL) {
1089         Py_DECREF(self);
1090         return NULL;
1091     }
1092 
1093     PyObject_GC_Track(self);
1094     return (PyObject *)self;
1095 }
1096 
1097 /*
1098  * WRITER
1099  */
1100 /* ---------------------------------------------------------------- */
1101 static void
join_reset(WriterObj * self)1102 join_reset(WriterObj *self)
1103 {
1104     self->rec_len = 0;
1105     self->num_fields = 0;
1106 }
1107 
1108 #define MEM_INCR 32768
1109 
1110 /* Calculate new record length or append field to record.  Return new
1111  * record length.
1112  */
1113 static Py_ssize_t
join_append_data(WriterObj * self,int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1114 join_append_data(WriterObj *self, int field_kind, const void *field_data,
1115                  Py_ssize_t field_len, int *quoted,
1116                  int copy_phase)
1117 {
1118     DialectObj *dialect = self->dialect;
1119     int i;
1120     Py_ssize_t rec_len;
1121 
1122 #define INCLEN \
1123     do {\
1124         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1125             goto overflow; \
1126         } \
1127         rec_len++; \
1128     } while(0)
1129 
1130 #define ADDCH(c)                                \
1131     do {\
1132         if (copy_phase) \
1133             self->rec[rec_len] = c;\
1134         INCLEN;\
1135     } while(0)
1136 
1137     rec_len = self->rec_len;
1138 
1139     /* If this is not the first field we need a field separator */
1140     if (self->num_fields > 0)
1141         ADDCH(dialect->delimiter);
1142 
1143     /* Handle preceding quote */
1144     if (copy_phase && *quoted)
1145         ADDCH(dialect->quotechar);
1146 
1147     /* Copy/count field data */
1148     /* If field is null just pass over */
1149     for (i = 0; field_data && (i < field_len); i++) {
1150         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1151         int want_escape = 0;
1152 
1153         if (c == dialect->delimiter ||
1154             c == dialect->escapechar ||
1155             c == dialect->quotechar  ||
1156             c == '\n'  ||
1157             c == '\r'  ||
1158             PyUnicode_FindChar(
1159                 dialect->lineterminator, c, 0,
1160                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1161             if (dialect->quoting == QUOTE_NONE)
1162                 want_escape = 1;
1163             else {
1164                 if (c == dialect->quotechar) {
1165                     if (dialect->doublequote)
1166                         ADDCH(dialect->quotechar);
1167                     else
1168                         want_escape = 1;
1169                 }
1170                 else if (c == dialect->escapechar) {
1171                     want_escape = 1;
1172                 }
1173                 if (!want_escape)
1174                     *quoted = 1;
1175             }
1176             if (want_escape) {
1177                 if (dialect->escapechar == NOT_SET) {
1178                     PyErr_Format(self->error_obj,
1179                                  "need to escape, but no escapechar set");
1180                     return -1;
1181                 }
1182                 ADDCH(dialect->escapechar);
1183             }
1184         }
1185         /* Copy field character into record buffer.
1186          */
1187         ADDCH(c);
1188     }
1189 
1190     if (*quoted) {
1191         if (copy_phase)
1192             ADDCH(dialect->quotechar);
1193         else {
1194             INCLEN; /* starting quote */
1195             INCLEN; /* ending quote */
1196         }
1197     }
1198     return rec_len;
1199 
1200   overflow:
1201     PyErr_NoMemory();
1202     return -1;
1203 #undef ADDCH
1204 #undef INCLEN
1205 }
1206 
1207 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1208 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1209 {
1210     assert(rec_len >= 0);
1211 
1212     if (rec_len > self->rec_size) {
1213         size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1214         Py_UCS4 *rec_new = self->rec;
1215         PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1216         if (rec_new == NULL) {
1217             PyErr_NoMemory();
1218             return 0;
1219         }
1220         self->rec = rec_new;
1221         self->rec_size = (Py_ssize_t)rec_size_new;
1222     }
1223     return 1;
1224 }
1225 
1226 static int
join_append(WriterObj * self,PyObject * field,int quoted)1227 join_append(WriterObj *self, PyObject *field, int quoted)
1228 {
1229     DialectObj *dialect = self->dialect;
1230     int field_kind = -1;
1231     const void *field_data = NULL;
1232     Py_ssize_t field_len = 0;
1233     Py_ssize_t rec_len;
1234 
1235     if (field != NULL) {
1236         field_kind = PyUnicode_KIND(field);
1237         field_data = PyUnicode_DATA(field);
1238         field_len = PyUnicode_GET_LENGTH(field);
1239     }
1240     if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
1241         if (dialect->quoting == QUOTE_NONE ||
1242             (field == NULL &&
1243              (dialect->quoting == QUOTE_STRINGS ||
1244               dialect->quoting == QUOTE_NOTNULL)))
1245         {
1246             PyErr_Format(self->error_obj,
1247                          "empty field must be quoted if delimiter is a space "
1248                          "and skipinitialspace is true");
1249             return 0;
1250         }
1251         quoted = 1;
1252     }
1253     rec_len = join_append_data(self, field_kind, field_data, field_len,
1254                                &quoted, 0);
1255     if (rec_len < 0)
1256         return 0;
1257 
1258     /* grow record buffer if necessary */
1259     if (!join_check_rec_size(self, rec_len))
1260         return 0;
1261 
1262     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1263                                      &quoted, 1);
1264     self->num_fields++;
1265 
1266     return 1;
1267 }
1268 
1269 static int
join_append_lineterminator(WriterObj * self)1270 join_append_lineterminator(WriterObj *self)
1271 {
1272     Py_ssize_t terminator_len, i;
1273     int term_kind;
1274     const void *term_data;
1275 
1276     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1277     if (terminator_len == -1)
1278         return 0;
1279 
1280     /* grow record buffer if necessary */
1281     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1282         return 0;
1283 
1284     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1285     term_data = PyUnicode_DATA(self->dialect->lineterminator);
1286     for (i = 0; i < terminator_len; i++)
1287         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1288     self->rec_len += terminator_len;
1289 
1290     return 1;
1291 }
1292 
1293 PyDoc_STRVAR(csv_writerow_doc,
1294 "writerow(iterable)\n"
1295 "\n"
1296 "Construct and write a CSV record from an iterable of fields.  Non-string\n"
1297 "elements will be converted to string.");
1298 
1299 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1300 csv_writerow(WriterObj *self, PyObject *seq)
1301 {
1302     DialectObj *dialect = self->dialect;
1303     PyObject *iter, *field, *line, *result;
1304     bool null_field = false;
1305 
1306     iter = PyObject_GetIter(seq);
1307     if (iter == NULL) {
1308         if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1309             PyErr_Format(self->error_obj,
1310                          "iterable expected, not %.200s",
1311                          Py_TYPE(seq)->tp_name);
1312         }
1313         return NULL;
1314     }
1315 
1316     /* Join all fields in internal buffer.
1317      */
1318     join_reset(self);
1319     while ((field = PyIter_Next(iter))) {
1320         int append_ok;
1321         int quoted;
1322 
1323         switch (dialect->quoting) {
1324         case QUOTE_NONNUMERIC:
1325             quoted = !PyNumber_Check(field);
1326             break;
1327         case QUOTE_ALL:
1328             quoted = 1;
1329             break;
1330         case QUOTE_STRINGS:
1331             quoted = PyUnicode_Check(field);
1332             break;
1333         case QUOTE_NOTNULL:
1334             quoted = field != Py_None;
1335             break;
1336         default:
1337             quoted = 0;
1338             break;
1339         }
1340 
1341         null_field = (field == Py_None);
1342         if (PyUnicode_Check(field)) {
1343             append_ok = join_append(self, field, quoted);
1344             Py_DECREF(field);
1345         }
1346         else if (null_field) {
1347             append_ok = join_append(self, NULL, quoted);
1348             Py_DECREF(field);
1349         }
1350         else {
1351             PyObject *str;
1352 
1353             str = PyObject_Str(field);
1354             Py_DECREF(field);
1355             if (str == NULL) {
1356                 Py_DECREF(iter);
1357                 return NULL;
1358             }
1359             append_ok = join_append(self, str, quoted);
1360             Py_DECREF(str);
1361         }
1362         if (!append_ok) {
1363             Py_DECREF(iter);
1364             return NULL;
1365         }
1366     }
1367     Py_DECREF(iter);
1368     if (PyErr_Occurred())
1369         return NULL;
1370 
1371     if (self->num_fields > 0 && self->rec_len == 0) {
1372         if (dialect->quoting == QUOTE_NONE ||
1373             (null_field &&
1374              (dialect->quoting == QUOTE_STRINGS ||
1375               dialect->quoting == QUOTE_NOTNULL)))
1376         {
1377             PyErr_Format(self->error_obj,
1378                 "single empty field record must be quoted");
1379             return NULL;
1380         }
1381         self->num_fields--;
1382         if (!join_append(self, NULL, 1))
1383             return NULL;
1384     }
1385 
1386     /* Add line terminator.
1387      */
1388     if (!join_append_lineterminator(self)) {
1389         return NULL;
1390     }
1391 
1392     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1393                                      (void *) self->rec, self->rec_len);
1394     if (line == NULL) {
1395         return NULL;
1396     }
1397     result = PyObject_CallOneArg(self->write, line);
1398     Py_DECREF(line);
1399     return result;
1400 }
1401 
1402 PyDoc_STRVAR(csv_writerows_doc,
1403 "writerows(iterable of iterables)\n"
1404 "\n"
1405 "Construct and write a series of iterables to a csv file.  Non-string\n"
1406 "elements will be converted to string.");
1407 
1408 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1409 csv_writerows(WriterObj *self, PyObject *seqseq)
1410 {
1411     PyObject *row_iter, *row_obj, *result;
1412 
1413     row_iter = PyObject_GetIter(seqseq);
1414     if (row_iter == NULL) {
1415         return NULL;
1416     }
1417     while ((row_obj = PyIter_Next(row_iter))) {
1418         result = csv_writerow(self, row_obj);
1419         Py_DECREF(row_obj);
1420         if (!result) {
1421             Py_DECREF(row_iter);
1422             return NULL;
1423         }
1424         else
1425              Py_DECREF(result);
1426     }
1427     Py_DECREF(row_iter);
1428     if (PyErr_Occurred())
1429         return NULL;
1430     Py_RETURN_NONE;
1431 }
1432 
1433 static struct PyMethodDef Writer_methods[] = {
1434     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1435     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1436     { NULL, NULL }
1437 };
1438 
1439 #define W_OFF(x) offsetof(WriterObj, x)
1440 
1441 static struct PyMemberDef Writer_memberlist[] = {
1442     { "dialect", _Py_T_OBJECT, W_OFF(dialect), Py_READONLY },
1443     { NULL }
1444 };
1445 
1446 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1447 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1448 {
1449     Py_VISIT(self->dialect);
1450     Py_VISIT(self->write);
1451     Py_VISIT(self->error_obj);
1452     Py_VISIT(Py_TYPE(self));
1453     return 0;
1454 }
1455 
1456 static int
Writer_clear(WriterObj * self)1457 Writer_clear(WriterObj *self)
1458 {
1459     Py_CLEAR(self->dialect);
1460     Py_CLEAR(self->write);
1461     Py_CLEAR(self->error_obj);
1462     return 0;
1463 }
1464 
1465 static void
Writer_dealloc(WriterObj * self)1466 Writer_dealloc(WriterObj *self)
1467 {
1468     PyTypeObject *tp = Py_TYPE(self);
1469     PyObject_GC_UnTrack(self);
1470     tp->tp_clear((PyObject *)self);
1471     if (self->rec != NULL) {
1472         PyMem_Free(self->rec);
1473     }
1474     PyObject_GC_Del(self);
1475     Py_DECREF(tp);
1476 }
1477 
1478 PyDoc_STRVAR(Writer_Type_doc,
1479 "CSV writer\n"
1480 "\n"
1481 "Writer objects are responsible for generating tabular data\n"
1482 "in CSV format from sequence input.\n"
1483 );
1484 
1485 static PyType_Slot Writer_Type_slots[] = {
1486     {Py_tp_doc, (char*)Writer_Type_doc},
1487     {Py_tp_traverse, Writer_traverse},
1488     {Py_tp_clear, Writer_clear},
1489     {Py_tp_dealloc, Writer_dealloc},
1490     {Py_tp_methods, Writer_methods},
1491     {Py_tp_members, Writer_memberlist},
1492     {0, NULL}
1493 };
1494 
1495 PyType_Spec Writer_Type_spec = {
1496     .name = "_csv.writer",
1497     .basicsize = sizeof(WriterObj),
1498     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1499               Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
1500     .slots = Writer_Type_slots,
1501 };
1502 
1503 
1504 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1505 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1506 {
1507     PyObject * output_file, * dialect = NULL;
1508     _csvstate *module_state = get_csv_state(module);
1509     WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1510 
1511     if (!self)
1512         return NULL;
1513 
1514     self->dialect = NULL;
1515     self->write = NULL;
1516 
1517     self->rec = NULL;
1518     self->rec_size = 0;
1519     self->rec_len = 0;
1520     self->num_fields = 0;
1521 
1522     self->error_obj = Py_NewRef(module_state->error_obj);
1523 
1524     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1525         Py_DECREF(self);
1526         return NULL;
1527     }
1528     if (PyObject_GetOptionalAttr(output_file,
1529                              module_state->str_write,
1530                              &self->write) < 0) {
1531         Py_DECREF(self);
1532         return NULL;
1533     }
1534     if (self->write == NULL || !PyCallable_Check(self->write)) {
1535         PyErr_SetString(PyExc_TypeError,
1536                         "argument 1 must have a \"write\" method");
1537         Py_DECREF(self);
1538         return NULL;
1539     }
1540     self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1541                                                 keyword_args);
1542     if (self->dialect == NULL) {
1543         Py_DECREF(self);
1544         return NULL;
1545     }
1546     PyObject_GC_Track(self);
1547     return (PyObject *)self;
1548 }
1549 
1550 /*
1551  * DIALECT REGISTRY
1552  */
1553 
1554 /*[clinic input]
1555 _csv.list_dialects
1556 
1557 Return a list of all known dialect names.
1558 
1559     names = csv.list_dialects()
1560 [clinic start generated code]*/
1561 
1562 static PyObject *
_csv_list_dialects_impl(PyObject * module)1563 _csv_list_dialects_impl(PyObject *module)
1564 /*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
1565 {
1566     return PyDict_Keys(get_csv_state(module)->dialects);
1567 }
1568 
1569 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1570 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1571 {
1572     PyObject *name_obj, *dialect_obj = NULL;
1573     _csvstate *module_state = get_csv_state(module);
1574     PyObject *dialect;
1575 
1576     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1577         return NULL;
1578     if (!PyUnicode_Check(name_obj)) {
1579         PyErr_SetString(PyExc_TypeError,
1580                         "dialect name must be a string");
1581         return NULL;
1582     }
1583     dialect = _call_dialect(module_state, dialect_obj, kwargs);
1584     if (dialect == NULL)
1585         return NULL;
1586     if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1587         Py_DECREF(dialect);
1588         return NULL;
1589     }
1590     Py_DECREF(dialect);
1591     Py_RETURN_NONE;
1592 }
1593 
1594 
1595 /*[clinic input]
1596 _csv.unregister_dialect
1597 
1598     name: object
1599 
1600 Delete the name/dialect mapping associated with a string name.
1601 
1602     csv.unregister_dialect(name)
1603 [clinic start generated code]*/
1604 
1605 static PyObject *
_csv_unregister_dialect_impl(PyObject * module,PyObject * name)1606 _csv_unregister_dialect_impl(PyObject *module, PyObject *name)
1607 /*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
1608 {
1609     _csvstate *module_state = get_csv_state(module);
1610     int rc = PyDict_Pop(module_state->dialects, name, NULL);
1611     if (rc < 0) {
1612         return NULL;
1613     }
1614     if (rc == 0) {
1615         PyErr_Format(module_state->error_obj, "unknown dialect");
1616         return NULL;
1617     }
1618     Py_RETURN_NONE;
1619 }
1620 
1621 /*[clinic input]
1622 _csv.get_dialect
1623 
1624     name: object
1625 
1626 Return the dialect instance associated with name.
1627 
1628     dialect = csv.get_dialect(name)
1629 [clinic start generated code]*/
1630 
1631 static PyObject *
_csv_get_dialect_impl(PyObject * module,PyObject * name)1632 _csv_get_dialect_impl(PyObject *module, PyObject *name)
1633 /*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
1634 {
1635     return get_dialect_from_registry(name, get_csv_state(module));
1636 }
1637 
1638 /*[clinic input]
1639 _csv.field_size_limit
1640 
1641     new_limit: object = NULL
1642 
1643 Sets an upper limit on parsed fields.
1644 
1645     csv.field_size_limit([limit])
1646 
1647 Returns old limit. If limit is not given, no new limit is set and
1648 the old limit is returned
1649 [clinic start generated code]*/
1650 
1651 static PyObject *
_csv_field_size_limit_impl(PyObject * module,PyObject * new_limit)1652 _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
1653 /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
1654 {
1655     _csvstate *module_state = get_csv_state(module);
1656     Py_ssize_t old_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit);
1657     if (new_limit != NULL) {
1658         if (!PyLong_CheckExact(new_limit)) {
1659             PyErr_Format(PyExc_TypeError,
1660                          "limit must be an integer");
1661             return NULL;
1662         }
1663         Py_ssize_t new_limit_value = PyLong_AsSsize_t(new_limit);
1664         if (new_limit_value == -1 && PyErr_Occurred()) {
1665             return NULL;
1666         }
1667         FT_ATOMIC_STORE_SSIZE_RELAXED(module_state->field_limit, new_limit_value);
1668     }
1669     return PyLong_FromSsize_t(old_limit);
1670 }
1671 
1672 static PyType_Slot error_slots[] = {
1673     {0, NULL},
1674 };
1675 
1676 PyType_Spec error_spec = {
1677     .name = "_csv.Error",
1678     .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
1679     .slots = error_slots,
1680 };
1681 
1682 /*
1683  * MODULE
1684  */
1685 
1686 PyDoc_STRVAR(csv_module_doc, "CSV parsing and writing.\n");
1687 
1688 PyDoc_STRVAR(csv_reader_doc,
1689 "    csv_reader = reader(iterable [, dialect='excel']\n"
1690 "                        [optional keyword args])\n"
1691 "    for row in csv_reader:\n"
1692 "        process(row)\n"
1693 "\n"
1694 "The \"iterable\" argument can be any object that returns a line\n"
1695 "of input for each iteration, such as a file object or a list.  The\n"
1696 "optional \"dialect\" parameter is discussed below.  The function\n"
1697 "also accepts optional keyword arguments which override settings\n"
1698 "provided by the dialect.\n"
1699 "\n"
1700 "The returned object is an iterator.  Each iteration returns a row\n"
1701 "of the CSV file (which can span multiple input lines).\n");
1702 
1703 PyDoc_STRVAR(csv_writer_doc,
1704 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1705 "                            [optional keyword args])\n"
1706 "    for row in sequence:\n"
1707 "        csv_writer.writerow(row)\n"
1708 "\n"
1709 "    [or]\n"
1710 "\n"
1711 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1712 "                            [optional keyword args])\n"
1713 "    csv_writer.writerows(rows)\n"
1714 "\n"
1715 "The \"fileobj\" argument can be any object that supports the file API.\n");
1716 
1717 PyDoc_STRVAR(csv_register_dialect_doc,
1718 "Create a mapping from a string name to a dialect class.\n"
1719 "    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1720 
1721 static struct PyMethodDef csv_methods[] = {
1722     { "reader", _PyCFunction_CAST(csv_reader),
1723         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1724     { "writer", _PyCFunction_CAST(csv_writer),
1725         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1726     { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
1727         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1728     _CSV_LIST_DIALECTS_METHODDEF
1729     _CSV_UNREGISTER_DIALECT_METHODDEF
1730     _CSV_GET_DIALECT_METHODDEF
1731     _CSV_FIELD_SIZE_LIMIT_METHODDEF
1732     { NULL, NULL }
1733 };
1734 
1735 static int
csv_exec(PyObject * module)1736 csv_exec(PyObject *module) {
1737     const StyleDesc *style;
1738     PyObject *temp;
1739     _csvstate *module_state = get_csv_state(module);
1740 
1741     temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1742     module_state->dialect_type = (PyTypeObject *)temp;
1743     if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1744         return -1;
1745     }
1746 
1747     temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1748     module_state->reader_type = (PyTypeObject *)temp;
1749     if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1750         return -1;
1751     }
1752 
1753     temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1754     module_state->writer_type = (PyTypeObject *)temp;
1755     if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1756         return -1;
1757     }
1758 
1759     /* Set the field limit */
1760     module_state->field_limit = 128 * 1024;
1761 
1762     /* Add _dialects dictionary */
1763     module_state->dialects = PyDict_New();
1764     if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1765         return -1;
1766     }
1767 
1768     /* Add quote styles into dictionary */
1769     for (style = quote_styles; style->name; style++) {
1770         if (PyModule_AddIntConstant(module, style->name,
1771                                     style->style) == -1)
1772             return -1;
1773     }
1774 
1775     /* Add the CSV exception object to the module. */
1776     PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1777     if (bases == NULL) {
1778         return -1;
1779     }
1780     module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1781                                                        bases);
1782     Py_DECREF(bases);
1783     if (module_state->error_obj == NULL) {
1784         return -1;
1785     }
1786     if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1787         return -1;
1788     }
1789 
1790     module_state->str_write = PyUnicode_InternFromString("write");
1791     if (module_state->str_write == NULL) {
1792         return -1;
1793     }
1794     return 0;
1795 }
1796 
1797 static PyModuleDef_Slot csv_slots[] = {
1798     {Py_mod_exec, csv_exec},
1799     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1800     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1801     {0, NULL}
1802 };
1803 
1804 static struct PyModuleDef _csvmodule = {
1805     PyModuleDef_HEAD_INIT,
1806     "_csv",
1807     csv_module_doc,
1808     sizeof(_csvstate),
1809     csv_methods,
1810     csv_slots,
1811     _csv_traverse,
1812     _csv_clear,
1813     _csv_free
1814 };
1815 
1816 PyMODINIT_FUNC
PyInit__csv(void)1817 PyInit__csv(void)
1818 {
1819     return PyModuleDef_Init(&_csvmodule);
1820 }
1821