/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10 
11 #define MODULE_VERSION "1.0"
12 
13 #include "Python.h"
14 #include "structmember.h"         // PyMemberDef
15 #include <stdbool.h>
16 
17 
18 typedef struct {
19     PyObject *error_obj;   /* CSV exception */
20     PyObject *dialects;   /* Dialect registry */
21     PyTypeObject *dialect_type;
22     PyTypeObject *reader_type;
23     PyTypeObject *writer_type;
24     long field_limit;   /* max parsed field size */
25 } _csvstate;
26 
27 static struct PyModuleDef _csvmodule;
28 
29 static inline _csvstate*
get_csv_state(PyObject * module)30 get_csv_state(PyObject *module)
31 {
32     void *state = PyModule_GetState(module);
33     assert(state != NULL);
34     return (_csvstate *)state;
35 }
36 
37 static int
_csv_clear(PyObject * module)38 _csv_clear(PyObject *module)
39 {
40     _csvstate *module_state = PyModule_GetState(module);
41     Py_CLEAR(module_state->error_obj);
42     Py_CLEAR(module_state->dialects);
43     Py_CLEAR(module_state->dialect_type);
44     Py_CLEAR(module_state->reader_type);
45     Py_CLEAR(module_state->writer_type);
46     return 0;
47 }
48 
49 static int
_csv_traverse(PyObject * module,visitproc visit,void * arg)50 _csv_traverse(PyObject *module, visitproc visit, void *arg)
51 {
52     _csvstate *module_state = PyModule_GetState(module);
53     Py_VISIT(module_state->error_obj);
54     Py_VISIT(module_state->dialects);
55     Py_VISIT(module_state->dialect_type);
56     Py_VISIT(module_state->reader_type);
57     Py_VISIT(module_state->writer_type);
58     return 0;
59 }
60 
61 static void
_csv_free(void * module)62 _csv_free(void *module)
63 {
64    _csv_clear((PyObject *)module);
65 }
66 
/* States of the CSV reader's character-level state machine. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
72 
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known quoting styles; the final entry has a NULL name
 * and acts as the terminator. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
89 
90 typedef struct {
91     PyObject_HEAD
92 
93     char doublequote;           /* is " represented by ""? */
94     char skipinitialspace;      /* ignore spaces following delimiter? */
95     char strict;                /* raise exception on bad CSV */
96     int quoting;                /* style of quoting to write */
97     Py_UCS4 delimiter;          /* field separator */
98     Py_UCS4 quotechar;          /* quote character */
99     Py_UCS4 escapechar;         /* escape character */
100     PyObject *lineterminator;   /* string to write between records */
101 
102 } DialectObj;
103 
104 typedef struct {
105     PyObject_HEAD
106 
107     PyObject *input_iter;   /* iterate over this for input lines */
108 
109     DialectObj *dialect;    /* parsing dialect */
110 
111     PyObject *fields;           /* field list for current record */
112     ParserState state;          /* current CSV parse state */
113     Py_UCS4 *field;             /* temporary buffer */
114     Py_ssize_t field_size;      /* size of allocated buffer */
115     Py_ssize_t field_len;       /* length of current field */
116     int numeric_field;          /* treat field as numeric */
117     unsigned long line_num;     /* Source-file line number */
118 } ReaderObj;
119 
120 typedef struct {
121     PyObject_HEAD
122 
123     PyObject *write;    /* write output lines to this file */
124 
125     DialectObj *dialect;    /* parsing dialect */
126 
127     Py_UCS4 *rec;            /* buffer for parser.join */
128     Py_ssize_t rec_size;        /* size of allocated record */
129     Py_ssize_t rec_len;         /* length of record */
130     int num_fields;             /* number of fields in record */
131 
132     PyObject *error_obj;       /* cached error object */
133 } WriterObj;
134 
/*
 * DIALECT class
 */
138 
139 static PyObject *
get_dialect_from_registry(PyObject * name_obj,_csvstate * module_state)140 get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
141 {
142     PyObject *dialect_obj;
143 
144     dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
145     if (dialect_obj == NULL) {
146         if (!PyErr_Occurred())
147             PyErr_Format(module_state->error_obj, "unknown dialect");
148     }
149     else
150         Py_INCREF(dialect_obj);
151 
152     return dialect_obj;
153 }
154 
155 static PyObject *
get_nullchar_as_None(Py_UCS4 c)156 get_nullchar_as_None(Py_UCS4 c)
157 {
158     if (c == '\0') {
159         Py_RETURN_NONE;
160     }
161     else
162         return PyUnicode_FromOrdinal(c);
163 }
164 
165 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))166 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
167 {
168     Py_XINCREF(self->lineterminator);
169     return self->lineterminator;
170 }
171 
172 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))173 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
174 {
175     return get_nullchar_as_None(self->delimiter);
176 }
177 
178 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))179 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
180 {
181     return get_nullchar_as_None(self->escapechar);
182 }
183 
184 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))185 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
186 {
187     return get_nullchar_as_None(self->quotechar);
188 }
189 
190 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))191 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
192 {
193     return PyLong_FromLong(self->quoting);
194 }
195 
196 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)197 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
198 {
199     if (src == NULL)
200         *target = dflt;
201     else {
202         int b = PyObject_IsTrue(src);
203         if (b < 0)
204             return -1;
205         *target = (char)b;
206     }
207     return 0;
208 }
209 
210 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)211 _set_int(const char *name, int *target, PyObject *src, int dflt)
212 {
213     if (src == NULL)
214         *target = dflt;
215     else {
216         int value;
217         if (!PyLong_CheckExact(src)) {
218             PyErr_Format(PyExc_TypeError,
219                          "\"%s\" must be an integer", name);
220             return -1;
221         }
222         value = _PyLong_AsInt(src);
223         if (value == -1 && PyErr_Occurred()) {
224             return -1;
225         }
226         *target = value;
227     }
228     return 0;
229 }
230 
231 static int
_set_char_or_none(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)232 _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
233 {
234     if (src == NULL) {
235         *target = dflt;
236     }
237     else {
238         *target = '\0';
239         if (src != Py_None) {
240             if (!PyUnicode_Check(src)) {
241                 PyErr_Format(PyExc_TypeError,
242                     "\"%s\" must be string or None, not %.200s", name,
243                     Py_TYPE(src)->tp_name);
244                 return -1;
245             }
246             Py_ssize_t len = PyUnicode_GetLength(src);
247             if (len < 0) {
248                 return -1;
249             }
250             if (len > 1) {
251                 PyErr_Format(PyExc_TypeError,
252                     "\"%s\" must be a 1-character string",
253                     name);
254                 return -1;
255             }
256             /* PyUnicode_READY() is called in PyUnicode_GetLength() */
257             else {
258                 *target = PyUnicode_READ_CHAR(src, 0);
259             }
260         }
261     }
262     return 0;
263 }
264 
265 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)266 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
267 {
268     if (src == NULL) {
269         *target = dflt;
270     }
271     else {
272         *target = '\0';
273         if (!PyUnicode_Check(src)) {
274             PyErr_Format(PyExc_TypeError,
275                          "\"%s\" must be string, not %.200s", name,
276                          Py_TYPE(src)->tp_name);
277                 return -1;
278         }
279         Py_ssize_t len = PyUnicode_GetLength(src);
280         if (len < 0) {
281             return -1;
282         }
283         if (len > 1) {
284             PyErr_Format(PyExc_TypeError,
285                          "\"%s\" must be a 1-character string",
286                          name);
287             return -1;
288         }
289         /* PyUnicode_READY() is called in PyUnicode_GetLength() */
290         else {
291             *target = PyUnicode_READ_CHAR(src, 0);
292         }
293     }
294     return 0;
295 }
296 
297 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)298 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
299 {
300     if (src == NULL)
301         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
302     else {
303         if (src == Py_None)
304             *target = NULL;
305         else if (!PyUnicode_Check(src)) {
306             PyErr_Format(PyExc_TypeError,
307                          "\"%s\" must be a string", name);
308             return -1;
309         }
310         else {
311             if (PyUnicode_READY(src) == -1)
312                 return -1;
313             Py_INCREF(src);
314             Py_XSETREF(*target, src);
315         }
316     }
317     return 0;
318 }
319 
320 static int
dialect_check_quoting(int quoting)321 dialect_check_quoting(int quoting)
322 {
323     const StyleDesc *qs;
324 
325     for (qs = quote_styles; qs->name; qs++) {
326         if ((int)qs->style == quoting)
327             return 0;
328     }
329     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
330     return -1;
331 }
332 
333 #define D_OFF(x) offsetof(DialectObj, x)
334 
335 static struct PyMemberDef Dialect_memberlist[] = {
336     { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
337     { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
338     { "strict",             T_BOOL, D_OFF(strict), READONLY },
339     { NULL }
340 };
341 
342 static PyGetSetDef Dialect_getsetlist[] = {
343     { "delimiter",          (getter)Dialect_get_delimiter},
344     { "escapechar",             (getter)Dialect_get_escapechar},
345     { "lineterminator",         (getter)Dialect_get_lineterminator},
346     { "quotechar",              (getter)Dialect_get_quotechar},
347     { "quoting",                (getter)Dialect_get_quoting},
348     {NULL},
349 };
350 
351 static void
Dialect_dealloc(DialectObj * self)352 Dialect_dealloc(DialectObj *self)
353 {
354     PyTypeObject *tp = Py_TYPE(self);
355     PyObject_GC_UnTrack(self);
356     tp->tp_clear((PyObject *)self);
357     PyObject_GC_Del(self);
358     Py_DECREF(tp);
359 }
360 
/* Keyword names accepted by dialect_new(), in the order in which the
 * corresponding output pointers are passed to
 * PyArg_ParseTupleAndKeywords().  NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
373 
374 static _csvstate *
_csv_state_from_type(PyTypeObject * type,const char * name)375 _csv_state_from_type(PyTypeObject *type, const char *name)
376 {
377     PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
378     if (module == NULL) {
379         return NULL;
380     }
381     _csvstate *module_state = PyModule_GetState(module);
382     if (module_state == NULL) {
383         PyErr_Format(PyExc_SystemError,
384                      "%s: No _csv module state found", name);
385         return NULL;
386     }
387     return module_state;
388 }
389 
390 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)391 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
392 {
393     DialectObj *self;
394     PyObject *ret = NULL;
395     PyObject *dialect = NULL;
396     PyObject *delimiter = NULL;
397     PyObject *doublequote = NULL;
398     PyObject *escapechar = NULL;
399     PyObject *lineterminator = NULL;
400     PyObject *quotechar = NULL;
401     PyObject *quoting = NULL;
402     PyObject *skipinitialspace = NULL;
403     PyObject *strict = NULL;
404 
405     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
406                                      "|OOOOOOOOO", dialect_kws,
407                                      &dialect,
408                                      &delimiter,
409                                      &doublequote,
410                                      &escapechar,
411                                      &lineterminator,
412                                      &quotechar,
413                                      &quoting,
414                                      &skipinitialspace,
415                                      &strict))
416         return NULL;
417 
418     _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
419     if (module_state == NULL) {
420         return NULL;
421     }
422 
423     if (dialect != NULL) {
424         if (PyUnicode_Check(dialect)) {
425             dialect = get_dialect_from_registry(dialect, module_state);
426             if (dialect == NULL)
427                 return NULL;
428         }
429         else
430             Py_INCREF(dialect);
431         /* Can we reuse this instance? */
432         if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
433             delimiter == NULL &&
434             doublequote == NULL &&
435             escapechar == NULL &&
436             lineterminator == NULL &&
437             quotechar == NULL &&
438             quoting == NULL &&
439             skipinitialspace == NULL &&
440             strict == NULL)
441             return dialect;
442     }
443 
444     self = (DialectObj *)type->tp_alloc(type, 0);
445     if (self == NULL) {
446         Py_CLEAR(dialect);
447         return NULL;
448     }
449     self->lineterminator = NULL;
450 
451     Py_XINCREF(delimiter);
452     Py_XINCREF(doublequote);
453     Py_XINCREF(escapechar);
454     Py_XINCREF(lineterminator);
455     Py_XINCREF(quotechar);
456     Py_XINCREF(quoting);
457     Py_XINCREF(skipinitialspace);
458     Py_XINCREF(strict);
459     if (dialect != NULL) {
460 #define DIALECT_GETATTR(v, n)                            \
461         do {                                             \
462             if (v == NULL) {                             \
463                 v = PyObject_GetAttrString(dialect, n);  \
464                 if (v == NULL)                           \
465                     PyErr_Clear();                       \
466             }                                            \
467         } while (0)
468         DIALECT_GETATTR(delimiter, "delimiter");
469         DIALECT_GETATTR(doublequote, "doublequote");
470         DIALECT_GETATTR(escapechar, "escapechar");
471         DIALECT_GETATTR(lineterminator, "lineterminator");
472         DIALECT_GETATTR(quotechar, "quotechar");
473         DIALECT_GETATTR(quoting, "quoting");
474         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
475         DIALECT_GETATTR(strict, "strict");
476     }
477 
478     /* check types and convert to C values */
479 #define DIASET(meth, name, target, src, dflt) \
480     if (meth(name, target, src, dflt)) \
481         goto err
482     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
483     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
484     DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, 0);
485     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
486     DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
487     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
488     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
489     DIASET(_set_bool, "strict", &self->strict, strict, false);
490 
491     /* validate options */
492     if (dialect_check_quoting(self->quoting))
493         goto err;
494     if (self->delimiter == 0) {
495         PyErr_SetString(PyExc_TypeError,
496                         "\"delimiter\" must be a 1-character string");
497         goto err;
498     }
499     if (quotechar == Py_None && quoting == NULL)
500         self->quoting = QUOTE_NONE;
501     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
502         PyErr_SetString(PyExc_TypeError,
503                         "quotechar must be set if quoting enabled");
504         goto err;
505     }
506     if (self->lineterminator == 0) {
507         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
508         goto err;
509     }
510 
511     ret = (PyObject *)self;
512     Py_INCREF(self);
513 err:
514     Py_CLEAR(self);
515     Py_CLEAR(dialect);
516     Py_CLEAR(delimiter);
517     Py_CLEAR(doublequote);
518     Py_CLEAR(escapechar);
519     Py_CLEAR(lineterminator);
520     Py_CLEAR(quotechar);
521     Py_CLEAR(quoting);
522     Py_CLEAR(skipinitialspace);
523     Py_CLEAR(strict);
524     return ret;
525 }
526 
527 /* Since dialect is now a heap type, it inherits pickling method for
528  * protocol 0 and 1 from object, therefore it needs to be overridden */
529 
530 PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
531 
532 static PyObject *
Dialect_reduce(PyObject * self,PyObject * args)533 Dialect_reduce(PyObject *self, PyObject *args) {
534     PyErr_Format(PyExc_TypeError,
535         "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
536     return NULL;
537 }
538 
539 static struct PyMethodDef dialect_methods[] = {
540     {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
541     {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
542     {NULL, NULL}
543 };
544 
545 PyDoc_STRVAR(Dialect_Type_doc,
546 "CSV dialect\n"
547 "\n"
548 "The Dialect type records CSV parsing and generation options.\n");
549 
550 static int
Dialect_clear(DialectObj * self)551 Dialect_clear(DialectObj *self)
552 {
553     Py_CLEAR(self->lineterminator);
554     return 0;
555 }
556 
557 static int
Dialect_traverse(DialectObj * self,visitproc visit,void * arg)558 Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
559 {
560     Py_VISIT(self->lineterminator);
561     Py_VISIT(Py_TYPE(self));
562     return 0;
563 }
564 
565 static PyType_Slot Dialect_Type_slots[] = {
566     {Py_tp_doc, (char*)Dialect_Type_doc},
567     {Py_tp_members, Dialect_memberlist},
568     {Py_tp_getset, Dialect_getsetlist},
569     {Py_tp_new, dialect_new},
570     {Py_tp_methods, dialect_methods},
571     {Py_tp_dealloc, Dialect_dealloc},
572     {Py_tp_clear, Dialect_clear},
573     {Py_tp_traverse, Dialect_traverse},
574     {0, NULL}
575 };
576 
577 PyType_Spec Dialect_Type_spec = {
578     .name = "_csv.Dialect",
579     .basicsize = sizeof(DialectObj),
580     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
581               Py_TPFLAGS_IMMUTABLETYPE),
582     .slots = Dialect_Type_slots,
583 };
584 
585 
586 /*
587  * Return an instance of the dialect type, given a Python instance or kwarg
588  * description of the dialect
589  */
590 static PyObject *
_call_dialect(_csvstate * module_state,PyObject * dialect_inst,PyObject * kwargs)591 _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
592 {
593     PyObject *type = (PyObject *)module_state->dialect_type;
594     if (dialect_inst) {
595         return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
596     }
597     else {
598         return PyObject_VectorcallDict(type, NULL, 0, kwargs);
599     }
600 }
601 
/*
 * READER
 */
605 static int
parse_save_field(ReaderObj * self)606 parse_save_field(ReaderObj *self)
607 {
608     PyObject *field;
609 
610     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
611                                       (void *) self->field, self->field_len);
612     if (field == NULL)
613         return -1;
614     self->field_len = 0;
615     if (self->numeric_field) {
616         PyObject *tmp;
617 
618         self->numeric_field = 0;
619         tmp = PyNumber_Float(field);
620         Py_DECREF(field);
621         if (tmp == NULL)
622             return -1;
623         field = tmp;
624     }
625     if (PyList_Append(self->fields, field) < 0) {
626         Py_DECREF(field);
627         return -1;
628     }
629     Py_DECREF(field);
630     return 0;
631 }
632 
633 static int
parse_grow_buff(ReaderObj * self)634 parse_grow_buff(ReaderObj *self)
635 {
636     assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
637 
638     Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
639     Py_UCS4 *field_new = self->field;
640     PyMem_Resize(field_new, Py_UCS4, field_size_new);
641     if (field_new == NULL) {
642         PyErr_NoMemory();
643         return 0;
644     }
645     self->field = field_new;
646     self->field_size = field_size_new;
647     return 1;
648 }
649 
650 static int
parse_add_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)651 parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
652 {
653     if (self->field_len >= module_state->field_limit) {
654         PyErr_Format(module_state->error_obj,
655                      "field larger than field limit (%ld)",
656                      module_state->field_limit);
657         return -1;
658     }
659     if (self->field_len == self->field_size && !parse_grow_buff(self))
660         return -1;
661     self->field[self->field_len++] = c;
662     return 0;
663 }
664 
665 static int
parse_process_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)666 parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
667 {
668     DialectObj *dialect = self->dialect;
669 
670     switch (self->state) {
671     case START_RECORD:
672         /* start of record */
673         if (c == '\0')
674             /* empty line - return [] */
675             break;
676         else if (c == '\n' || c == '\r') {
677             self->state = EAT_CRNL;
678             break;
679         }
680         /* normal character - handle as START_FIELD */
681         self->state = START_FIELD;
682         /* fallthru */
683     case START_FIELD:
684         /* expecting field */
685         if (c == '\n' || c == '\r' || c == '\0') {
686             /* save empty field - return [fields] */
687             if (parse_save_field(self) < 0)
688                 return -1;
689             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
690         }
691         else if (c == dialect->quotechar &&
692                  dialect->quoting != QUOTE_NONE) {
693             /* start quoted field */
694             self->state = IN_QUOTED_FIELD;
695         }
696         else if (c == dialect->escapechar) {
697             /* possible escaped character */
698             self->state = ESCAPED_CHAR;
699         }
700         else if (c == ' ' && dialect->skipinitialspace)
701             /* ignore space at start of field */
702             ;
703         else if (c == dialect->delimiter) {
704             /* save empty field */
705             if (parse_save_field(self) < 0)
706                 return -1;
707         }
708         else {
709             /* begin new unquoted field */
710             if (dialect->quoting == QUOTE_NONNUMERIC)
711                 self->numeric_field = 1;
712             if (parse_add_char(self, module_state, c) < 0)
713                 return -1;
714             self->state = IN_FIELD;
715         }
716         break;
717 
718     case ESCAPED_CHAR:
719         if (c == '\n' || c=='\r') {
720             if (parse_add_char(self, module_state, c) < 0)
721                 return -1;
722             self->state = AFTER_ESCAPED_CRNL;
723             break;
724         }
725         if (c == '\0')
726             c = '\n';
727         if (parse_add_char(self, module_state, c) < 0)
728             return -1;
729         self->state = IN_FIELD;
730         break;
731 
732     case AFTER_ESCAPED_CRNL:
733         if (c == '\0')
734             break;
735         /*fallthru*/
736 
737     case IN_FIELD:
738         /* in unquoted field */
739         if (c == '\n' || c == '\r' || c == '\0') {
740             /* end of line - return [fields] */
741             if (parse_save_field(self) < 0)
742                 return -1;
743             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
744         }
745         else if (c == dialect->escapechar) {
746             /* possible escaped character */
747             self->state = ESCAPED_CHAR;
748         }
749         else if (c == dialect->delimiter) {
750             /* save field - wait for new field */
751             if (parse_save_field(self) < 0)
752                 return -1;
753             self->state = START_FIELD;
754         }
755         else {
756             /* normal character - save in field */
757             if (parse_add_char(self, module_state, c) < 0)
758                 return -1;
759         }
760         break;
761 
762     case IN_QUOTED_FIELD:
763         /* in quoted field */
764         if (c == '\0')
765             ;
766         else if (c == dialect->escapechar) {
767             /* Possible escape character */
768             self->state = ESCAPE_IN_QUOTED_FIELD;
769         }
770         else if (c == dialect->quotechar &&
771                  dialect->quoting != QUOTE_NONE) {
772             if (dialect->doublequote) {
773                 /* doublequote; " represented by "" */
774                 self->state = QUOTE_IN_QUOTED_FIELD;
775             }
776             else {
777                 /* end of quote part of field */
778                 self->state = IN_FIELD;
779             }
780         }
781         else {
782             /* normal character - save in field */
783             if (parse_add_char(self, module_state, c) < 0)
784                 return -1;
785         }
786         break;
787 
788     case ESCAPE_IN_QUOTED_FIELD:
789         if (c == '\0')
790             c = '\n';
791         if (parse_add_char(self, module_state, c) < 0)
792             return -1;
793         self->state = IN_QUOTED_FIELD;
794         break;
795 
796     case QUOTE_IN_QUOTED_FIELD:
797         /* doublequote - seen a quote in a quoted field */
798         if (dialect->quoting != QUOTE_NONE &&
799             c == dialect->quotechar) {
800             /* save "" as " */
801             if (parse_add_char(self, module_state, c) < 0)
802                 return -1;
803             self->state = IN_QUOTED_FIELD;
804         }
805         else if (c == dialect->delimiter) {
806             /* save field - wait for new field */
807             if (parse_save_field(self) < 0)
808                 return -1;
809             self->state = START_FIELD;
810         }
811         else if (c == '\n' || c == '\r' || c == '\0') {
812             /* end of line - return [fields] */
813             if (parse_save_field(self) < 0)
814                 return -1;
815             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
816         }
817         else if (!dialect->strict) {
818             if (parse_add_char(self, module_state, c) < 0)
819                 return -1;
820             self->state = IN_FIELD;
821         }
822         else {
823             /* illegal */
824             PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
825                             dialect->delimiter,
826                             dialect->quotechar);
827             return -1;
828         }
829         break;
830 
831     case EAT_CRNL:
832         if (c == '\n' || c == '\r')
833             ;
834         else if (c == '\0')
835             self->state = START_RECORD;
836         else {
837             PyErr_Format(module_state->error_obj,
838                          "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
839             return -1;
840         }
841         break;
842 
843     }
844     return 0;
845 }
846 
847 static int
parse_reset(ReaderObj * self)848 parse_reset(ReaderObj *self)
849 {
850     Py_XSETREF(self->fields, PyList_New(0));
851     if (self->fields == NULL)
852         return -1;
853     self->field_len = 0;
854     self->state = START_RECORD;
855     self->numeric_field = 0;
856     return 0;
857 }
858 
859 static PyObject *
Reader_iternext(ReaderObj * self)860 Reader_iternext(ReaderObj *self)
861 {
862     PyObject *fields = NULL;
863     Py_UCS4 c;
864     Py_ssize_t pos, linelen;
865     unsigned int kind;
866     const void *data;
867     PyObject *lineobj;
868 
869     _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
870                                                    "Reader.__next__");
871     if (module_state == NULL) {
872         return NULL;
873     }
874 
875     if (parse_reset(self) < 0)
876         return NULL;
877     do {
878         lineobj = PyIter_Next(self->input_iter);
879         if (lineobj == NULL) {
880             /* End of input OR exception */
881             if (!PyErr_Occurred() && (self->field_len != 0 ||
882                                       self->state == IN_QUOTED_FIELD)) {
883                 if (self->dialect->strict)
884                     PyErr_SetString(module_state->error_obj,
885                                     "unexpected end of data");
886                 else if (parse_save_field(self) >= 0)
887                     break;
888             }
889             return NULL;
890         }
891         if (!PyUnicode_Check(lineobj)) {
892             PyErr_Format(module_state->error_obj,
893                          "iterator should return strings, "
894                          "not %.200s "
895                          "(the file should be opened in text mode)",
896                          Py_TYPE(lineobj)->tp_name
897                 );
898             Py_DECREF(lineobj);
899             return NULL;
900         }
901         if (PyUnicode_READY(lineobj) == -1) {
902             Py_DECREF(lineobj);
903             return NULL;
904         }
905         ++self->line_num;
906         kind = PyUnicode_KIND(lineobj);
907         data = PyUnicode_DATA(lineobj);
908         pos = 0;
909         linelen = PyUnicode_GET_LENGTH(lineobj);
910         while (linelen--) {
911             c = PyUnicode_READ(kind, data, pos);
912             if (c == '\0') {
913                 Py_DECREF(lineobj);
914                 PyErr_Format(module_state->error_obj,
915                              "line contains NUL");
916                 goto err;
917             }
918             if (parse_process_char(self, module_state, c) < 0) {
919                 Py_DECREF(lineobj);
920                 goto err;
921             }
922             pos++;
923         }
924         Py_DECREF(lineobj);
925         if (parse_process_char(self, module_state, 0) < 0)
926             goto err;
927     } while (self->state != START_RECORD);
928 
929     fields = self->fields;
930     self->fields = NULL;
931 err:
932     return fields;
933 }
934 
935 static void
Reader_dealloc(ReaderObj * self)936 Reader_dealloc(ReaderObj *self)
937 {
938     PyTypeObject *tp = Py_TYPE(self);
939     PyObject_GC_UnTrack(self);
940     tp->tp_clear((PyObject *)self);
941     if (self->field != NULL) {
942         PyMem_Free(self->field);
943         self->field = NULL;
944     }
945     PyObject_GC_Del(self);
946     Py_DECREF(tp);
947 }
948 
949 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)950 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
951 {
952     Py_VISIT(self->dialect);
953     Py_VISIT(self->input_iter);
954     Py_VISIT(self->fields);
955     Py_VISIT(Py_TYPE(self));
956     return 0;
957 }
958 
959 static int
Reader_clear(ReaderObj * self)960 Reader_clear(ReaderObj *self)
961 {
962     Py_CLEAR(self->dialect);
963     Py_CLEAR(self->input_iter);
964     Py_CLEAR(self->fields);
965     return 0;
966 }
967 
/* Docstring of the reader type; installed through the Py_tp_doc slot of
 * Reader_Type_slots. */
968 PyDoc_STRVAR(Reader_Type_doc,
969 "CSV reader\n"
970 "\n"
971 "Reader objects are responsible for reading and parsing tabular data\n"
972 "in CSV format.\n"
973 );
974 
975 static struct PyMethodDef Reader_methods[] = {
976     { NULL, NULL }
977 };
978 #define R_OFF(x) offsetof(ReaderObj, x)
979 
980 static struct PyMemberDef Reader_memberlist[] = {
981     { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
982     { "line_num", T_ULONG, R_OFF(line_num), READONLY },
983     { NULL }
984 };
985 
986 
987 static PyType_Slot Reader_Type_slots[] = {
988     {Py_tp_doc, (char*)Reader_Type_doc},
989     {Py_tp_traverse, Reader_traverse},
990     {Py_tp_iter, PyObject_SelfIter},
991     {Py_tp_iternext, Reader_iternext},
992     {Py_tp_methods, Reader_methods},
993     {Py_tp_members, Reader_memberlist},
994     {Py_tp_clear, Reader_clear},
995     {Py_tp_dealloc, Reader_dealloc},
996     {0, NULL}
997 };
998 
999 PyType_Spec Reader_Type_spec = {
1000     .name = "_csv.reader",
1001     .basicsize = sizeof(ReaderObj),
1002     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1003               Py_TPFLAGS_IMMUTABLETYPE),
1004     .slots = Reader_Type_slots
1005 };
1006 
1007 
1008 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)1009 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1010 {
1011     PyObject * iterator, * dialect = NULL;
1012     _csvstate *module_state = get_csv_state(module);
1013     ReaderObj * self = PyObject_GC_New(
1014         ReaderObj,
1015         module_state->reader_type);
1016 
1017     if (!self)
1018         return NULL;
1019 
1020     self->dialect = NULL;
1021     self->fields = NULL;
1022     self->input_iter = NULL;
1023     self->field = NULL;
1024     self->field_size = 0;
1025     self->line_num = 0;
1026 
1027     if (parse_reset(self) < 0) {
1028         Py_DECREF(self);
1029         return NULL;
1030     }
1031 
1032     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1033         Py_DECREF(self);
1034         return NULL;
1035     }
1036     self->input_iter = PyObject_GetIter(iterator);
1037     if (self->input_iter == NULL) {
1038         Py_DECREF(self);
1039         return NULL;
1040     }
1041     self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042                                                 keyword_args);
1043     if (self->dialect == NULL) {
1044         Py_DECREF(self);
1045         return NULL;
1046     }
1047 
1048     PyObject_GC_Track(self);
1049     return (PyObject *)self;
1050 }
1051 
1052 /*
1053  * WRITER
1054  */
1055 /* ---------------------------------------------------------------- */
1056 static void
join_reset(WriterObj * self)1057 join_reset(WriterObj *self)
1058 {
1059     self->rec_len = 0;
1060     self->num_fields = 0;
1061 }
1062 
/* Growth step of the writer's record buffer, counted in Py_UCS4 elements:
 * join_check_rec_size() sizes the buffer to a multiple of this value. */
1063 #define MEM_INCR 32768
1064 
1065 /* Calculate new record length or append field to record.  Return new
1066  * record length.
1067  */
1068 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1069 join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
1070                  Py_ssize_t field_len, int *quoted,
1071                  int copy_phase)
1072 {
1073     DialectObj *dialect = self->dialect;
1074     int i;
1075     Py_ssize_t rec_len;
1076 
1077 #define INCLEN \
1078     do {\
1079         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1080             goto overflow; \
1081         } \
1082         rec_len++; \
1083     } while(0)
1084 
1085 #define ADDCH(c)                                \
1086     do {\
1087         if (copy_phase) \
1088             self->rec[rec_len] = c;\
1089         INCLEN;\
1090     } while(0)
1091 
1092     rec_len = self->rec_len;
1093 
1094     /* If this is not the first field we need a field separator */
1095     if (self->num_fields > 0)
1096         ADDCH(dialect->delimiter);
1097 
1098     /* Handle preceding quote */
1099     if (copy_phase && *quoted)
1100         ADDCH(dialect->quotechar);
1101 
1102     /* Copy/count field data */
1103     /* If field is null just pass over */
1104     for (i = 0; field_data && (i < field_len); i++) {
1105         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1106         int want_escape = 0;
1107 
1108         if (c == dialect->delimiter ||
1109             c == dialect->escapechar ||
1110             c == dialect->quotechar  ||
1111             PyUnicode_FindChar(
1112                 dialect->lineterminator, c, 0,
1113                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1114             if (dialect->quoting == QUOTE_NONE)
1115                 want_escape = 1;
1116             else {
1117                 if (c == dialect->quotechar) {
1118                     if (dialect->doublequote)
1119                         ADDCH(dialect->quotechar);
1120                     else
1121                         want_escape = 1;
1122                 }
1123                 else if (c == dialect->escapechar) {
1124                     want_escape = 1;
1125                 }
1126                 if (!want_escape)
1127                     *quoted = 1;
1128             }
1129             if (want_escape) {
1130                 if (!dialect->escapechar) {
1131                     PyErr_Format(self->error_obj,
1132                                  "need to escape, but no escapechar set");
1133                     return -1;
1134                 }
1135                 ADDCH(dialect->escapechar);
1136             }
1137         }
1138         /* Copy field character into record buffer.
1139          */
1140         ADDCH(c);
1141     }
1142 
1143     if (*quoted) {
1144         if (copy_phase)
1145             ADDCH(dialect->quotechar);
1146         else {
1147             INCLEN; /* starting quote */
1148             INCLEN; /* ending quote */
1149         }
1150     }
1151     return rec_len;
1152 
1153   overflow:
1154     PyErr_NoMemory();
1155     return -1;
1156 #undef ADDCH
1157 #undef INCLEN
1158 }
1159 
1160 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1161 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1162 {
1163     assert(rec_len >= 0);
1164 
1165     if (rec_len > self->rec_size) {
1166         size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1167         Py_UCS4 *rec_new = self->rec;
1168         PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169         if (rec_new == NULL) {
1170             PyErr_NoMemory();
1171             return 0;
1172         }
1173         self->rec = rec_new;
1174         self->rec_size = (Py_ssize_t)rec_size_new;
1175     }
1176     return 1;
1177 }
1178 
1179 static int
join_append(WriterObj * self,PyObject * field,int quoted)1180 join_append(WriterObj *self, PyObject *field, int quoted)
1181 {
1182     unsigned int field_kind = -1;
1183     const void *field_data = NULL;
1184     Py_ssize_t field_len = 0;
1185     Py_ssize_t rec_len;
1186 
1187     if (field != NULL) {
1188         if (PyUnicode_READY(field) == -1)
1189             return 0;
1190         field_kind = PyUnicode_KIND(field);
1191         field_data = PyUnicode_DATA(field);
1192         field_len = PyUnicode_GET_LENGTH(field);
1193     }
1194     rec_len = join_append_data(self, field_kind, field_data, field_len,
1195                                &quoted, 0);
1196     if (rec_len < 0)
1197         return 0;
1198 
1199     /* grow record buffer if necessary */
1200     if (!join_check_rec_size(self, rec_len))
1201         return 0;
1202 
1203     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1204                                      &quoted, 1);
1205     self->num_fields++;
1206 
1207     return 1;
1208 }
1209 
1210 static int
join_append_lineterminator(WriterObj * self)1211 join_append_lineterminator(WriterObj *self)
1212 {
1213     Py_ssize_t terminator_len, i;
1214     unsigned int term_kind;
1215     const void *term_data;
1216 
1217     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1218     if (terminator_len == -1)
1219         return 0;
1220 
1221     /* grow record buffer if necessary */
1222     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223         return 0;
1224 
1225     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226     term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227     for (i = 0; i < terminator_len; i++)
1228         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1229     self->rec_len += terminator_len;
1230 
1231     return 1;
1232 }
1233 
/* Docstring of the writer method "writerow" (see Writer_methods). */
1234 PyDoc_STRVAR(csv_writerow_doc,
1235 "writerow(iterable)\n"
1236 "\n"
1237 "Construct and write a CSV record from an iterable of fields.  Non-string\n"
1238 "elements will be converted to string.");
1239 
1240 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1241 csv_writerow(WriterObj *self, PyObject *seq)
1242 {
1243     DialectObj *dialect = self->dialect;
1244     PyObject *iter, *field, *line, *result;
1245 
1246     iter = PyObject_GetIter(seq);
1247     if (iter == NULL) {
1248         if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1249             PyErr_Format(self->error_obj,
1250                          "iterable expected, not %.200s",
1251                          Py_TYPE(seq)->tp_name);
1252         }
1253         return NULL;
1254     }
1255 
1256     /* Join all fields in internal buffer.
1257      */
1258     join_reset(self);
1259     while ((field = PyIter_Next(iter))) {
1260         int append_ok;
1261         int quoted;
1262 
1263         switch (dialect->quoting) {
1264         case QUOTE_NONNUMERIC:
1265             quoted = !PyNumber_Check(field);
1266             break;
1267         case QUOTE_ALL:
1268             quoted = 1;
1269             break;
1270         default:
1271             quoted = 0;
1272             break;
1273         }
1274 
1275         if (PyUnicode_Check(field)) {
1276             append_ok = join_append(self, field, quoted);
1277             Py_DECREF(field);
1278         }
1279         else if (field == Py_None) {
1280             append_ok = join_append(self, NULL, quoted);
1281             Py_DECREF(field);
1282         }
1283         else {
1284             PyObject *str;
1285 
1286             str = PyObject_Str(field);
1287             Py_DECREF(field);
1288             if (str == NULL) {
1289                 Py_DECREF(iter);
1290                 return NULL;
1291             }
1292             append_ok = join_append(self, str, quoted);
1293             Py_DECREF(str);
1294         }
1295         if (!append_ok) {
1296             Py_DECREF(iter);
1297             return NULL;
1298         }
1299     }
1300     Py_DECREF(iter);
1301     if (PyErr_Occurred())
1302         return NULL;
1303 
1304     if (self->num_fields > 0 && self->rec_len == 0) {
1305         if (dialect->quoting == QUOTE_NONE) {
1306             PyErr_Format(self->error_obj,
1307                 "single empty field record must be quoted");
1308             return NULL;
1309         }
1310         self->num_fields--;
1311         if (!join_append(self, NULL, 1))
1312             return NULL;
1313     }
1314 
1315     /* Add line terminator.
1316      */
1317     if (!join_append_lineterminator(self)) {
1318         return NULL;
1319     }
1320 
1321     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1322                                      (void *) self->rec, self->rec_len);
1323     if (line == NULL) {
1324         return NULL;
1325     }
1326     result = PyObject_CallOneArg(self->write, line);
1327     Py_DECREF(line);
1328     return result;
1329 }
1330 
/* Docstring of the writer method "writerows" (see Writer_methods). */
1331 PyDoc_STRVAR(csv_writerows_doc,
1332 "writerows(iterable of iterables)\n"
1333 "\n"
1334 "Construct and write a series of iterables to a csv file.  Non-string\n"
1335 "elements will be converted to string.");
1336 
1337 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1338 csv_writerows(WriterObj *self, PyObject *seqseq)
1339 {
1340     PyObject *row_iter, *row_obj, *result;
1341 
1342     row_iter = PyObject_GetIter(seqseq);
1343     if (row_iter == NULL) {
1344         return NULL;
1345     }
1346     while ((row_obj = PyIter_Next(row_iter))) {
1347         result = csv_writerow(self, row_obj);
1348         Py_DECREF(row_obj);
1349         if (!result) {
1350             Py_DECREF(row_iter);
1351             return NULL;
1352         }
1353         else
1354              Py_DECREF(result);
1355     }
1356     Py_DECREF(row_iter);
1357     if (PyErr_Occurred())
1358         return NULL;
1359     Py_RETURN_NONE;
1360 }
1361 
1362 static struct PyMethodDef Writer_methods[] = {
1363     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1364     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1365     { NULL, NULL }
1366 };
1367 
1368 #define W_OFF(x) offsetof(WriterObj, x)
1369 
1370 static struct PyMemberDef Writer_memberlist[] = {
1371     { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1372     { NULL }
1373 };
1374 
1375 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1376 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1377 {
1378     Py_VISIT(self->dialect);
1379     Py_VISIT(self->write);
1380     Py_VISIT(self->error_obj);
1381     Py_VISIT(Py_TYPE(self));
1382     return 0;
1383 }
1384 
1385 static int
Writer_clear(WriterObj * self)1386 Writer_clear(WriterObj *self)
1387 {
1388     Py_CLEAR(self->dialect);
1389     Py_CLEAR(self->write);
1390     Py_CLEAR(self->error_obj);
1391     return 0;
1392 }
1393 
1394 static void
Writer_dealloc(WriterObj * self)1395 Writer_dealloc(WriterObj *self)
1396 {
1397     PyTypeObject *tp = Py_TYPE(self);
1398     PyObject_GC_UnTrack(self);
1399     tp->tp_clear((PyObject *)self);
1400     if (self->rec != NULL) {
1401         PyMem_Free(self->rec);
1402     }
1403     PyObject_GC_Del(self);
1404     Py_DECREF(tp);
1405 }
1406 
/* Docstring of the writer type; installed through the Py_tp_doc slot of
 * Writer_Type_slots. */
1407 PyDoc_STRVAR(Writer_Type_doc,
1408 "CSV writer\n"
1409 "\n"
1410 "Writer objects are responsible for generating tabular data\n"
1411 "in CSV format from sequence input.\n"
1412 );
1413 
1414 static PyType_Slot Writer_Type_slots[] = {
1415     {Py_tp_doc, (char*)Writer_Type_doc},
1416     {Py_tp_traverse, Writer_traverse},
1417     {Py_tp_clear, Writer_clear},
1418     {Py_tp_dealloc, Writer_dealloc},
1419     {Py_tp_methods, Writer_methods},
1420     {Py_tp_members, Writer_memberlist},
1421     {0, NULL}
1422 };
1423 
1424 PyType_Spec Writer_Type_spec = {
1425     .name = "_csv.writer",
1426     .basicsize = sizeof(WriterObj),
1427     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1428               Py_TPFLAGS_IMMUTABLETYPE),
1429     .slots = Writer_Type_slots,
1430 };
1431 
1432 
1433 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1434 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1435 {
1436     PyObject * output_file, * dialect = NULL;
1437     _csvstate *module_state = get_csv_state(module);
1438     WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1439     _Py_IDENTIFIER(write);
1440 
1441     if (!self)
1442         return NULL;
1443 
1444     self->dialect = NULL;
1445     self->write = NULL;
1446 
1447     self->rec = NULL;
1448     self->rec_size = 0;
1449     self->rec_len = 0;
1450     self->num_fields = 0;
1451 
1452     self->error_obj = Py_NewRef(module_state->error_obj);
1453 
1454     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1455         Py_DECREF(self);
1456         return NULL;
1457     }
1458     if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1459         Py_DECREF(self);
1460         return NULL;
1461     }
1462     if (self->write == NULL || !PyCallable_Check(self->write)) {
1463         PyErr_SetString(PyExc_TypeError,
1464                         "argument 1 must have a \"write\" method");
1465         Py_DECREF(self);
1466         return NULL;
1467     }
1468     self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1469                                                 keyword_args);
1470     if (self->dialect == NULL) {
1471         Py_DECREF(self);
1472         return NULL;
1473     }
1474     PyObject_GC_Track(self);
1475     return (PyObject *)self;
1476 }
1477 
1478 /*
1479  * DIALECT REGISTRY
1480  */
1481 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1482 csv_list_dialects(PyObject *module, PyObject *args)
1483 {
1484     return PyDict_Keys(get_csv_state(module)->dialects);
1485 }
1486 
1487 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1488 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1489 {
1490     PyObject *name_obj, *dialect_obj = NULL;
1491     _csvstate *module_state = get_csv_state(module);
1492     PyObject *dialect;
1493 
1494     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1495         return NULL;
1496     if (!PyUnicode_Check(name_obj)) {
1497         PyErr_SetString(PyExc_TypeError,
1498                         "dialect name must be a string");
1499         return NULL;
1500     }
1501     if (PyUnicode_READY(name_obj) == -1)
1502         return NULL;
1503     dialect = _call_dialect(module_state, dialect_obj, kwargs);
1504     if (dialect == NULL)
1505         return NULL;
1506     if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1507         Py_DECREF(dialect);
1508         return NULL;
1509     }
1510     Py_DECREF(dialect);
1511     Py_RETURN_NONE;
1512 }
1513 
1514 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1515 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1516 {
1517     _csvstate *module_state = get_csv_state(module);
1518     if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
1519         if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1520             PyErr_Format(module_state->error_obj, "unknown dialect");
1521         }
1522         return NULL;
1523     }
1524     Py_RETURN_NONE;
1525 }
1526 
1527 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1528 csv_get_dialect(PyObject *module, PyObject *name_obj)
1529 {
1530     return get_dialect_from_registry(name_obj, get_csv_state(module));
1531 }
1532 
1533 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1534 csv_field_size_limit(PyObject *module, PyObject *args)
1535 {
1536     PyObject *new_limit = NULL;
1537     _csvstate *module_state = get_csv_state(module);
1538     long old_limit = module_state->field_limit;
1539 
1540     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1541         return NULL;
1542     if (new_limit != NULL) {
1543         if (!PyLong_CheckExact(new_limit)) {
1544             PyErr_Format(PyExc_TypeError,
1545                          "limit must be an integer");
1546             return NULL;
1547         }
1548         module_state->field_limit = PyLong_AsLong(new_limit);
1549         if (module_state->field_limit == -1 && PyErr_Occurred()) {
1550             module_state->field_limit = old_limit;
1551             return NULL;
1552         }
1553     }
1554     return PyLong_FromLong(old_limit);
1555 }
1556 
1557 static PyType_Slot error_slots[] = {
1558     {0, NULL},
1559 };
1560 
1561 PyType_Spec error_spec = {
1562     .name = "_csv.Error",
1563     .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
1564     .slots = error_slots,
1565 };
1566 
1567 /*
1568  * MODULE
1569  */
1570 
/* Module docstring; referenced from the _csvmodule definition. */
1571 PyDoc_STRVAR(csv_module_doc,
1572 "CSV parsing and writing.\n"
1573 "\n"
1574 "This module provides classes that assist in the reading and writing\n"
1575 "of Comma Separated Value (CSV) files, and implements the interface\n"
1576 "described by PEP 305.  Although many CSV files are simple to parse,\n"
1577 "the format is not formally defined by a stable specification and\n"
1578 "is subtle enough that parsing lines of a CSV file with something\n"
1579 "like line.split(\",\") is bound to fail.  The module supports three\n"
1580 "basic APIs: reading, writing, and registration of dialects.\n"
1581 "\n"
1582 "\n"
1583 "DIALECT REGISTRATION:\n"
1584 "\n"
1585 "Readers and writers support a dialect argument, which is a convenient\n"
1586 "handle on a group of settings.  When the dialect argument is a string,\n"
1587 "it identifies one of the dialects previously registered with the module.\n"
1588 "If it is a class or instance, the attributes of the argument are used as\n"
1589 "the settings for the reader or writer:\n"
1590 "\n"
1591 "    class excel:\n"
1592 "        delimiter = ','\n"
1593 "        quotechar = '\"'\n"
1594 "        escapechar = None\n"
1595 "        doublequote = True\n"
1596 "        skipinitialspace = False\n"
1597 "        lineterminator = '\\r\\n'\n"
1598 "        quoting = QUOTE_MINIMAL\n"
1599 "\n"
1600 "SETTINGS:\n"
1601 "\n"
1602 "    * quotechar - specifies a one-character string to use as the\n"
1603 "        quoting character.  It defaults to '\"'.\n"
1604 "    * delimiter - specifies a one-character string to use as the\n"
1605 "        field separator.  It defaults to ','.\n"
1606 "    * skipinitialspace - specifies how to interpret whitespace which\n"
1607 "        immediately follows a delimiter.  It defaults to False, which\n"
1608 "        means that whitespace immediately following a delimiter is part\n"
1609 "        of the following field.\n"
1610 "    * lineterminator -  specifies the character sequence which should\n"
1611 "        terminate rows.\n"
1612 "    * quoting - controls when quotes should be generated by the writer.\n"
1613 "        It can take on any of the following module constants:\n"
1614 "\n"
1615 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1616 "            field contains either the quotechar or the delimiter\n"
1617 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1618 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1619 "            fields which do not parse as integers or floating point\n"
1620 "            numbers.\n"
1621 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1622 "    * escapechar - specifies a one-character string used to escape\n"
1623 "        the delimiter when quoting is set to QUOTE_NONE.\n"
1624 "    * doublequote - controls the handling of quotes inside fields.  When\n"
1625 "        True, two consecutive quotes are interpreted as one during read,\n"
1626 "        and when writing, each quote character embedded in the data is\n"
1627 "        written as two quotes\n");
1628 
/* Docstring of the module-level function "reader" (see csv_methods). */
1629 PyDoc_STRVAR(csv_reader_doc,
1630 "    csv_reader = reader(iterable [, dialect='excel']\n"
1631 "                        [optional keyword args])\n"
1632 "    for row in csv_reader:\n"
1633 "        process(row)\n"
1634 "\n"
1635 "The \"iterable\" argument can be any object that returns a line\n"
1636 "of input for each iteration, such as a file object or a list.  The\n"
1637 "optional \"dialect\" parameter is discussed below.  The function\n"
1638 "also accepts optional keyword arguments which override settings\n"
1639 "provided by the dialect.\n"
1640 "\n"
1641 "The returned object is an iterator.  Each iteration returns a row\n"
1642 "of the CSV file (which can span multiple input lines).\n");
1643 
/* Docstring of the module-level function "writer" (see csv_methods). */
1644 PyDoc_STRVAR(csv_writer_doc,
1645 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1646 "                            [optional keyword args])\n"
1647 "    for row in sequence:\n"
1648 "        csv_writer.writerow(row)\n"
1649 "\n"
1650 "    [or]\n"
1651 "\n"
1652 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1653 "                            [optional keyword args])\n"
1654 "    csv_writer.writerows(rows)\n"
1655 "\n"
1656 "The \"fileobj\" argument can be any object that supports the file API.\n");
1657 
1658 PyDoc_STRVAR(csv_list_dialects_doc,
1659 "Return a list of all know dialect names.\n"
1660 "    names = csv.list_dialects()");
1661 
/* Docstring of the module-level function "get_dialect" (see csv_methods). */
1662 PyDoc_STRVAR(csv_get_dialect_doc,
1663 "Return the dialect instance associated with name.\n"
1664 "    dialect = csv.get_dialect(name)");
1665 
1666 PyDoc_STRVAR(csv_register_dialect_doc,
1667 "Create a mapping from a string name to a dialect class.\n"
1668 "    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1669 
/* Docstring of the module-level function "unregister_dialect" (see
 * csv_methods). */
1670 PyDoc_STRVAR(csv_unregister_dialect_doc,
1671 "Delete the name/dialect mapping associated with a string name.\n"
1672 "    csv.unregister_dialect(name)");
1673 
/* Docstring of the module-level function "field_size_limit" (see
 * csv_methods). */
1674 PyDoc_STRVAR(csv_field_size_limit_doc,
1675 "Sets an upper limit on parsed fields.\n"
1676 "    csv.field_size_limit([limit])\n"
1677 "\n"
1678 "Returns old limit. If limit is not given, no new limit is set and\n"
1679 "the old limit is returned");
1680 
1681 static struct PyMethodDef csv_methods[] = {
1682     { "reader", (PyCFunction)(void(*)(void))csv_reader,
1683         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1684     { "writer", (PyCFunction)(void(*)(void))csv_writer,
1685         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1686     { "list_dialects", (PyCFunction)csv_list_dialects,
1687         METH_NOARGS, csv_list_dialects_doc},
1688     { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
1689         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1690     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1691         METH_O, csv_unregister_dialect_doc},
1692     { "get_dialect", (PyCFunction)csv_get_dialect,
1693         METH_O, csv_get_dialect_doc},
1694     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1695         METH_VARARGS, csv_field_size_limit_doc},
1696     { NULL, NULL }
1697 };
1698 
1699 static int
csv_exec(PyObject * module)1700 csv_exec(PyObject *module) {
1701     const StyleDesc *style;
1702     PyObject *temp;
1703     _csvstate *module_state = get_csv_state(module);
1704 
1705     temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1706     module_state->dialect_type = (PyTypeObject *)temp;
1707     if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1708         return -1;
1709     }
1710 
1711     temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1712     module_state->reader_type = (PyTypeObject *)temp;
1713     if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1714         return -1;
1715     }
1716 
1717     temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1718     module_state->writer_type = (PyTypeObject *)temp;
1719     if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1720         return -1;
1721     }
1722 
1723     /* Add version to the module. */
1724     if (PyModule_AddStringConstant(module, "__version__",
1725                                    MODULE_VERSION) == -1) {
1726         return -1;
1727     }
1728 
1729     /* Set the field limit */
1730     module_state->field_limit = 128 * 1024;
1731 
1732     /* Add _dialects dictionary */
1733     module_state->dialects = PyDict_New();
1734     if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1735         return -1;
1736     }
1737 
1738     /* Add quote styles into dictionary */
1739     for (style = quote_styles; style->name; style++) {
1740         if (PyModule_AddIntConstant(module, style->name,
1741                                     style->style) == -1)
1742             return -1;
1743     }
1744 
1745     /* Add the CSV exception object to the module. */
1746     PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1747     if (bases == NULL) {
1748         return -1;
1749     }
1750     module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1751                                                        bases);
1752     Py_DECREF(bases);
1753     if (module_state->error_obj == NULL) {
1754         return -1;
1755     }
1756     if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1757         return -1;
1758     }
1759 
1760     return 0;
1761 }
1762 
1763 static PyModuleDef_Slot csv_slots[] = {
1764     {Py_mod_exec, csv_exec},
1765     {0, NULL}
1766 };
1767 
1768 static struct PyModuleDef _csvmodule = {
1769     PyModuleDef_HEAD_INIT,
1770     "_csv",
1771     csv_module_doc,
1772     sizeof(_csvstate),
1773     csv_methods,
1774     csv_slots,
1775     _csv_traverse,
1776     _csv_clear,
1777     _csv_free
1778 };
1779 
1780 PyMODINIT_FUNC
PyInit__csv(void)1781 PyInit__csv(void)
1782 {
1783     return PyModuleDef_Init(&_csvmodule);
1784 }
1785