• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10 
11 #define MODULE_VERSION "1.0"
12 
13 #include "Python.h"
14 #include "structmember.h"         // PyMemberDef
15 #include <stdbool.h>
16 
17 
18 typedef struct {
19     PyObject *error_obj;   /* CSV exception */
20     PyObject *dialects;   /* Dialect registry */
21     long field_limit;   /* max parsed field size */
22 } _csvstate;
23 
24 static inline _csvstate*
get_csv_state(PyObject * module)25 get_csv_state(PyObject *module)
26 {
27     void *state = PyModule_GetState(module);
28     assert(state != NULL);
29     return (_csvstate *)state;
30 }
31 
32 static int
_csv_clear(PyObject * m)33 _csv_clear(PyObject *m)
34 {
35     Py_CLEAR(get_csv_state(m)->error_obj);
36     Py_CLEAR(get_csv_state(m)->dialects);
37     return 0;
38 }
39 
40 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)41 _csv_traverse(PyObject *m, visitproc visit, void *arg)
42 {
43     Py_VISIT(get_csv_state(m)->error_obj);
44     Py_VISIT(get_csv_state(m)->dialects);
45     return 0;
46 }
47 
48 static void
_csv_free(void * m)49 _csv_free(void *m)
50 {
51    _csv_clear((PyObject *)m);
52 }
53 
54 static struct PyModuleDef _csvmodule;
55 
56 #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
57 
/* States of the reader's character-level parser.  The order of the
 * enumerators (and therefore their numeric values) is unchanged. */
typedef enum {
    START_RECORD,             /* at the start of a record */
    START_FIELD,              /* expecting a field */
    ESCAPED_CHAR,             /* escape character seen outside quotes */
    IN_FIELD,                 /* in an unquoted field */
    IN_QUOTED_FIELD,          /* in a quoted field */
    ESCAPE_IN_QUOTED_FIELD,   /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,    /* quote character seen inside quotes */
    EAT_CRNL,                 /* consuming line-ending characters */
    AFTER_ESCAPED_CRNL        /* an escaped CR/LF was just stored */
} ParserState;
63 
/* Quoting styles a dialect may select.  Enumerator order (and therefore
 * the numeric values 0..3) is unchanged. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
67 
68 typedef struct {
69     QuoteStyle style;
70     const char *name;
71 } StyleDesc;
72 
73 static const StyleDesc quote_styles[] = {
74     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
75     { QUOTE_ALL,        "QUOTE_ALL" },
76     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
77     { QUOTE_NONE,       "QUOTE_NONE" },
78     { 0 }
79 };
80 
81 typedef struct {
82     PyObject_HEAD
83 
84     char doublequote;           /* is " represented by ""? */
85     char skipinitialspace;      /* ignore spaces following delimiter? */
86     char strict;                /* raise exception on bad CSV */
87     int quoting;                /* style of quoting to write */
88     Py_UCS4 delimiter;          /* field separator */
89     Py_UCS4 quotechar;          /* quote character */
90     Py_UCS4 escapechar;         /* escape character */
91     PyObject *lineterminator;   /* string to write between records */
92 
93 } DialectObj;
94 
95 static PyTypeObject Dialect_Type;
96 
97 typedef struct {
98     PyObject_HEAD
99 
100     PyObject *input_iter;   /* iterate over this for input lines */
101 
102     DialectObj *dialect;    /* parsing dialect */
103 
104     PyObject *fields;           /* field list for current record */
105     ParserState state;          /* current CSV parse state */
106     Py_UCS4 *field;             /* temporary buffer */
107     Py_ssize_t field_size;      /* size of allocated buffer */
108     Py_ssize_t field_len;       /* length of current field */
109     int numeric_field;          /* treat field as numeric */
110     unsigned long line_num;     /* Source-file line number */
111 } ReaderObj;
112 
113 static PyTypeObject Reader_Type;
114 
115 #define ReaderObject_Check(v)   Py_IS_TYPE(v, &Reader_Type)
116 
117 typedef struct {
118     PyObject_HEAD
119 
120     PyObject *write;    /* write output lines to this file */
121 
122     DialectObj *dialect;    /* parsing dialect */
123 
124     Py_UCS4 *rec;            /* buffer for parser.join */
125     Py_ssize_t rec_size;        /* size of allocated record */
126     Py_ssize_t rec_len;         /* length of record */
127     int num_fields;             /* number of fields in record */
128 } WriterObj;
129 
130 static PyTypeObject Writer_Type;
131 
/*
 * DIALECT class
 */
135 
136 static PyObject *
get_dialect_from_registry(PyObject * name_obj)137 get_dialect_from_registry(PyObject * name_obj)
138 {
139     PyObject *dialect_obj;
140 
141     dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
142     if (dialect_obj == NULL) {
143         if (!PyErr_Occurred())
144             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
145     }
146     else
147         Py_INCREF(dialect_obj);
148     return dialect_obj;
149 }
150 
151 static PyObject *
get_string(PyObject * str)152 get_string(PyObject *str)
153 {
154     Py_XINCREF(str);
155     return str;
156 }
157 
158 static PyObject *
get_nullchar_as_None(Py_UCS4 c)159 get_nullchar_as_None(Py_UCS4 c)
160 {
161     if (c == '\0') {
162         Py_RETURN_NONE;
163     }
164     else
165         return PyUnicode_FromOrdinal(c);
166 }
167 
168 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))169 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
170 {
171     return get_string(self->lineterminator);
172 }
173 
174 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))175 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
176 {
177     return get_nullchar_as_None(self->delimiter);
178 }
179 
180 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))181 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
182 {
183     return get_nullchar_as_None(self->escapechar);
184 }
185 
186 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))187 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
188 {
189     return get_nullchar_as_None(self->quotechar);
190 }
191 
192 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))193 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
194 {
195     return PyLong_FromLong(self->quoting);
196 }
197 
198 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)199 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
200 {
201     if (src == NULL)
202         *target = dflt;
203     else {
204         int b = PyObject_IsTrue(src);
205         if (b < 0)
206             return -1;
207         *target = (char)b;
208     }
209     return 0;
210 }
211 
212 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)213 _set_int(const char *name, int *target, PyObject *src, int dflt)
214 {
215     if (src == NULL)
216         *target = dflt;
217     else {
218         int value;
219         if (!PyLong_CheckExact(src)) {
220             PyErr_Format(PyExc_TypeError,
221                          "\"%s\" must be an integer", name);
222             return -1;
223         }
224         value = _PyLong_AsInt(src);
225         if (value == -1 && PyErr_Occurred()) {
226             return -1;
227         }
228         *target = value;
229     }
230     return 0;
231 }
232 
233 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)234 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
235 {
236     if (src == NULL)
237         *target = dflt;
238     else {
239         *target = '\0';
240         if (src != Py_None) {
241             Py_ssize_t len;
242             if (!PyUnicode_Check(src)) {
243                 PyErr_Format(PyExc_TypeError,
244                     "\"%s\" must be string, not %.200s", name,
245                     Py_TYPE(src)->tp_name);
246                 return -1;
247             }
248             len = PyUnicode_GetLength(src);
249             if (len > 1) {
250                 PyErr_Format(PyExc_TypeError,
251                     "\"%s\" must be a 1-character string",
252                     name);
253                 return -1;
254             }
255             /* PyUnicode_READY() is called in PyUnicode_GetLength() */
256             if (len > 0)
257                 *target = PyUnicode_READ_CHAR(src, 0);
258         }
259     }
260     return 0;
261 }
262 
263 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)264 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265 {
266     if (src == NULL)
267         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268     else {
269         if (src == Py_None)
270             *target = NULL;
271         else if (!PyUnicode_Check(src)) {
272             PyErr_Format(PyExc_TypeError,
273                          "\"%s\" must be a string", name);
274             return -1;
275         }
276         else {
277             if (PyUnicode_READY(src) == -1)
278                 return -1;
279             Py_INCREF(src);
280             Py_XSETREF(*target, src);
281         }
282     }
283     return 0;
284 }
285 
286 static int
dialect_check_quoting(int quoting)287 dialect_check_quoting(int quoting)
288 {
289     const StyleDesc *qs;
290 
291     for (qs = quote_styles; qs->name; qs++) {
292         if ((int)qs->style == quoting)
293             return 0;
294     }
295     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296     return -1;
297 }
298 
299 #define D_OFF(x) offsetof(DialectObj, x)
300 
301 static struct PyMemberDef Dialect_memberlist[] = {
302     { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
303     { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
304     { "strict",             T_BOOL, D_OFF(strict), READONLY },
305     { NULL }
306 };
307 
308 static PyGetSetDef Dialect_getsetlist[] = {
309     { "delimiter",          (getter)Dialect_get_delimiter},
310     { "escapechar",             (getter)Dialect_get_escapechar},
311     { "lineterminator",         (getter)Dialect_get_lineterminator},
312     { "quotechar",              (getter)Dialect_get_quotechar},
313     { "quoting",                (getter)Dialect_get_quoting},
314     {NULL},
315 };
316 
317 static void
Dialect_dealloc(DialectObj * self)318 Dialect_dealloc(DialectObj *self)
319 {
320     Py_XDECREF(self->lineterminator);
321     Py_TYPE(self)->tp_free((PyObject *)self);
322 }
323 
324 static char *dialect_kws[] = {
325     "dialect",
326     "delimiter",
327     "doublequote",
328     "escapechar",
329     "lineterminator",
330     "quotechar",
331     "quoting",
332     "skipinitialspace",
333     "strict",
334     NULL
335 };
336 
337 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340     DialectObj *self;
341     PyObject *ret = NULL;
342     PyObject *dialect = NULL;
343     PyObject *delimiter = NULL;
344     PyObject *doublequote = NULL;
345     PyObject *escapechar = NULL;
346     PyObject *lineterminator = NULL;
347     PyObject *quotechar = NULL;
348     PyObject *quoting = NULL;
349     PyObject *skipinitialspace = NULL;
350     PyObject *strict = NULL;
351 
352     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353                                      "|OOOOOOOOO", dialect_kws,
354                                      &dialect,
355                                      &delimiter,
356                                      &doublequote,
357                                      &escapechar,
358                                      &lineterminator,
359                                      &quotechar,
360                                      &quoting,
361                                      &skipinitialspace,
362                                      &strict))
363         return NULL;
364 
365     if (dialect != NULL) {
366         if (PyUnicode_Check(dialect)) {
367             dialect = get_dialect_from_registry(dialect);
368             if (dialect == NULL)
369                 return NULL;
370         }
371         else
372             Py_INCREF(dialect);
373         /* Can we reuse this instance? */
374         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375             delimiter == NULL &&
376             doublequote == NULL &&
377             escapechar == NULL &&
378             lineterminator == NULL &&
379             quotechar == NULL &&
380             quoting == NULL &&
381             skipinitialspace == NULL &&
382             strict == NULL)
383             return dialect;
384     }
385 
386     self = (DialectObj *)type->tp_alloc(type, 0);
387     if (self == NULL) {
388         Py_XDECREF(dialect);
389         return NULL;
390     }
391     self->lineterminator = NULL;
392 
393     Py_XINCREF(delimiter);
394     Py_XINCREF(doublequote);
395     Py_XINCREF(escapechar);
396     Py_XINCREF(lineterminator);
397     Py_XINCREF(quotechar);
398     Py_XINCREF(quoting);
399     Py_XINCREF(skipinitialspace);
400     Py_XINCREF(strict);
401     if (dialect != NULL) {
402 #define DIALECT_GETATTR(v, n) \
403         if (v == NULL) \
404             v = PyObject_GetAttrString(dialect, n)
405         DIALECT_GETATTR(delimiter, "delimiter");
406         DIALECT_GETATTR(doublequote, "doublequote");
407         DIALECT_GETATTR(escapechar, "escapechar");
408         DIALECT_GETATTR(lineterminator, "lineterminator");
409         DIALECT_GETATTR(quotechar, "quotechar");
410         DIALECT_GETATTR(quoting, "quoting");
411         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412         DIALECT_GETATTR(strict, "strict");
413         PyErr_Clear();
414     }
415 
416     /* check types and convert to C values */
417 #define DIASET(meth, name, target, src, dflt) \
418     if (meth(name, target, src, dflt)) \
419         goto err
420     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
422     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
427     DIASET(_set_bool, "strict", &self->strict, strict, false);
428 
429     /* validate options */
430     if (dialect_check_quoting(self->quoting))
431         goto err;
432     if (self->delimiter == 0) {
433         PyErr_SetString(PyExc_TypeError,
434                         "\"delimiter\" must be a 1-character string");
435         goto err;
436     }
437     if (quotechar == Py_None && quoting == NULL)
438         self->quoting = QUOTE_NONE;
439     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440         PyErr_SetString(PyExc_TypeError,
441                         "quotechar must be set if quoting enabled");
442         goto err;
443     }
444     if (self->lineterminator == 0) {
445         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446         goto err;
447     }
448 
449     ret = (PyObject *)self;
450     Py_INCREF(self);
451 err:
452     Py_XDECREF(self);
453     Py_XDECREF(dialect);
454     Py_XDECREF(delimiter);
455     Py_XDECREF(doublequote);
456     Py_XDECREF(escapechar);
457     Py_XDECREF(lineterminator);
458     Py_XDECREF(quotechar);
459     Py_XDECREF(quoting);
460     Py_XDECREF(skipinitialspace);
461     Py_XDECREF(strict);
462     return ret;
463 }
464 
465 
466 PyDoc_STRVAR(Dialect_Type_doc,
467 "CSV dialect\n"
468 "\n"
469 "The Dialect type records CSV parsing and generation options.\n");
470 
471 static PyTypeObject Dialect_Type = {
472     PyVarObject_HEAD_INIT(NULL, 0)
473     "_csv.Dialect",                         /* tp_name */
474     sizeof(DialectObj),                     /* tp_basicsize */
475     0,                                      /* tp_itemsize */
476     /*  methods  */
477     (destructor)Dialect_dealloc,            /* tp_dealloc */
478     0,                                      /* tp_vectorcall_offset */
479     (getattrfunc)0,                         /* tp_getattr */
480     (setattrfunc)0,                         /* tp_setattr */
481     0,                                      /* tp_as_async */
482     (reprfunc)0,                            /* tp_repr */
483     0,                                      /* tp_as_number */
484     0,                                      /* tp_as_sequence */
485     0,                                      /* tp_as_mapping */
486     (hashfunc)0,                            /* tp_hash */
487     (ternaryfunc)0,                         /* tp_call */
488     (reprfunc)0,                                /* tp_str */
489     0,                                      /* tp_getattro */
490     0,                                      /* tp_setattro */
491     0,                                      /* tp_as_buffer */
492     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493     Dialect_Type_doc,                       /* tp_doc */
494     0,                                      /* tp_traverse */
495     0,                                      /* tp_clear */
496     0,                                      /* tp_richcompare */
497     0,                                      /* tp_weaklistoffset */
498     0,                                      /* tp_iter */
499     0,                                      /* tp_iternext */
500     0,                                          /* tp_methods */
501     Dialect_memberlist,                     /* tp_members */
502     Dialect_getsetlist,                     /* tp_getset */
503     0,                                          /* tp_base */
504     0,                                          /* tp_dict */
505     0,                                          /* tp_descr_get */
506     0,                                          /* tp_descr_set */
507     0,                                          /* tp_dictoffset */
508     0,                                          /* tp_init */
509     0,                                          /* tp_alloc */
510     dialect_new,                                /* tp_new */
511     0,                                          /* tp_free */
512 };
513 
514 /*
515  * Return an instance of the dialect type, given a Python instance or kwarg
516  * description of the dialect
517  */
518 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520 {
521     PyObject *type = (PyObject *)&Dialect_Type;
522     if (dialect_inst) {
523         return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
524     }
525     else {
526         return PyObject_VectorcallDict(type, NULL, 0, kwargs);
527     }
528 }
529 
/*
 * READER
 */
533 static int
parse_save_field(ReaderObj * self)534 parse_save_field(ReaderObj *self)
535 {
536     PyObject *field;
537 
538     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
539                                       (void *) self->field, self->field_len);
540     if (field == NULL)
541         return -1;
542     self->field_len = 0;
543     if (self->numeric_field) {
544         PyObject *tmp;
545 
546         self->numeric_field = 0;
547         tmp = PyNumber_Float(field);
548         Py_DECREF(field);
549         if (tmp == NULL)
550             return -1;
551         field = tmp;
552     }
553     if (PyList_Append(self->fields, field) < 0) {
554         Py_DECREF(field);
555         return -1;
556     }
557     Py_DECREF(field);
558     return 0;
559 }
560 
561 static int
parse_grow_buff(ReaderObj * self)562 parse_grow_buff(ReaderObj *self)
563 {
564     assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
565 
566     Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
567     Py_UCS4 *field_new = self->field;
568     PyMem_Resize(field_new, Py_UCS4, field_size_new);
569     if (field_new == NULL) {
570         PyErr_NoMemory();
571         return 0;
572     }
573     self->field = field_new;
574     self->field_size = field_size_new;
575     return 1;
576 }
577 
578 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)579 parse_add_char(ReaderObj *self, Py_UCS4 c)
580 {
581     if (self->field_len >= _csvstate_global->field_limit) {
582         PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
583                      _csvstate_global->field_limit);
584         return -1;
585     }
586     if (self->field_len == self->field_size && !parse_grow_buff(self))
587         return -1;
588     self->field[self->field_len++] = c;
589     return 0;
590 }
591 
592 static int
parse_process_char(ReaderObj * self,Py_UCS4 c)593 parse_process_char(ReaderObj *self, Py_UCS4 c)
594 {
595     DialectObj *dialect = self->dialect;
596 
597     switch (self->state) {
598     case START_RECORD:
599         /* start of record */
600         if (c == '\0')
601             /* empty line - return [] */
602             break;
603         else if (c == '\n' || c == '\r') {
604             self->state = EAT_CRNL;
605             break;
606         }
607         /* normal character - handle as START_FIELD */
608         self->state = START_FIELD;
609         /* fallthru */
610     case START_FIELD:
611         /* expecting field */
612         if (c == '\n' || c == '\r' || c == '\0') {
613             /* save empty field - return [fields] */
614             if (parse_save_field(self) < 0)
615                 return -1;
616             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
617         }
618         else if (c == dialect->quotechar &&
619                  dialect->quoting != QUOTE_NONE) {
620             /* start quoted field */
621             self->state = IN_QUOTED_FIELD;
622         }
623         else if (c == dialect->escapechar) {
624             /* possible escaped character */
625             self->state = ESCAPED_CHAR;
626         }
627         else if (c == ' ' && dialect->skipinitialspace)
628             /* ignore space at start of field */
629             ;
630         else if (c == dialect->delimiter) {
631             /* save empty field */
632             if (parse_save_field(self) < 0)
633                 return -1;
634         }
635         else {
636             /* begin new unquoted field */
637             if (dialect->quoting == QUOTE_NONNUMERIC)
638                 self->numeric_field = 1;
639             if (parse_add_char(self, c) < 0)
640                 return -1;
641             self->state = IN_FIELD;
642         }
643         break;
644 
645     case ESCAPED_CHAR:
646         if (c == '\n' || c=='\r') {
647             if (parse_add_char(self, c) < 0)
648                 return -1;
649             self->state = AFTER_ESCAPED_CRNL;
650             break;
651         }
652         if (c == '\0')
653             c = '\n';
654         if (parse_add_char(self, c) < 0)
655             return -1;
656         self->state = IN_FIELD;
657         break;
658 
659     case AFTER_ESCAPED_CRNL:
660         if (c == '\0')
661             break;
662         /*fallthru*/
663 
664     case IN_FIELD:
665         /* in unquoted field */
666         if (c == '\n' || c == '\r' || c == '\0') {
667             /* end of line - return [fields] */
668             if (parse_save_field(self) < 0)
669                 return -1;
670             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
671         }
672         else if (c == dialect->escapechar) {
673             /* possible escaped character */
674             self->state = ESCAPED_CHAR;
675         }
676         else if (c == dialect->delimiter) {
677             /* save field - wait for new field */
678             if (parse_save_field(self) < 0)
679                 return -1;
680             self->state = START_FIELD;
681         }
682         else {
683             /* normal character - save in field */
684             if (parse_add_char(self, c) < 0)
685                 return -1;
686         }
687         break;
688 
689     case IN_QUOTED_FIELD:
690         /* in quoted field */
691         if (c == '\0')
692             ;
693         else if (c == dialect->escapechar) {
694             /* Possible escape character */
695             self->state = ESCAPE_IN_QUOTED_FIELD;
696         }
697         else if (c == dialect->quotechar &&
698                  dialect->quoting != QUOTE_NONE) {
699             if (dialect->doublequote) {
700                 /* doublequote; " represented by "" */
701                 self->state = QUOTE_IN_QUOTED_FIELD;
702             }
703             else {
704                 /* end of quote part of field */
705                 self->state = IN_FIELD;
706             }
707         }
708         else {
709             /* normal character - save in field */
710             if (parse_add_char(self, c) < 0)
711                 return -1;
712         }
713         break;
714 
715     case ESCAPE_IN_QUOTED_FIELD:
716         if (c == '\0')
717             c = '\n';
718         if (parse_add_char(self, c) < 0)
719             return -1;
720         self->state = IN_QUOTED_FIELD;
721         break;
722 
723     case QUOTE_IN_QUOTED_FIELD:
724         /* doublequote - seen a quote in a quoted field */
725         if (dialect->quoting != QUOTE_NONE &&
726             c == dialect->quotechar) {
727             /* save "" as " */
728             if (parse_add_char(self, c) < 0)
729                 return -1;
730             self->state = IN_QUOTED_FIELD;
731         }
732         else if (c == dialect->delimiter) {
733             /* save field - wait for new field */
734             if (parse_save_field(self) < 0)
735                 return -1;
736             self->state = START_FIELD;
737         }
738         else if (c == '\n' || c == '\r' || c == '\0') {
739             /* end of line - return [fields] */
740             if (parse_save_field(self) < 0)
741                 return -1;
742             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
743         }
744         else if (!dialect->strict) {
745             if (parse_add_char(self, c) < 0)
746                 return -1;
747             self->state = IN_FIELD;
748         }
749         else {
750             /* illegal */
751             PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
752                             dialect->delimiter,
753                             dialect->quotechar);
754             return -1;
755         }
756         break;
757 
758     case EAT_CRNL:
759         if (c == '\n' || c == '\r')
760             ;
761         else if (c == '\0')
762             self->state = START_RECORD;
763         else {
764             PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
765             return -1;
766         }
767         break;
768 
769     }
770     return 0;
771 }
772 
773 static int
parse_reset(ReaderObj * self)774 parse_reset(ReaderObj *self)
775 {
776     Py_XSETREF(self->fields, PyList_New(0));
777     if (self->fields == NULL)
778         return -1;
779     self->field_len = 0;
780     self->state = START_RECORD;
781     self->numeric_field = 0;
782     return 0;
783 }
784 
785 static PyObject *
Reader_iternext(ReaderObj * self)786 Reader_iternext(ReaderObj *self)
787 {
788     PyObject *fields = NULL;
789     Py_UCS4 c;
790     Py_ssize_t pos, linelen;
791     unsigned int kind;
792     const void *data;
793     PyObject *lineobj;
794 
795     if (parse_reset(self) < 0)
796         return NULL;
797     do {
798         lineobj = PyIter_Next(self->input_iter);
799         if (lineobj == NULL) {
800             /* End of input OR exception */
801             if (!PyErr_Occurred() && (self->field_len != 0 ||
802                                       self->state == IN_QUOTED_FIELD)) {
803                 if (self->dialect->strict)
804                     PyErr_SetString(_csvstate_global->error_obj,
805                                     "unexpected end of data");
806                 else if (parse_save_field(self) >= 0)
807                     break;
808             }
809             return NULL;
810         }
811         if (!PyUnicode_Check(lineobj)) {
812             PyErr_Format(_csvstate_global->error_obj,
813                          "iterator should return strings, "
814                          "not %.200s "
815                          "(did you open the file in text mode?)",
816                          Py_TYPE(lineobj)->tp_name
817                 );
818             Py_DECREF(lineobj);
819             return NULL;
820         }
821         if (PyUnicode_READY(lineobj) == -1) {
822             Py_DECREF(lineobj);
823             return NULL;
824         }
825         ++self->line_num;
826         kind = PyUnicode_KIND(lineobj);
827         data = PyUnicode_DATA(lineobj);
828         pos = 0;
829         linelen = PyUnicode_GET_LENGTH(lineobj);
830         while (linelen--) {
831             c = PyUnicode_READ(kind, data, pos);
832             if (c == '\0') {
833                 Py_DECREF(lineobj);
834                 PyErr_Format(_csvstate_global->error_obj,
835                              "line contains NUL");
836                 goto err;
837             }
838             if (parse_process_char(self, c) < 0) {
839                 Py_DECREF(lineobj);
840                 goto err;
841             }
842             pos++;
843         }
844         Py_DECREF(lineobj);
845         if (parse_process_char(self, 0) < 0)
846             goto err;
847     } while (self->state != START_RECORD);
848 
849     fields = self->fields;
850     self->fields = NULL;
851 err:
852     return fields;
853 }
854 
855 static void
Reader_dealloc(ReaderObj * self)856 Reader_dealloc(ReaderObj *self)
857 {
858     PyObject_GC_UnTrack(self);
859     Py_XDECREF(self->dialect);
860     Py_XDECREF(self->input_iter);
861     Py_XDECREF(self->fields);
862     if (self->field != NULL)
863         PyMem_Free(self->field);
864     PyObject_GC_Del(self);
865 }
866 
867 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)868 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
869 {
870     Py_VISIT(self->dialect);
871     Py_VISIT(self->input_iter);
872     Py_VISIT(self->fields);
873     return 0;
874 }
875 
876 static int
Reader_clear(ReaderObj * self)877 Reader_clear(ReaderObj *self)
878 {
879     Py_CLEAR(self->dialect);
880     Py_CLEAR(self->input_iter);
881     Py_CLEAR(self->fields);
882     return 0;
883 }
884 
885 PyDoc_STRVAR(Reader_Type_doc,
886 "CSV reader\n"
887 "\n"
888 "Reader objects are responsible for reading and parsing tabular data\n"
889 "in CSV format.\n"
890 );
891 
892 static struct PyMethodDef Reader_methods[] = {
893     { NULL, NULL }
894 };
895 #define R_OFF(x) offsetof(ReaderObj, x)
896 
897 static struct PyMemberDef Reader_memberlist[] = {
898     { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
899     { "line_num", T_ULONG, R_OFF(line_num), READONLY },
900     { NULL }
901 };
902 
903 
904 static PyTypeObject Reader_Type = {
905     PyVarObject_HEAD_INIT(NULL, 0)
906     "_csv.reader",                          /*tp_name*/
907     sizeof(ReaderObj),                      /*tp_basicsize*/
908     0,                                      /*tp_itemsize*/
909     /* methods */
910     (destructor)Reader_dealloc,             /*tp_dealloc*/
911     0,                                      /*tp_vectorcall_offset*/
912     (getattrfunc)0,                         /*tp_getattr*/
913     (setattrfunc)0,                         /*tp_setattr*/
914     0,                                      /*tp_as_async*/
915     (reprfunc)0,                            /*tp_repr*/
916     0,                                      /*tp_as_number*/
917     0,                                      /*tp_as_sequence*/
918     0,                                      /*tp_as_mapping*/
919     (hashfunc)0,                            /*tp_hash*/
920     (ternaryfunc)0,                         /*tp_call*/
921     (reprfunc)0,                                /*tp_str*/
922     0,                                      /*tp_getattro*/
923     0,                                      /*tp_setattro*/
924     0,                                      /*tp_as_buffer*/
925     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
926         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
927     Reader_Type_doc,                        /*tp_doc*/
928     (traverseproc)Reader_traverse,          /*tp_traverse*/
929     (inquiry)Reader_clear,                  /*tp_clear*/
930     0,                                      /*tp_richcompare*/
931     0,                                      /*tp_weaklistoffset*/
932     PyObject_SelfIter,                          /*tp_iter*/
933     (getiterfunc)Reader_iternext,           /*tp_iternext*/
934     Reader_methods,                         /*tp_methods*/
935     Reader_memberlist,                      /*tp_members*/
936     0,                                      /*tp_getset*/
937 
938 };
939 
940 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)941 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
942 {
943     PyObject * iterator, * dialect = NULL;
944     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
945 
946     if (!self)
947         return NULL;
948 
949     self->dialect = NULL;
950     self->fields = NULL;
951     self->input_iter = NULL;
952     self->field = NULL;
953     self->field_size = 0;
954     self->line_num = 0;
955 
956     if (parse_reset(self) < 0) {
957         Py_DECREF(self);
958         return NULL;
959     }
960 
961     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
962         Py_DECREF(self);
963         return NULL;
964     }
965     self->input_iter = PyObject_GetIter(iterator);
966     if (self->input_iter == NULL) {
967         Py_DECREF(self);
968         return NULL;
969     }
970     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
971     if (self->dialect == NULL) {
972         Py_DECREF(self);
973         return NULL;
974     }
975 
976     PyObject_GC_Track(self);
977     return (PyObject *)self;
978 }
979 
980 /*
981  * WRITER
982  */
983 /* ---------------------------------------------------------------- */
984 static void
join_reset(WriterObj * self)985 join_reset(WriterObj *self)
986 {
987     self->rec_len = 0;
988     self->num_fields = 0;
989 }
990 
/* Granularity, in record characters, used by join_check_rec_size() when
 * it rounds up the size of the writer's record buffer. */
#define MEM_INCR 32768
992 
993 /* Calculate new record length or append field to record.  Return new
994  * record length.
995  */
996 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)997 join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
998                  Py_ssize_t field_len, int *quoted,
999                  int copy_phase)
1000 {
1001     DialectObj *dialect = self->dialect;
1002     int i;
1003     Py_ssize_t rec_len;
1004 
1005 #define INCLEN \
1006     do {\
1007         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1008             goto overflow; \
1009         } \
1010         rec_len++; \
1011     } while(0)
1012 
1013 #define ADDCH(c)                                \
1014     do {\
1015         if (copy_phase) \
1016             self->rec[rec_len] = c;\
1017         INCLEN;\
1018     } while(0)
1019 
1020     rec_len = self->rec_len;
1021 
1022     /* If this is not the first field we need a field separator */
1023     if (self->num_fields > 0)
1024         ADDCH(dialect->delimiter);
1025 
1026     /* Handle preceding quote */
1027     if (copy_phase && *quoted)
1028         ADDCH(dialect->quotechar);
1029 
1030     /* Copy/count field data */
1031     /* If field is null just pass over */
1032     for (i = 0; field_data && (i < field_len); i++) {
1033         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1034         int want_escape = 0;
1035 
1036         if (c == dialect->delimiter ||
1037             c == dialect->escapechar ||
1038             c == dialect->quotechar  ||
1039             PyUnicode_FindChar(
1040                 dialect->lineterminator, c, 0,
1041                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1042             if (dialect->quoting == QUOTE_NONE)
1043                 want_escape = 1;
1044             else {
1045                 if (c == dialect->quotechar) {
1046                     if (dialect->doublequote)
1047                         ADDCH(dialect->quotechar);
1048                     else
1049                         want_escape = 1;
1050                 }
1051                 if (!want_escape)
1052                     *quoted = 1;
1053             }
1054             if (want_escape) {
1055                 if (!dialect->escapechar) {
1056                     PyErr_Format(_csvstate_global->error_obj,
1057                                  "need to escape, but no escapechar set");
1058                     return -1;
1059                 }
1060                 ADDCH(dialect->escapechar);
1061             }
1062         }
1063         /* Copy field character into record buffer.
1064          */
1065         ADDCH(c);
1066     }
1067 
1068     if (*quoted) {
1069         if (copy_phase)
1070             ADDCH(dialect->quotechar);
1071         else {
1072             INCLEN; /* starting quote */
1073             INCLEN; /* ending quote */
1074         }
1075     }
1076     return rec_len;
1077 
1078   overflow:
1079     PyErr_NoMemory();
1080     return -1;
1081 #undef ADDCH
1082 #undef INCLEN
1083 }
1084 
1085 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1086 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1087 {
1088     assert(rec_len >= 0);
1089 
1090     if (rec_len > self->rec_size) {
1091         size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1092         Py_UCS4 *rec_new = self->rec;
1093         PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1094         if (rec_new == NULL) {
1095             PyErr_NoMemory();
1096             return 0;
1097         }
1098         self->rec = rec_new;
1099         self->rec_size = (Py_ssize_t)rec_size_new;
1100     }
1101     return 1;
1102 }
1103 
1104 static int
join_append(WriterObj * self,PyObject * field,int quoted)1105 join_append(WriterObj *self, PyObject *field, int quoted)
1106 {
1107     unsigned int field_kind = -1;
1108     const void *field_data = NULL;
1109     Py_ssize_t field_len = 0;
1110     Py_ssize_t rec_len;
1111 
1112     if (field != NULL) {
1113         if (PyUnicode_READY(field) == -1)
1114             return 0;
1115         field_kind = PyUnicode_KIND(field);
1116         field_data = PyUnicode_DATA(field);
1117         field_len = PyUnicode_GET_LENGTH(field);
1118     }
1119     rec_len = join_append_data(self, field_kind, field_data, field_len,
1120                                &quoted, 0);
1121     if (rec_len < 0)
1122         return 0;
1123 
1124     /* grow record buffer if necessary */
1125     if (!join_check_rec_size(self, rec_len))
1126         return 0;
1127 
1128     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1129                                      &quoted, 1);
1130     self->num_fields++;
1131 
1132     return 1;
1133 }
1134 
1135 static int
join_append_lineterminator(WriterObj * self)1136 join_append_lineterminator(WriterObj *self)
1137 {
1138     Py_ssize_t terminator_len, i;
1139     unsigned int term_kind;
1140     const void *term_data;
1141 
1142     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1143     if (terminator_len == -1)
1144         return 0;
1145 
1146     /* grow record buffer if necessary */
1147     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1148         return 0;
1149 
1150     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1151     term_data = PyUnicode_DATA(self->dialect->lineterminator);
1152     for (i = 0; i < terminator_len; i++)
1153         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1154     self->rec_len += terminator_len;
1155 
1156     return 1;
1157 }
1158 
1159 PyDoc_STRVAR(csv_writerow_doc,
1160 "writerow(iterable)\n"
1161 "\n"
1162 "Construct and write a CSV record from an iterable of fields.  Non-string\n"
1163 "elements will be converted to string.");
1164 
1165 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1166 csv_writerow(WriterObj *self, PyObject *seq)
1167 {
1168     DialectObj *dialect = self->dialect;
1169     PyObject *iter, *field, *line, *result;
1170 
1171     iter = PyObject_GetIter(seq);
1172     if (iter == NULL) {
1173         if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1174             PyErr_Format(_csvstate_global->error_obj,
1175                          "iterable expected, not %.200s",
1176                          Py_TYPE(seq)->tp_name);
1177         }
1178         return NULL;
1179     }
1180 
1181     /* Join all fields in internal buffer.
1182      */
1183     join_reset(self);
1184     while ((field = PyIter_Next(iter))) {
1185         int append_ok;
1186         int quoted;
1187 
1188         switch (dialect->quoting) {
1189         case QUOTE_NONNUMERIC:
1190             quoted = !PyNumber_Check(field);
1191             break;
1192         case QUOTE_ALL:
1193             quoted = 1;
1194             break;
1195         default:
1196             quoted = 0;
1197             break;
1198         }
1199 
1200         if (PyUnicode_Check(field)) {
1201             append_ok = join_append(self, field, quoted);
1202             Py_DECREF(field);
1203         }
1204         else if (field == Py_None) {
1205             append_ok = join_append(self, NULL, quoted);
1206             Py_DECREF(field);
1207         }
1208         else {
1209             PyObject *str;
1210 
1211             str = PyObject_Str(field);
1212             Py_DECREF(field);
1213             if (str == NULL) {
1214                 Py_DECREF(iter);
1215                 return NULL;
1216             }
1217             append_ok = join_append(self, str, quoted);
1218             Py_DECREF(str);
1219         }
1220         if (!append_ok) {
1221             Py_DECREF(iter);
1222             return NULL;
1223         }
1224     }
1225     Py_DECREF(iter);
1226     if (PyErr_Occurred())
1227         return NULL;
1228 
1229     if (self->num_fields > 0 && self->rec_len == 0) {
1230         if (dialect->quoting == QUOTE_NONE) {
1231             PyErr_Format(_csvstate_global->error_obj,
1232                 "single empty field record must be quoted");
1233             return NULL;
1234         }
1235         self->num_fields--;
1236         if (!join_append(self, NULL, 1))
1237             return NULL;
1238     }
1239 
1240     /* Add line terminator.
1241      */
1242     if (!join_append_lineterminator(self)) {
1243         return NULL;
1244     }
1245 
1246     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1247                                      (void *) self->rec, self->rec_len);
1248     if (line == NULL) {
1249         return NULL;
1250     }
1251     result = PyObject_CallOneArg(self->write, line);
1252     Py_DECREF(line);
1253     return result;
1254 }
1255 
1256 PyDoc_STRVAR(csv_writerows_doc,
1257 "writerows(iterable of iterables)\n"
1258 "\n"
1259 "Construct and write a series of iterables to a csv file.  Non-string\n"
1260 "elements will be converted to string.");
1261 
1262 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1263 csv_writerows(WriterObj *self, PyObject *seqseq)
1264 {
1265     PyObject *row_iter, *row_obj, *result;
1266 
1267     row_iter = PyObject_GetIter(seqseq);
1268     if (row_iter == NULL) {
1269         return NULL;
1270     }
1271     while ((row_obj = PyIter_Next(row_iter))) {
1272         result = csv_writerow(self, row_obj);
1273         Py_DECREF(row_obj);
1274         if (!result) {
1275             Py_DECREF(row_iter);
1276             return NULL;
1277         }
1278         else
1279              Py_DECREF(result);
1280     }
1281     Py_DECREF(row_iter);
1282     if (PyErr_Occurred())
1283         return NULL;
1284     Py_RETURN_NONE;
1285 }
1286 
1287 static struct PyMethodDef Writer_methods[] = {
1288     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1289     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1290     { NULL, NULL }
1291 };
1292 
1293 #define W_OFF(x) offsetof(WriterObj, x)
1294 
1295 static struct PyMemberDef Writer_memberlist[] = {
1296     { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1297     { NULL }
1298 };
1299 
1300 static void
Writer_dealloc(WriterObj * self)1301 Writer_dealloc(WriterObj *self)
1302 {
1303     PyObject_GC_UnTrack(self);
1304     Py_XDECREF(self->dialect);
1305     Py_XDECREF(self->write);
1306     if (self->rec != NULL)
1307         PyMem_Free(self->rec);
1308     PyObject_GC_Del(self);
1309 }
1310 
1311 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1312 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1313 {
1314     Py_VISIT(self->dialect);
1315     Py_VISIT(self->write);
1316     return 0;
1317 }
1318 
1319 static int
Writer_clear(WriterObj * self)1320 Writer_clear(WriterObj *self)
1321 {
1322     Py_CLEAR(self->dialect);
1323     Py_CLEAR(self->write);
1324     return 0;
1325 }
1326 
1327 PyDoc_STRVAR(Writer_Type_doc,
1328 "CSV writer\n"
1329 "\n"
1330 "Writer objects are responsible for generating tabular data\n"
1331 "in CSV format from sequence input.\n"
1332 );
1333 
1334 static PyTypeObject Writer_Type = {
1335     PyVarObject_HEAD_INIT(NULL, 0)
1336     "_csv.writer",                          /*tp_name*/
1337     sizeof(WriterObj),                      /*tp_basicsize*/
1338     0,                                      /*tp_itemsize*/
1339     /* methods */
1340     (destructor)Writer_dealloc,             /*tp_dealloc*/
1341     0,                                      /*tp_vectorcall_offset*/
1342     (getattrfunc)0,                         /*tp_getattr*/
1343     (setattrfunc)0,                         /*tp_setattr*/
1344     0,                                      /*tp_as_async*/
1345     (reprfunc)0,                            /*tp_repr*/
1346     0,                                      /*tp_as_number*/
1347     0,                                      /*tp_as_sequence*/
1348     0,                                      /*tp_as_mapping*/
1349     (hashfunc)0,                            /*tp_hash*/
1350     (ternaryfunc)0,                         /*tp_call*/
1351     (reprfunc)0,                            /*tp_str*/
1352     0,                                      /*tp_getattro*/
1353     0,                                      /*tp_setattro*/
1354     0,                                      /*tp_as_buffer*/
1355     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1356         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1357     Writer_Type_doc,
1358     (traverseproc)Writer_traverse,          /*tp_traverse*/
1359     (inquiry)Writer_clear,                  /*tp_clear*/
1360     0,                                      /*tp_richcompare*/
1361     0,                                      /*tp_weaklistoffset*/
1362     (getiterfunc)0,                         /*tp_iter*/
1363     (getiterfunc)0,                         /*tp_iternext*/
1364     Writer_methods,                         /*tp_methods*/
1365     Writer_memberlist,                      /*tp_members*/
1366     0,                                      /*tp_getset*/
1367 };
1368 
1369 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1370 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1371 {
1372     PyObject * output_file, * dialect = NULL;
1373     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1374     _Py_IDENTIFIER(write);
1375 
1376     if (!self)
1377         return NULL;
1378 
1379     self->dialect = NULL;
1380     self->write = NULL;
1381 
1382     self->rec = NULL;
1383     self->rec_size = 0;
1384     self->rec_len = 0;
1385     self->num_fields = 0;
1386 
1387     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1388         Py_DECREF(self);
1389         return NULL;
1390     }
1391     if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1392         Py_DECREF(self);
1393         return NULL;
1394     }
1395     if (self->write == NULL || !PyCallable_Check(self->write)) {
1396         PyErr_SetString(PyExc_TypeError,
1397                         "argument 1 must have a \"write\" method");
1398         Py_DECREF(self);
1399         return NULL;
1400     }
1401     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1402     if (self->dialect == NULL) {
1403         Py_DECREF(self);
1404         return NULL;
1405     }
1406     PyObject_GC_Track(self);
1407     return (PyObject *)self;
1408 }
1409 
1410 /*
1411  * DIALECT REGISTRY
1412  */
1413 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1414 csv_list_dialects(PyObject *module, PyObject *args)
1415 {
1416     return PyDict_Keys(_csvstate_global->dialects);
1417 }
1418 
1419 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1420 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1421 {
1422     PyObject *name_obj, *dialect_obj = NULL;
1423     PyObject *dialect;
1424 
1425     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1426         return NULL;
1427     if (!PyUnicode_Check(name_obj)) {
1428         PyErr_SetString(PyExc_TypeError,
1429                         "dialect name must be a string");
1430         return NULL;
1431     }
1432     if (PyUnicode_READY(name_obj) == -1)
1433         return NULL;
1434     dialect = _call_dialect(dialect_obj, kwargs);
1435     if (dialect == NULL)
1436         return NULL;
1437     if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1438         Py_DECREF(dialect);
1439         return NULL;
1440     }
1441     Py_DECREF(dialect);
1442     Py_RETURN_NONE;
1443 }
1444 
1445 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1446 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1447 {
1448     if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1449         if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1450             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1451         }
1452         return NULL;
1453     }
1454     Py_RETURN_NONE;
1455 }
1456 
1457 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1458 csv_get_dialect(PyObject *module, PyObject *name_obj)
1459 {
1460     return get_dialect_from_registry(name_obj);
1461 }
1462 
1463 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1464 csv_field_size_limit(PyObject *module, PyObject *args)
1465 {
1466     PyObject *new_limit = NULL;
1467     long old_limit = _csvstate_global->field_limit;
1468 
1469     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1470         return NULL;
1471     if (new_limit != NULL) {
1472         if (!PyLong_CheckExact(new_limit)) {
1473             PyErr_Format(PyExc_TypeError,
1474                          "limit must be an integer");
1475             return NULL;
1476         }
1477         _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1478         if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1479             _csvstate_global->field_limit = old_limit;
1480             return NULL;
1481         }
1482     }
1483     return PyLong_FromLong(old_limit);
1484 }
1485 
1486 /*
1487  * MODULE
1488  */
1489 
1490 PyDoc_STRVAR(csv_module_doc,
1491 "CSV parsing and writing.\n"
1492 "\n"
1493 "This module provides classes that assist in the reading and writing\n"
1494 "of Comma Separated Value (CSV) files, and implements the interface\n"
1495 "described by PEP 305.  Although many CSV files are simple to parse,\n"
1496 "the format is not formally defined by a stable specification and\n"
1497 "is subtle enough that parsing lines of a CSV file with something\n"
1498 "like line.split(\",\") is bound to fail.  The module supports three\n"
1499 "basic APIs: reading, writing, and registration of dialects.\n"
1500 "\n"
1501 "\n"
1502 "DIALECT REGISTRATION:\n"
1503 "\n"
1504 "Readers and writers support a dialect argument, which is a convenient\n"
1505 "handle on a group of settings.  When the dialect argument is a string,\n"
1506 "it identifies one of the dialects previously registered with the module.\n"
1507 "If it is a class or instance, the attributes of the argument are used as\n"
1508 "the settings for the reader or writer:\n"
1509 "\n"
1510 "    class excel:\n"
1511 "        delimiter = ','\n"
1512 "        quotechar = '\"'\n"
1513 "        escapechar = None\n"
1514 "        doublequote = True\n"
1515 "        skipinitialspace = False\n"
1516 "        lineterminator = '\\r\\n'\n"
1517 "        quoting = QUOTE_MINIMAL\n"
1518 "\n"
1519 "SETTINGS:\n"
1520 "\n"
1521 "    * quotechar - specifies a one-character string to use as the\n"
1522 "        quoting character.  It defaults to '\"'.\n"
1523 "    * delimiter - specifies a one-character string to use as the\n"
1524 "        field separator.  It defaults to ','.\n"
1525 "    * skipinitialspace - specifies how to interpret whitespace which\n"
1526 "        immediately follows a delimiter.  It defaults to False, which\n"
1527 "        means that whitespace immediately following a delimiter is part\n"
1528 "        of the following field.\n"
1529 "    * lineterminator -  specifies the character sequence which should\n"
1530 "        terminate rows.\n"
1531 "    * quoting - controls when quotes should be generated by the writer.\n"
1532 "        It can take on any of the following module constants:\n"
1533 "\n"
1534 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1535 "            field contains either the quotechar or the delimiter\n"
1536 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1537 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1538 "            fields which do not parse as integers or floating point\n"
1539 "            numbers.\n"
1540 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1541 "    * escapechar - specifies a one-character string used to escape\n"
1542 "        the delimiter when quoting is set to QUOTE_NONE.\n"
1543 "    * doublequote - controls the handling of quotes inside fields.  When\n"
1544 "        True, two consecutive quotes are interpreted as one during read,\n"
1545 "        and when writing, each quote character embedded in the data is\n"
1546 "        written as two quotes\n");
1547 
1548 PyDoc_STRVAR(csv_reader_doc,
1549 "    csv_reader = reader(iterable [, dialect='excel']\n"
1550 "                        [optional keyword args])\n"
1551 "    for row in csv_reader:\n"
1552 "        process(row)\n"
1553 "\n"
1554 "The \"iterable\" argument can be any object that returns a line\n"
1555 "of input for each iteration, such as a file object or a list.  The\n"
1556 "optional \"dialect\" parameter is discussed below.  The function\n"
1557 "also accepts optional keyword arguments which override settings\n"
1558 "provided by the dialect.\n"
1559 "\n"
1560 "The returned object is an iterator.  Each iteration returns a row\n"
1561 "of the CSV file (which can span multiple input lines).\n");
1562 
1563 PyDoc_STRVAR(csv_writer_doc,
1564 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1565 "                            [optional keyword args])\n"
1566 "    for row in sequence:\n"
1567 "        csv_writer.writerow(row)\n"
1568 "\n"
1569 "    [or]\n"
1570 "\n"
1571 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1572 "                            [optional keyword args])\n"
1573 "    csv_writer.writerows(rows)\n"
1574 "\n"
1575 "The \"fileobj\" argument can be any object that supports the file API.\n");
1576 
1577 PyDoc_STRVAR(csv_list_dialects_doc,
1578 "Return a list of all know dialect names.\n"
1579 "    names = csv.list_dialects()");
1580 
1581 PyDoc_STRVAR(csv_get_dialect_doc,
1582 "Return the dialect instance associated with name.\n"
1583 "    dialect = csv.get_dialect(name)");
1584 
1585 PyDoc_STRVAR(csv_register_dialect_doc,
1586 "Create a mapping from a string name to a dialect class.\n"
1587 "    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1588 
1589 PyDoc_STRVAR(csv_unregister_dialect_doc,
1590 "Delete the name/dialect mapping associated with a string name.\n"
1591 "    csv.unregister_dialect(name)");
1592 
1593 PyDoc_STRVAR(csv_field_size_limit_doc,
1594 "Sets an upper limit on parsed fields.\n"
1595 "    csv.field_size_limit([limit])\n"
1596 "\n"
1597 "Returns old limit. If limit is not given, no new limit is set and\n"
1598 "the old limit is returned");
1599 
1600 static struct PyMethodDef csv_methods[] = {
1601     { "reader", (PyCFunction)(void(*)(void))csv_reader,
1602         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1603     { "writer", (PyCFunction)(void(*)(void))csv_writer,
1604         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1605     { "list_dialects", (PyCFunction)csv_list_dialects,
1606         METH_NOARGS, csv_list_dialects_doc},
1607     { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
1608         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1609     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1610         METH_O, csv_unregister_dialect_doc},
1611     { "get_dialect", (PyCFunction)csv_get_dialect,
1612         METH_O, csv_get_dialect_doc},
1613     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1614         METH_VARARGS, csv_field_size_limit_doc},
1615     { NULL, NULL }
1616 };
1617 
1618 static struct PyModuleDef _csvmodule = {
1619     PyModuleDef_HEAD_INIT,
1620     "_csv",
1621     csv_module_doc,
1622     sizeof(_csvstate),
1623     csv_methods,
1624     NULL,
1625     _csv_traverse,
1626     _csv_clear,
1627     _csv_free
1628 };
1629 
1630 PyMODINIT_FUNC
PyInit__csv(void)1631 PyInit__csv(void)
1632 {
1633     PyObject *module;
1634     const StyleDesc *style;
1635 
1636     if (PyType_Ready(&Reader_Type) < 0)
1637         return NULL;
1638 
1639     if (PyType_Ready(&Writer_Type) < 0)
1640         return NULL;
1641 
1642     /* Create the module and add the functions */
1643     module = PyModule_Create(&_csvmodule);
1644     if (module == NULL)
1645         return NULL;
1646 
1647     /* Add version to the module. */
1648     if (PyModule_AddStringConstant(module, "__version__",
1649                                    MODULE_VERSION) == -1)
1650         return NULL;
1651 
1652     /* Set the field limit */
1653     get_csv_state(module)->field_limit = 128 * 1024;
1654     /* Do I still need to add this var to the Module Dict? */
1655 
1656     /* Add _dialects dictionary */
1657     get_csv_state(module)->dialects = PyDict_New();
1658     if (get_csv_state(module)->dialects == NULL)
1659         return NULL;
1660     Py_INCREF(get_csv_state(module)->dialects);
1661     if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects))
1662         return NULL;
1663 
1664     /* Add quote styles into dictionary */
1665     for (style = quote_styles; style->name; style++) {
1666         if (PyModule_AddIntConstant(module, style->name,
1667                                     style->style) == -1)
1668             return NULL;
1669     }
1670 
1671     if (PyModule_AddType(module, &Dialect_Type)) {
1672         return NULL;
1673     }
1674 
1675     /* Add the CSV exception object to the module. */
1676     get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1677     if (get_csv_state(module)->error_obj == NULL)
1678         return NULL;
1679     Py_INCREF(get_csv_state(module)->error_obj);
1680     PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj);
1681     return module;
1682 }
1683