• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10 
11 #define MODULE_VERSION "1.0"
12 
13 #include "Python.h"
14 #include "structmember.h"
15 #include <stdbool.h>
16 
17 
18 typedef struct {
19     PyObject *error_obj;   /* CSV exception */
20     PyObject *dialects;   /* Dialect registry */
21     long field_limit;   /* max parsed field size */
22 } _csvstate;
23 
24 #define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
25 
26 static int
_csv_clear(PyObject * m)27 _csv_clear(PyObject *m)
28 {
29     Py_CLEAR(_csvstate(m)->error_obj);
30     Py_CLEAR(_csvstate(m)->dialects);
31     return 0;
32 }
33 
34 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)35 _csv_traverse(PyObject *m, visitproc visit, void *arg)
36 {
37     Py_VISIT(_csvstate(m)->error_obj);
38     Py_VISIT(_csvstate(m)->dialects);
39     return 0;
40 }
41 
42 static void
_csv_free(void * m)43 _csv_free(void *m)
44 {
45    _csv_clear((PyObject *)m);
46 }
47 
48 static struct PyModuleDef _csvmodule;
49 
50 #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
51 
/* States of the reader's character-driven parser (see parse_process_char). */
typedef enum {
    START_RECORD,            /* at the start of a record */
    START_FIELD,             /* expecting a field */
    ESCAPED_CHAR,            /* escape character seen outside quotes */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,   /* quote character seen inside a quoted field */
    EAT_CRNL,                /* consuming line-ending characters */
    AFTER_ESCAPED_CRNL       /* just stored an escaped CR or NL */
} ParserState;
57 
/* Quoting styles a dialect may select; the numeric values are the ones
   accepted by dialect_check_quoting(). */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
61 
62 typedef struct {
63     QuoteStyle style;
64     const char *name;
65 } StyleDesc;
66 
67 static const StyleDesc quote_styles[] = {
68     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
69     { QUOTE_ALL,        "QUOTE_ALL" },
70     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
71     { QUOTE_NONE,       "QUOTE_NONE" },
72     { 0 }
73 };
74 
75 typedef struct {
76     PyObject_HEAD
77 
78     char doublequote;           /* is " represented by ""? */
79     char skipinitialspace;      /* ignore spaces following delimiter? */
80     char strict;                /* raise exception on bad CSV */
81     int quoting;                /* style of quoting to write */
82     Py_UCS4 delimiter;          /* field separator */
83     Py_UCS4 quotechar;          /* quote character */
84     Py_UCS4 escapechar;         /* escape character */
85     PyObject *lineterminator;   /* string to write between records */
86 
87 } DialectObj;
88 
89 static PyTypeObject Dialect_Type;
90 
91 typedef struct {
92     PyObject_HEAD
93 
94     PyObject *input_iter;   /* iterate over this for input lines */
95 
96     DialectObj *dialect;    /* parsing dialect */
97 
98     PyObject *fields;           /* field list for current record */
99     ParserState state;          /* current CSV parse state */
100     Py_UCS4 *field;             /* temporary buffer */
101     Py_ssize_t field_size;      /* size of allocated buffer */
102     Py_ssize_t field_len;       /* length of current field */
103     int numeric_field;          /* treat field as numeric */
104     unsigned long line_num;     /* Source-file line number */
105 } ReaderObj;
106 
107 static PyTypeObject Reader_Type;
108 
109 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
110 
111 typedef struct {
112     PyObject_HEAD
113 
114     PyObject *write;    /* write output lines to this file */
115 
116     DialectObj *dialect;    /* parsing dialect */
117 
118     Py_UCS4 *rec;            /* buffer for parser.join */
119     Py_ssize_t rec_size;        /* size of allocated record */
120     Py_ssize_t rec_len;         /* length of record */
121     int num_fields;             /* number of fields in record */
122 } WriterObj;
123 
124 static PyTypeObject Writer_Type;
125 
126 /*
127  * DIALECT class
128  */
129 
130 static PyObject *
get_dialect_from_registry(PyObject * name_obj)131 get_dialect_from_registry(PyObject * name_obj)
132 {
133     PyObject *dialect_obj;
134 
135     dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
136     if (dialect_obj == NULL) {
137         if (!PyErr_Occurred())
138             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
139     }
140     else
141         Py_INCREF(dialect_obj);
142     return dialect_obj;
143 }
144 
145 static PyObject *
get_string(PyObject * str)146 get_string(PyObject *str)
147 {
148     Py_XINCREF(str);
149     return str;
150 }
151 
152 static PyObject *
get_nullchar_as_None(Py_UCS4 c)153 get_nullchar_as_None(Py_UCS4 c)
154 {
155     if (c == '\0') {
156         Py_RETURN_NONE;
157     }
158     else
159         return PyUnicode_FromOrdinal(c);
160 }
161 
162 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))163 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
164 {
165     return get_string(self->lineterminator);
166 }
167 
168 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))169 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
170 {
171     return get_nullchar_as_None(self->delimiter);
172 }
173 
174 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))175 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
176 {
177     return get_nullchar_as_None(self->escapechar);
178 }
179 
180 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))181 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
182 {
183     return get_nullchar_as_None(self->quotechar);
184 }
185 
186 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))187 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
188 {
189     return PyLong_FromLong(self->quoting);
190 }
191 
192 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)193 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
194 {
195     if (src == NULL)
196         *target = dflt;
197     else {
198         int b = PyObject_IsTrue(src);
199         if (b < 0)
200             return -1;
201         *target = (char)b;
202     }
203     return 0;
204 }
205 
206 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)207 _set_int(const char *name, int *target, PyObject *src, int dflt)
208 {
209     if (src == NULL)
210         *target = dflt;
211     else {
212         int value;
213         if (!PyLong_CheckExact(src)) {
214             PyErr_Format(PyExc_TypeError,
215                          "\"%s\" must be an integer", name);
216             return -1;
217         }
218         value = _PyLong_AsInt(src);
219         if (value == -1 && PyErr_Occurred()) {
220             return -1;
221         }
222         *target = value;
223     }
224     return 0;
225 }
226 
227 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)228 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
229 {
230     if (src == NULL)
231         *target = dflt;
232     else {
233         *target = '\0';
234         if (src != Py_None) {
235             Py_ssize_t len;
236             if (!PyUnicode_Check(src)) {
237                 PyErr_Format(PyExc_TypeError,
238                     "\"%s\" must be string, not %.200s", name,
239                     src->ob_type->tp_name);
240                 return -1;
241             }
242             len = PyUnicode_GetLength(src);
243             if (len > 1) {
244                 PyErr_Format(PyExc_TypeError,
245                     "\"%s\" must be a 1-character string",
246                     name);
247                 return -1;
248             }
249             /* PyUnicode_READY() is called in PyUnicode_GetLength() */
250             if (len > 0)
251                 *target = PyUnicode_READ_CHAR(src, 0);
252         }
253     }
254     return 0;
255 }
256 
257 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)258 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259 {
260     if (src == NULL)
261         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
262     else {
263         if (src == Py_None)
264             *target = NULL;
265         else if (!PyUnicode_Check(src)) {
266             PyErr_Format(PyExc_TypeError,
267                          "\"%s\" must be a string", name);
268             return -1;
269         }
270         else {
271             if (PyUnicode_READY(src) == -1)
272                 return -1;
273             Py_INCREF(src);
274             Py_XSETREF(*target, src);
275         }
276     }
277     return 0;
278 }
279 
280 static int
dialect_check_quoting(int quoting)281 dialect_check_quoting(int quoting)
282 {
283     const StyleDesc *qs;
284 
285     for (qs = quote_styles; qs->name; qs++) {
286         if ((int)qs->style == quoting)
287             return 0;
288     }
289     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
290     return -1;
291 }
292 
293 #define D_OFF(x) offsetof(DialectObj, x)
294 
295 static struct PyMemberDef Dialect_memberlist[] = {
296     { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
297     { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
298     { "strict",             T_BOOL, D_OFF(strict), READONLY },
299     { NULL }
300 };
301 
302 static PyGetSetDef Dialect_getsetlist[] = {
303     { "delimiter",          (getter)Dialect_get_delimiter},
304     { "escapechar",             (getter)Dialect_get_escapechar},
305     { "lineterminator",         (getter)Dialect_get_lineterminator},
306     { "quotechar",              (getter)Dialect_get_quotechar},
307     { "quoting",                (getter)Dialect_get_quoting},
308     {NULL},
309 };
310 
311 static void
Dialect_dealloc(DialectObj * self)312 Dialect_dealloc(DialectObj *self)
313 {
314     Py_XDECREF(self->lineterminator);
315     Py_TYPE(self)->tp_free((PyObject *)self);
316 }
317 
/* Keyword names accepted by dialect_new(), in the positional order of its
   PyArg_ParseTupleAndKeywords() output pointers.  NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
330 
331 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)332 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
333 {
334     DialectObj *self;
335     PyObject *ret = NULL;
336     PyObject *dialect = NULL;
337     PyObject *delimiter = NULL;
338     PyObject *doublequote = NULL;
339     PyObject *escapechar = NULL;
340     PyObject *lineterminator = NULL;
341     PyObject *quotechar = NULL;
342     PyObject *quoting = NULL;
343     PyObject *skipinitialspace = NULL;
344     PyObject *strict = NULL;
345 
346     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
347                                      "|OOOOOOOOO", dialect_kws,
348                                      &dialect,
349                                      &delimiter,
350                                      &doublequote,
351                                      &escapechar,
352                                      &lineterminator,
353                                      &quotechar,
354                                      &quoting,
355                                      &skipinitialspace,
356                                      &strict))
357         return NULL;
358 
359     if (dialect != NULL) {
360         if (PyUnicode_Check(dialect)) {
361             dialect = get_dialect_from_registry(dialect);
362             if (dialect == NULL)
363                 return NULL;
364         }
365         else
366             Py_INCREF(dialect);
367         /* Can we reuse this instance? */
368         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
369             delimiter == NULL &&
370             doublequote == NULL &&
371             escapechar == NULL &&
372             lineterminator == NULL &&
373             quotechar == NULL &&
374             quoting == NULL &&
375             skipinitialspace == NULL &&
376             strict == NULL)
377             return dialect;
378     }
379 
380     self = (DialectObj *)type->tp_alloc(type, 0);
381     if (self == NULL) {
382         Py_XDECREF(dialect);
383         return NULL;
384     }
385     self->lineterminator = NULL;
386 
387     Py_XINCREF(delimiter);
388     Py_XINCREF(doublequote);
389     Py_XINCREF(escapechar);
390     Py_XINCREF(lineterminator);
391     Py_XINCREF(quotechar);
392     Py_XINCREF(quoting);
393     Py_XINCREF(skipinitialspace);
394     Py_XINCREF(strict);
395     if (dialect != NULL) {
396 #define DIALECT_GETATTR(v, n) \
397         if (v == NULL) \
398             v = PyObject_GetAttrString(dialect, n)
399         DIALECT_GETATTR(delimiter, "delimiter");
400         DIALECT_GETATTR(doublequote, "doublequote");
401         DIALECT_GETATTR(escapechar, "escapechar");
402         DIALECT_GETATTR(lineterminator, "lineterminator");
403         DIALECT_GETATTR(quotechar, "quotechar");
404         DIALECT_GETATTR(quoting, "quoting");
405         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
406         DIALECT_GETATTR(strict, "strict");
407         PyErr_Clear();
408     }
409 
410     /* check types and convert to C values */
411 #define DIASET(meth, name, target, src, dflt) \
412     if (meth(name, target, src, dflt)) \
413         goto err
414     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
415     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
416     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
417     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
418     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
419     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
420     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
421     DIASET(_set_bool, "strict", &self->strict, strict, false);
422 
423     /* validate options */
424     if (dialect_check_quoting(self->quoting))
425         goto err;
426     if (self->delimiter == 0) {
427         PyErr_SetString(PyExc_TypeError,
428                         "\"delimiter\" must be a 1-character string");
429         goto err;
430     }
431     if (quotechar == Py_None && quoting == NULL)
432         self->quoting = QUOTE_NONE;
433     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
434         PyErr_SetString(PyExc_TypeError,
435                         "quotechar must be set if quoting enabled");
436         goto err;
437     }
438     if (self->lineterminator == 0) {
439         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
440         goto err;
441     }
442 
443     ret = (PyObject *)self;
444     Py_INCREF(self);
445 err:
446     Py_XDECREF(self);
447     Py_XDECREF(dialect);
448     Py_XDECREF(delimiter);
449     Py_XDECREF(doublequote);
450     Py_XDECREF(escapechar);
451     Py_XDECREF(lineterminator);
452     Py_XDECREF(quotechar);
453     Py_XDECREF(quoting);
454     Py_XDECREF(skipinitialspace);
455     Py_XDECREF(strict);
456     return ret;
457 }
458 
459 
460 PyDoc_STRVAR(Dialect_Type_doc,
461 "CSV dialect\n"
462 "\n"
463 "The Dialect type records CSV parsing and generation options.\n");
464 
465 static PyTypeObject Dialect_Type = {
466     PyVarObject_HEAD_INIT(NULL, 0)
467     "_csv.Dialect",                         /* tp_name */
468     sizeof(DialectObj),                     /* tp_basicsize */
469     0,                                      /* tp_itemsize */
470     /*  methods  */
471     (destructor)Dialect_dealloc,            /* tp_dealloc */
472     0,                                      /* tp_vectorcall_offset */
473     (getattrfunc)0,                         /* tp_getattr */
474     (setattrfunc)0,                         /* tp_setattr */
475     0,                                      /* tp_as_async */
476     (reprfunc)0,                            /* tp_repr */
477     0,                                      /* tp_as_number */
478     0,                                      /* tp_as_sequence */
479     0,                                      /* tp_as_mapping */
480     (hashfunc)0,                            /* tp_hash */
481     (ternaryfunc)0,                         /* tp_call */
482     (reprfunc)0,                                /* tp_str */
483     0,                                      /* tp_getattro */
484     0,                                      /* tp_setattro */
485     0,                                      /* tp_as_buffer */
486     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
487     Dialect_Type_doc,                       /* tp_doc */
488     0,                                      /* tp_traverse */
489     0,                                      /* tp_clear */
490     0,                                      /* tp_richcompare */
491     0,                                      /* tp_weaklistoffset */
492     0,                                      /* tp_iter */
493     0,                                      /* tp_iternext */
494     0,                                          /* tp_methods */
495     Dialect_memberlist,                     /* tp_members */
496     Dialect_getsetlist,                     /* tp_getset */
497     0,                                          /* tp_base */
498     0,                                          /* tp_dict */
499     0,                                          /* tp_descr_get */
500     0,                                          /* tp_descr_set */
501     0,                                          /* tp_dictoffset */
502     0,                                          /* tp_init */
503     0,                                          /* tp_alloc */
504     dialect_new,                                /* tp_new */
505     0,                                          /* tp_free */
506 };
507 
508 /*
509  * Return an instance of the dialect type, given a Python instance or kwarg
510  * description of the dialect
511  */
512 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)513 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
514 {
515     PyObject *type = (PyObject *)&Dialect_Type;
516     if (dialect_inst) {
517         return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
518     }
519     else {
520         return _PyObject_FastCallDict(type, NULL, 0, kwargs);
521     }
522 }
523 
524 /*
525  * READER
526  */
527 static int
parse_save_field(ReaderObj * self)528 parse_save_field(ReaderObj *self)
529 {
530     PyObject *field;
531 
532     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
533                                       (void *) self->field, self->field_len);
534     if (field == NULL)
535         return -1;
536     self->field_len = 0;
537     if (self->numeric_field) {
538         PyObject *tmp;
539 
540         self->numeric_field = 0;
541         tmp = PyNumber_Float(field);
542         Py_DECREF(field);
543         if (tmp == NULL)
544             return -1;
545         field = tmp;
546     }
547     if (PyList_Append(self->fields, field) < 0) {
548         Py_DECREF(field);
549         return -1;
550     }
551     Py_DECREF(field);
552     return 0;
553 }
554 
555 static int
parse_grow_buff(ReaderObj * self)556 parse_grow_buff(ReaderObj *self)
557 {
558     assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
559 
560     Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
561     Py_UCS4 *field_new = self->field;
562     PyMem_Resize(field_new, Py_UCS4, field_size_new);
563     if (field_new == NULL) {
564         PyErr_NoMemory();
565         return 0;
566     }
567     self->field = field_new;
568     self->field_size = field_size_new;
569     return 1;
570 }
571 
572 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)573 parse_add_char(ReaderObj *self, Py_UCS4 c)
574 {
575     if (self->field_len >= _csvstate_global->field_limit) {
576         PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
577                      _csvstate_global->field_limit);
578         return -1;
579     }
580     if (self->field_len == self->field_size && !parse_grow_buff(self))
581         return -1;
582     self->field[self->field_len++] = c;
583     return 0;
584 }
585 
586 static int
parse_process_char(ReaderObj * self,Py_UCS4 c)587 parse_process_char(ReaderObj *self, Py_UCS4 c)
588 {
589     DialectObj *dialect = self->dialect;
590 
591     switch (self->state) {
592     case START_RECORD:
593         /* start of record */
594         if (c == '\0')
595             /* empty line - return [] */
596             break;
597         else if (c == '\n' || c == '\r') {
598             self->state = EAT_CRNL;
599             break;
600         }
601         /* normal character - handle as START_FIELD */
602         self->state = START_FIELD;
603         /* fallthru */
604     case START_FIELD:
605         /* expecting field */
606         if (c == '\n' || c == '\r' || c == '\0') {
607             /* save empty field - return [fields] */
608             if (parse_save_field(self) < 0)
609                 return -1;
610             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
611         }
612         else if (c == dialect->quotechar &&
613                  dialect->quoting != QUOTE_NONE) {
614             /* start quoted field */
615             self->state = IN_QUOTED_FIELD;
616         }
617         else if (c == dialect->escapechar) {
618             /* possible escaped character */
619             self->state = ESCAPED_CHAR;
620         }
621         else if (c == ' ' && dialect->skipinitialspace)
622             /* ignore space at start of field */
623             ;
624         else if (c == dialect->delimiter) {
625             /* save empty field */
626             if (parse_save_field(self) < 0)
627                 return -1;
628         }
629         else {
630             /* begin new unquoted field */
631             if (dialect->quoting == QUOTE_NONNUMERIC)
632                 self->numeric_field = 1;
633             if (parse_add_char(self, c) < 0)
634                 return -1;
635             self->state = IN_FIELD;
636         }
637         break;
638 
639     case ESCAPED_CHAR:
640         if (c == '\n' || c=='\r') {
641             if (parse_add_char(self, c) < 0)
642                 return -1;
643             self->state = AFTER_ESCAPED_CRNL;
644             break;
645         }
646         if (c == '\0')
647             c = '\n';
648         if (parse_add_char(self, c) < 0)
649             return -1;
650         self->state = IN_FIELD;
651         break;
652 
653     case AFTER_ESCAPED_CRNL:
654         if (c == '\0')
655             break;
656         /*fallthru*/
657 
658     case IN_FIELD:
659         /* in unquoted field */
660         if (c == '\n' || c == '\r' || c == '\0') {
661             /* end of line - return [fields] */
662             if (parse_save_field(self) < 0)
663                 return -1;
664             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
665         }
666         else if (c == dialect->escapechar) {
667             /* possible escaped character */
668             self->state = ESCAPED_CHAR;
669         }
670         else if (c == dialect->delimiter) {
671             /* save field - wait for new field */
672             if (parse_save_field(self) < 0)
673                 return -1;
674             self->state = START_FIELD;
675         }
676         else {
677             /* normal character - save in field */
678             if (parse_add_char(self, c) < 0)
679                 return -1;
680         }
681         break;
682 
683     case IN_QUOTED_FIELD:
684         /* in quoted field */
685         if (c == '\0')
686             ;
687         else if (c == dialect->escapechar) {
688             /* Possible escape character */
689             self->state = ESCAPE_IN_QUOTED_FIELD;
690         }
691         else if (c == dialect->quotechar &&
692                  dialect->quoting != QUOTE_NONE) {
693             if (dialect->doublequote) {
694                 /* doublequote; " represented by "" */
695                 self->state = QUOTE_IN_QUOTED_FIELD;
696             }
697             else {
698                 /* end of quote part of field */
699                 self->state = IN_FIELD;
700             }
701         }
702         else {
703             /* normal character - save in field */
704             if (parse_add_char(self, c) < 0)
705                 return -1;
706         }
707         break;
708 
709     case ESCAPE_IN_QUOTED_FIELD:
710         if (c == '\0')
711             c = '\n';
712         if (parse_add_char(self, c) < 0)
713             return -1;
714         self->state = IN_QUOTED_FIELD;
715         break;
716 
717     case QUOTE_IN_QUOTED_FIELD:
718         /* doublequote - seen a quote in a quoted field */
719         if (dialect->quoting != QUOTE_NONE &&
720             c == dialect->quotechar) {
721             /* save "" as " */
722             if (parse_add_char(self, c) < 0)
723                 return -1;
724             self->state = IN_QUOTED_FIELD;
725         }
726         else if (c == dialect->delimiter) {
727             /* save field - wait for new field */
728             if (parse_save_field(self) < 0)
729                 return -1;
730             self->state = START_FIELD;
731         }
732         else if (c == '\n' || c == '\r' || c == '\0') {
733             /* end of line - return [fields] */
734             if (parse_save_field(self) < 0)
735                 return -1;
736             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
737         }
738         else if (!dialect->strict) {
739             if (parse_add_char(self, c) < 0)
740                 return -1;
741             self->state = IN_FIELD;
742         }
743         else {
744             /* illegal */
745             PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
746                             dialect->delimiter,
747                             dialect->quotechar);
748             return -1;
749         }
750         break;
751 
752     case EAT_CRNL:
753         if (c == '\n' || c == '\r')
754             ;
755         else if (c == '\0')
756             self->state = START_RECORD;
757         else {
758             PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
759             return -1;
760         }
761         break;
762 
763     }
764     return 0;
765 }
766 
767 static int
parse_reset(ReaderObj * self)768 parse_reset(ReaderObj *self)
769 {
770     Py_XSETREF(self->fields, PyList_New(0));
771     if (self->fields == NULL)
772         return -1;
773     self->field_len = 0;
774     self->state = START_RECORD;
775     self->numeric_field = 0;
776     return 0;
777 }
778 
779 static PyObject *
Reader_iternext(ReaderObj * self)780 Reader_iternext(ReaderObj *self)
781 {
782     PyObject *fields = NULL;
783     Py_UCS4 c;
784     Py_ssize_t pos, linelen;
785     unsigned int kind;
786     void *data;
787     PyObject *lineobj;
788 
789     if (parse_reset(self) < 0)
790         return NULL;
791     do {
792         lineobj = PyIter_Next(self->input_iter);
793         if (lineobj == NULL) {
794             /* End of input OR exception */
795             if (!PyErr_Occurred() && (self->field_len != 0 ||
796                                       self->state == IN_QUOTED_FIELD)) {
797                 if (self->dialect->strict)
798                     PyErr_SetString(_csvstate_global->error_obj,
799                                     "unexpected end of data");
800                 else if (parse_save_field(self) >= 0)
801                     break;
802             }
803             return NULL;
804         }
805         if (!PyUnicode_Check(lineobj)) {
806             PyErr_Format(_csvstate_global->error_obj,
807                          "iterator should return strings, "
808                          "not %.200s "
809                          "(did you open the file in text mode?)",
810                          lineobj->ob_type->tp_name
811                 );
812             Py_DECREF(lineobj);
813             return NULL;
814         }
815         if (PyUnicode_READY(lineobj) == -1) {
816             Py_DECREF(lineobj);
817             return NULL;
818         }
819         ++self->line_num;
820         kind = PyUnicode_KIND(lineobj);
821         data = PyUnicode_DATA(lineobj);
822         pos = 0;
823         linelen = PyUnicode_GET_LENGTH(lineobj);
824         while (linelen--) {
825             c = PyUnicode_READ(kind, data, pos);
826             if (c == '\0') {
827                 Py_DECREF(lineobj);
828                 PyErr_Format(_csvstate_global->error_obj,
829                              "line contains NUL");
830                 goto err;
831             }
832             if (parse_process_char(self, c) < 0) {
833                 Py_DECREF(lineobj);
834                 goto err;
835             }
836             pos++;
837         }
838         Py_DECREF(lineobj);
839         if (parse_process_char(self, 0) < 0)
840             goto err;
841     } while (self->state != START_RECORD);
842 
843     fields = self->fields;
844     self->fields = NULL;
845 err:
846     return fields;
847 }
848 
849 static void
Reader_dealloc(ReaderObj * self)850 Reader_dealloc(ReaderObj *self)
851 {
852     PyObject_GC_UnTrack(self);
853     Py_XDECREF(self->dialect);
854     Py_XDECREF(self->input_iter);
855     Py_XDECREF(self->fields);
856     if (self->field != NULL)
857         PyMem_Free(self->field);
858     PyObject_GC_Del(self);
859 }
860 
861 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)862 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
863 {
864     Py_VISIT(self->dialect);
865     Py_VISIT(self->input_iter);
866     Py_VISIT(self->fields);
867     return 0;
868 }
869 
870 static int
Reader_clear(ReaderObj * self)871 Reader_clear(ReaderObj *self)
872 {
873     Py_CLEAR(self->dialect);
874     Py_CLEAR(self->input_iter);
875     Py_CLEAR(self->fields);
876     return 0;
877 }
878 
879 PyDoc_STRVAR(Reader_Type_doc,
880 "CSV reader\n"
881 "\n"
882 "Reader objects are responsible for reading and parsing tabular data\n"
883 "in CSV format.\n"
884 );
885 
886 static struct PyMethodDef Reader_methods[] = {
887     { NULL, NULL }
888 };
889 #define R_OFF(x) offsetof(ReaderObj, x)
890 
891 static struct PyMemberDef Reader_memberlist[] = {
892     { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
893     { "line_num", T_ULONG, R_OFF(line_num), READONLY },
894     { NULL }
895 };
896 
897 
898 static PyTypeObject Reader_Type = {
899     PyVarObject_HEAD_INIT(NULL, 0)
900     "_csv.reader",                          /*tp_name*/
901     sizeof(ReaderObj),                      /*tp_basicsize*/
902     0,                                      /*tp_itemsize*/
903     /* methods */
904     (destructor)Reader_dealloc,             /*tp_dealloc*/
905     0,                                      /*tp_vectorcall_offset*/
906     (getattrfunc)0,                         /*tp_getattr*/
907     (setattrfunc)0,                         /*tp_setattr*/
908     0,                                      /*tp_as_async*/
909     (reprfunc)0,                            /*tp_repr*/
910     0,                                      /*tp_as_number*/
911     0,                                      /*tp_as_sequence*/
912     0,                                      /*tp_as_mapping*/
913     (hashfunc)0,                            /*tp_hash*/
914     (ternaryfunc)0,                         /*tp_call*/
915     (reprfunc)0,                                /*tp_str*/
916     0,                                      /*tp_getattro*/
917     0,                                      /*tp_setattro*/
918     0,                                      /*tp_as_buffer*/
919     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
920         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
921     Reader_Type_doc,                        /*tp_doc*/
922     (traverseproc)Reader_traverse,          /*tp_traverse*/
923     (inquiry)Reader_clear,                  /*tp_clear*/
924     0,                                      /*tp_richcompare*/
925     0,                                      /*tp_weaklistoffset*/
926     PyObject_SelfIter,                          /*tp_iter*/
927     (getiterfunc)Reader_iternext,           /*tp_iternext*/
928     Reader_methods,                         /*tp_methods*/
929     Reader_memberlist,                      /*tp_members*/
930     0,                                      /*tp_getset*/
931 
932 };
933 
934 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)935 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
936 {
937     PyObject * iterator, * dialect = NULL;
938     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
939 
940     if (!self)
941         return NULL;
942 
943     self->dialect = NULL;
944     self->fields = NULL;
945     self->input_iter = NULL;
946     self->field = NULL;
947     self->field_size = 0;
948     self->line_num = 0;
949 
950     if (parse_reset(self) < 0) {
951         Py_DECREF(self);
952         return NULL;
953     }
954 
955     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
956         Py_DECREF(self);
957         return NULL;
958     }
959     self->input_iter = PyObject_GetIter(iterator);
960     if (self->input_iter == NULL) {
961         PyErr_SetString(PyExc_TypeError,
962                         "argument 1 must be an iterator");
963         Py_DECREF(self);
964         return NULL;
965     }
966     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
967     if (self->dialect == NULL) {
968         Py_DECREF(self);
969         return NULL;
970     }
971 
972     PyObject_GC_Track(self);
973     return (PyObject *)self;
974 }
975 
976 /*
977  * WRITER
978  */
979 /* ---------------------------------------------------------------- */
980 static void
join_reset(WriterObj * self)981 join_reset(WriterObj *self)
982 {
983     self->rec_len = 0;
984     self->num_fields = 0;
985 }
986 
/* Granularity, counted in Py_UCS4 elements, to which join_check_rec_size()
 * rounds the writer's record buffer when it has to grow. */
#define MEM_INCR 32768
988 
989 /* Calculate new record length or append field to record.  Return new
990  * record length.
991  */
992 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)993 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
994                  Py_ssize_t field_len, int *quoted,
995                  int copy_phase)
996 {
997     DialectObj *dialect = self->dialect;
998     int i;
999     Py_ssize_t rec_len;
1000 
1001 #define INCLEN \
1002     do {\
1003         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1004             goto overflow; \
1005         } \
1006         rec_len++; \
1007     } while(0)
1008 
1009 #define ADDCH(c)                                \
1010     do {\
1011         if (copy_phase) \
1012             self->rec[rec_len] = c;\
1013         INCLEN;\
1014     } while(0)
1015 
1016     rec_len = self->rec_len;
1017 
1018     /* If this is not the first field we need a field separator */
1019     if (self->num_fields > 0)
1020         ADDCH(dialect->delimiter);
1021 
1022     /* Handle preceding quote */
1023     if (copy_phase && *quoted)
1024         ADDCH(dialect->quotechar);
1025 
1026     /* Copy/count field data */
1027     /* If field is null just pass over */
1028     for (i = 0; field_data && (i < field_len); i++) {
1029         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1030         int want_escape = 0;
1031 
1032         if (c == dialect->delimiter ||
1033             c == dialect->escapechar ||
1034             c == dialect->quotechar  ||
1035             PyUnicode_FindChar(
1036                 dialect->lineterminator, c, 0,
1037                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1038             if (dialect->quoting == QUOTE_NONE)
1039                 want_escape = 1;
1040             else {
1041                 if (c == dialect->quotechar) {
1042                     if (dialect->doublequote)
1043                         ADDCH(dialect->quotechar);
1044                     else
1045                         want_escape = 1;
1046                 }
1047                 if (!want_escape)
1048                     *quoted = 1;
1049             }
1050             if (want_escape) {
1051                 if (!dialect->escapechar) {
1052                     PyErr_Format(_csvstate_global->error_obj,
1053                                  "need to escape, but no escapechar set");
1054                     return -1;
1055                 }
1056                 ADDCH(dialect->escapechar);
1057             }
1058         }
1059         /* Copy field character into record buffer.
1060          */
1061         ADDCH(c);
1062     }
1063 
1064     if (*quoted) {
1065         if (copy_phase)
1066             ADDCH(dialect->quotechar);
1067         else {
1068             INCLEN; /* starting quote */
1069             INCLEN; /* ending quote */
1070         }
1071     }
1072     return rec_len;
1073 
1074   overflow:
1075     PyErr_NoMemory();
1076     return -1;
1077 #undef ADDCH
1078 #undef INCLEN
1079 }
1080 
1081 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1082 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1083 {
1084     assert(rec_len >= 0);
1085 
1086     if (rec_len > self->rec_size) {
1087         size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1088         Py_UCS4 *rec_new = self->rec;
1089         PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1090         if (rec_new == NULL) {
1091             PyErr_NoMemory();
1092             return 0;
1093         }
1094         self->rec = rec_new;
1095         self->rec_size = (Py_ssize_t)rec_size_new;
1096     }
1097     return 1;
1098 }
1099 
1100 static int
join_append(WriterObj * self,PyObject * field,int quoted)1101 join_append(WriterObj *self, PyObject *field, int quoted)
1102 {
1103     unsigned int field_kind = -1;
1104     void *field_data = NULL;
1105     Py_ssize_t field_len = 0;
1106     Py_ssize_t rec_len;
1107 
1108     if (field != NULL) {
1109         if (PyUnicode_READY(field) == -1)
1110             return 0;
1111         field_kind = PyUnicode_KIND(field);
1112         field_data = PyUnicode_DATA(field);
1113         field_len = PyUnicode_GET_LENGTH(field);
1114     }
1115     rec_len = join_append_data(self, field_kind, field_data, field_len,
1116                                &quoted, 0);
1117     if (rec_len < 0)
1118         return 0;
1119 
1120     /* grow record buffer if necessary */
1121     if (!join_check_rec_size(self, rec_len))
1122         return 0;
1123 
1124     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1125                                      &quoted, 1);
1126     self->num_fields++;
1127 
1128     return 1;
1129 }
1130 
1131 static int
join_append_lineterminator(WriterObj * self)1132 join_append_lineterminator(WriterObj *self)
1133 {
1134     Py_ssize_t terminator_len, i;
1135     unsigned int term_kind;
1136     void *term_data;
1137 
1138     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1139     if (terminator_len == -1)
1140         return 0;
1141 
1142     /* grow record buffer if necessary */
1143     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1144         return 0;
1145 
1146     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1147     term_data = PyUnicode_DATA(self->dialect->lineterminator);
1148     for (i = 0; i < terminator_len; i++)
1149         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1150     self->rec_len += terminator_len;
1151 
1152     return 1;
1153 }
1154 
/* Docstring of writer.writerow() (referenced from Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields.  Non-string\n"
"elements will be converted to string.");
1160 
1161 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1162 csv_writerow(WriterObj *self, PyObject *seq)
1163 {
1164     DialectObj *dialect = self->dialect;
1165     PyObject *iter, *field, *line, *result;
1166 
1167     iter = PyObject_GetIter(seq);
1168     if (iter == NULL)
1169         return PyErr_Format(_csvstate_global->error_obj,
1170                             "iterable expected, not %.200s",
1171                             seq->ob_type->tp_name);
1172 
1173     /* Join all fields in internal buffer.
1174      */
1175     join_reset(self);
1176     while ((field = PyIter_Next(iter))) {
1177         int append_ok;
1178         int quoted;
1179 
1180         switch (dialect->quoting) {
1181         case QUOTE_NONNUMERIC:
1182             quoted = !PyNumber_Check(field);
1183             break;
1184         case QUOTE_ALL:
1185             quoted = 1;
1186             break;
1187         default:
1188             quoted = 0;
1189             break;
1190         }
1191 
1192         if (PyUnicode_Check(field)) {
1193             append_ok = join_append(self, field, quoted);
1194             Py_DECREF(field);
1195         }
1196         else if (field == Py_None) {
1197             append_ok = join_append(self, NULL, quoted);
1198             Py_DECREF(field);
1199         }
1200         else {
1201             PyObject *str;
1202 
1203             str = PyObject_Str(field);
1204             Py_DECREF(field);
1205             if (str == NULL) {
1206                 Py_DECREF(iter);
1207                 return NULL;
1208             }
1209             append_ok = join_append(self, str, quoted);
1210             Py_DECREF(str);
1211         }
1212         if (!append_ok) {
1213             Py_DECREF(iter);
1214             return NULL;
1215         }
1216     }
1217     Py_DECREF(iter);
1218     if (PyErr_Occurred())
1219         return NULL;
1220 
1221     if (self->num_fields > 0 && self->rec_len == 0) {
1222         if (dialect->quoting == QUOTE_NONE) {
1223             PyErr_Format(_csvstate_global->error_obj,
1224                 "single empty field record must be quoted");
1225             return NULL;
1226         }
1227         self->num_fields--;
1228         if (!join_append(self, NULL, 1))
1229             return NULL;
1230     }
1231 
1232     /* Add line terminator.
1233      */
1234     if (!join_append_lineterminator(self)) {
1235         return NULL;
1236     }
1237 
1238     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1239                                      (void *) self->rec, self->rec_len);
1240     if (line == NULL) {
1241         return NULL;
1242     }
1243     result = PyObject_CallFunctionObjArgs(self->write, line, NULL);
1244     Py_DECREF(line);
1245     return result;
1246 }
1247 
/* Docstring of writer.writerows() (referenced from Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file.  Non-string\n"
"elements will be converted to string.");
1253 
1254 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1255 csv_writerows(WriterObj *self, PyObject *seqseq)
1256 {
1257     PyObject *row_iter, *row_obj, *result;
1258 
1259     row_iter = PyObject_GetIter(seqseq);
1260     if (row_iter == NULL) {
1261         PyErr_SetString(PyExc_TypeError,
1262                         "writerows() argument must be iterable");
1263         return NULL;
1264     }
1265     while ((row_obj = PyIter_Next(row_iter))) {
1266         result = csv_writerow(self, row_obj);
1267         Py_DECREF(row_obj);
1268         if (!result) {
1269             Py_DECREF(row_iter);
1270             return NULL;
1271         }
1272         else
1273              Py_DECREF(result);
1274     }
1275     Py_DECREF(row_iter);
1276     if (PyErr_Occurred())
1277         return NULL;
1278     Py_RETURN_NONE;
1279 }
1280 
1281 static struct PyMethodDef Writer_methods[] = {
1282     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1283     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1284     { NULL, NULL }
1285 };
1286 
1287 #define W_OFF(x) offsetof(WriterObj, x)
1288 
1289 static struct PyMemberDef Writer_memberlist[] = {
1290     { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1291     { NULL }
1292 };
1293 
1294 static void
Writer_dealloc(WriterObj * self)1295 Writer_dealloc(WriterObj *self)
1296 {
1297     PyObject_GC_UnTrack(self);
1298     Py_XDECREF(self->dialect);
1299     Py_XDECREF(self->write);
1300     if (self->rec != NULL)
1301         PyMem_Free(self->rec);
1302     PyObject_GC_Del(self);
1303 }
1304 
/* GC traversal hook for writer objects (installed as tp_traverse in
 * Writer_Type).
 *
 * Passes the writer's dialect and write callable to `visit`.  Py_VISIT
 * ignores NULL members and returns early with the callback's result if it
 * is non-zero; otherwise 0 is returned.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    return 0;
}
1312 
/* GC clear hook for writer objects (installed as tp_clear in Writer_Type).
 *
 * Drops the references to the dialect and the write callable, leaving the
 * members NULL.  The record buffer is not an object reference and is left
 * untouched here.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    return 0;
}
1320 
/* Docstring of the writer type (used as tp_doc in Writer_Type). */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1327 
1328 static PyTypeObject Writer_Type = {
1329     PyVarObject_HEAD_INIT(NULL, 0)
1330     "_csv.writer",                          /*tp_name*/
1331     sizeof(WriterObj),                      /*tp_basicsize*/
1332     0,                                      /*tp_itemsize*/
1333     /* methods */
1334     (destructor)Writer_dealloc,             /*tp_dealloc*/
1335     0,                                      /*tp_vectorcall_offset*/
1336     (getattrfunc)0,                         /*tp_getattr*/
1337     (setattrfunc)0,                         /*tp_setattr*/
1338     0,                                      /*tp_as_async*/
1339     (reprfunc)0,                            /*tp_repr*/
1340     0,                                      /*tp_as_number*/
1341     0,                                      /*tp_as_sequence*/
1342     0,                                      /*tp_as_mapping*/
1343     (hashfunc)0,                            /*tp_hash*/
1344     (ternaryfunc)0,                         /*tp_call*/
1345     (reprfunc)0,                            /*tp_str*/
1346     0,                                      /*tp_getattro*/
1347     0,                                      /*tp_setattro*/
1348     0,                                      /*tp_as_buffer*/
1349     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1350         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1351     Writer_Type_doc,
1352     (traverseproc)Writer_traverse,          /*tp_traverse*/
1353     (inquiry)Writer_clear,                  /*tp_clear*/
1354     0,                                      /*tp_richcompare*/
1355     0,                                      /*tp_weaklistoffset*/
1356     (getiterfunc)0,                         /*tp_iter*/
1357     (getiterfunc)0,                         /*tp_iternext*/
1358     Writer_methods,                         /*tp_methods*/
1359     Writer_memberlist,                      /*tp_members*/
1360     0,                                      /*tp_getset*/
1361 };
1362 
1363 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1364 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1365 {
1366     PyObject * output_file, * dialect = NULL;
1367     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1368     _Py_IDENTIFIER(write);
1369 
1370     if (!self)
1371         return NULL;
1372 
1373     self->dialect = NULL;
1374     self->write = NULL;
1375 
1376     self->rec = NULL;
1377     self->rec_size = 0;
1378     self->rec_len = 0;
1379     self->num_fields = 0;
1380 
1381     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1382         Py_DECREF(self);
1383         return NULL;
1384     }
1385     if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1386         Py_DECREF(self);
1387         return NULL;
1388     }
1389     if (self->write == NULL || !PyCallable_Check(self->write)) {
1390         PyErr_SetString(PyExc_TypeError,
1391                         "argument 1 must have a \"write\" method");
1392         Py_DECREF(self);
1393         return NULL;
1394     }
1395     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1396     if (self->dialect == NULL) {
1397         Py_DECREF(self);
1398         return NULL;
1399     }
1400     PyObject_GC_Track(self);
1401     return (PyObject *)self;
1402 }
1403 
1404 /*
1405  * DIALECT REGISTRY
1406  */
1407 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1408 csv_list_dialects(PyObject *module, PyObject *args)
1409 {
1410     return PyDict_Keys(_csvstate_global->dialects);
1411 }
1412 
1413 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1414 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1415 {
1416     PyObject *name_obj, *dialect_obj = NULL;
1417     PyObject *dialect;
1418 
1419     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1420         return NULL;
1421     if (!PyUnicode_Check(name_obj)) {
1422         PyErr_SetString(PyExc_TypeError,
1423                         "dialect name must be a string");
1424         return NULL;
1425     }
1426     if (PyUnicode_READY(name_obj) == -1)
1427         return NULL;
1428     dialect = _call_dialect(dialect_obj, kwargs);
1429     if (dialect == NULL)
1430         return NULL;
1431     if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1432         Py_DECREF(dialect);
1433         return NULL;
1434     }
1435     Py_DECREF(dialect);
1436     Py_RETURN_NONE;
1437 }
1438 
1439 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1440 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1441 {
1442     if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1443         if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1444             PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1445         }
1446         return NULL;
1447     }
1448     Py_RETURN_NONE;
1449 }
1450 
/* _csv.get_dialect(name): module-level entry point that forwards `name` to
 * get_dialect_from_registry() and returns its result unchanged.
 */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1456 
1457 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1458 csv_field_size_limit(PyObject *module, PyObject *args)
1459 {
1460     PyObject *new_limit = NULL;
1461     long old_limit = _csvstate_global->field_limit;
1462 
1463     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1464         return NULL;
1465     if (new_limit != NULL) {
1466         if (!PyLong_CheckExact(new_limit)) {
1467             PyErr_Format(PyExc_TypeError,
1468                          "limit must be an integer");
1469             return NULL;
1470         }
1471         _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1472         if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1473             _csvstate_global->field_limit = old_limit;
1474             return NULL;
1475         }
1476     }
1477     return PyLong_FromLong(old_limit);
1478 }
1479 
1480 /*
1481  * MODULE
1482  */
1483 
/* Module docstring (passed as the doc entry of the _csvmodule definition). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the\n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the\n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should\n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape\n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1541 
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1556 
/* Docstring of the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1570 
1571 PyDoc_STRVAR(csv_list_dialects_doc,
1572 "Return a list of all know dialect names.\n"
1573 "    names = csv.list_dialects()");
1574 
1575 PyDoc_STRVAR(csv_get_dialect_doc,
1576 "Return the dialect instance associated with name.\n"
1577 "    dialect = csv.get_dialect(name)");
1578 
1579 PyDoc_STRVAR(csv_register_dialect_doc,
1580 "Create a mapping from a string name to a dialect class.\n"
1581 "    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1582 
1583 PyDoc_STRVAR(csv_unregister_dialect_doc,
1584 "Delete the name/dialect mapping associated with a string name.\n"
1585 "    csv.unregister_dialect(name)");
1586 
1587 PyDoc_STRVAR(csv_field_size_limit_doc,
1588 "Sets an upper limit on parsed fields.\n"
1589 "    csv.field_size_limit([limit])\n"
1590 "\n"
1591 "Returns old limit. If limit is not given, no new limit is set and\n"
1592 "the old limit is returned");
1593 
1594 static struct PyMethodDef csv_methods[] = {
1595     { "reader", (PyCFunction)(void(*)(void))csv_reader,
1596         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1597     { "writer", (PyCFunction)(void(*)(void))csv_writer,
1598         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1599     { "list_dialects", (PyCFunction)csv_list_dialects,
1600         METH_NOARGS, csv_list_dialects_doc},
1601     { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
1602         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1603     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1604         METH_O, csv_unregister_dialect_doc},
1605     { "get_dialect", (PyCFunction)csv_get_dialect,
1606         METH_O, csv_get_dialect_doc},
1607     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1608         METH_VARARGS, csv_field_size_limit_doc},
1609     { NULL, NULL }
1610 };
1611 
1612 static struct PyModuleDef _csvmodule = {
1613     PyModuleDef_HEAD_INIT,
1614     "_csv",
1615     csv_module_doc,
1616     sizeof(_csvstate),
1617     csv_methods,
1618     NULL,
1619     _csv_traverse,
1620     _csv_clear,
1621     _csv_free
1622 };
1623 
1624 PyMODINIT_FUNC
PyInit__csv(void)1625 PyInit__csv(void)
1626 {
1627     PyObject *module;
1628     const StyleDesc *style;
1629 
1630     if (PyType_Ready(&Dialect_Type) < 0)
1631         return NULL;
1632 
1633     if (PyType_Ready(&Reader_Type) < 0)
1634         return NULL;
1635 
1636     if (PyType_Ready(&Writer_Type) < 0)
1637         return NULL;
1638 
1639     /* Create the module and add the functions */
1640     module = PyModule_Create(&_csvmodule);
1641     if (module == NULL)
1642         return NULL;
1643 
1644     /* Add version to the module. */
1645     if (PyModule_AddStringConstant(module, "__version__",
1646                                    MODULE_VERSION) == -1)
1647         return NULL;
1648 
1649     /* Set the field limit */
1650     _csvstate(module)->field_limit = 128 * 1024;
1651     /* Do I still need to add this var to the Module Dict? */
1652 
1653     /* Add _dialects dictionary */
1654     _csvstate(module)->dialects = PyDict_New();
1655     if (_csvstate(module)->dialects == NULL)
1656         return NULL;
1657     Py_INCREF(_csvstate(module)->dialects);
1658     if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1659         return NULL;
1660 
1661     /* Add quote styles into dictionary */
1662     for (style = quote_styles; style->name; style++) {
1663         if (PyModule_AddIntConstant(module, style->name,
1664                                     style->style) == -1)
1665             return NULL;
1666     }
1667 
1668     /* Add the Dialect type */
1669     Py_INCREF(&Dialect_Type);
1670     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1671         return NULL;
1672 
1673     /* Add the CSV exception object to the module. */
1674     _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1675     if (_csvstate(module)->error_obj == NULL)
1676         return NULL;
1677     Py_INCREF(_csvstate(module)->error_obj);
1678     PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1679     return module;
1680 }
1681