• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
11 **** 2.2.
12 
13 */
14 
15 #define MODULE_VERSION "1.0"
16 
17 #include "Python.h"
18 #include "structmember.h"
19 
20 
21 /* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers, supplied here when the Python headers lack them.
 * PyDoc_STR() keeps the text only when WITH_DOC_STRINGS is defined. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

/* Return-type/linkage prefix for the module init function. */
#ifndef PyMODINIT_FUNC
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif

/* Set op to NULL first, then drop the reference it used to hold. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                            \
    do {                                        \
        if (op) {                               \
            PyObject *tmp = (PyObject *)(op);   \
            (op) = NULL;                        \
            Py_DECREF(tmp);                     \
        }                                       \
    } while (0)
#endif

/* Traversal helper: expects `visit` and `arg` to be in scope at the point
 * of use and returns from the enclosing function on a non-zero result. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                    \
    do {                                                \
        if (op) {                                       \
            int vret = visit((PyObject *)(op), arg);    \
            if (vret)                                   \
                return vret;                            \
        }                                               \
    } while (0)
#endif
61 
62 /* end 2.2 compatibility macros */
63 
64 #define IS_BASESTRING(o) \
65     PyObject_TypeCheck(o, &PyBaseString_Type)
66 
67 static PyObject *error_obj;     /* CSV exception */
68 static PyObject *dialects;      /* Dialect registry */
69 static long field_limit = 128 * 1024;   /* max parsed field size */
70 
/* States of the character-at-a-time record parser. */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting a new field */
    ESCAPED_CHAR,               /* escape char seen outside quotes */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape char seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,      /* quote char seen inside a quoted field */
    EAT_CRNL                    /* consuming line-break characters */
} ParserState;
76 
/* Quoting policies selectable through a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of every known quoting policy; the entry with a NULL name
 * terminates the table. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
93 
94 typedef struct {
95     PyObject_HEAD
96 
97     int doublequote;            /* is " represented by ""? */
98     char delimiter;             /* field separator */
99     char quotechar;             /* quote character */
100     char escapechar;            /* escape character */
101     int skipinitialspace;       /* ignore spaces following delimiter? */
102     PyObject *lineterminator; /* string to write between records */
103     int quoting;                /* style of quoting to write */
104 
105     int strict;                 /* raise exception on bad CSV */
106 } DialectObj;
107 
108 staticforward PyTypeObject Dialect_Type;
109 
110 typedef struct {
111     PyObject_HEAD
112 
113     PyObject *input_iter;   /* iterate over this for input lines */
114 
115     DialectObj *dialect;    /* parsing dialect */
116 
117     PyObject *fields;           /* field list for current record */
118     ParserState state;          /* current CSV parse state */
119     char *field;                /* build current field in here */
120     int field_size;             /* size of allocated buffer */
121     int field_len;              /* length of current field */
122     int numeric_field;          /* treat field as numeric */
123     unsigned long line_num;     /* Source-file line number */
124 } ReaderObj;
125 
126 staticforward PyTypeObject Reader_Type;
127 
128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
129 
130 typedef struct {
131     PyObject_HEAD
132 
133     PyObject *writeline;    /* write output lines to this file */
134 
135     DialectObj *dialect;    /* parsing dialect */
136 
137     char *rec;                  /* buffer for parser.join */
138     int rec_size;               /* size of allocated record */
139     int rec_len;                /* length of record */
140     int num_fields;             /* number of fields in record */
141 } WriterObj;
142 
143 staticforward PyTypeObject Writer_Type;
144 
145 /*
146  * DIALECT class
147  */
148 
149 static PyObject *
get_dialect_from_registry(PyObject * name_obj)150 get_dialect_from_registry(PyObject * name_obj)
151 {
152     PyObject *dialect_obj;
153 
154     dialect_obj = PyDict_GetItem(dialects, name_obj);
155     if (dialect_obj == NULL) {
156         if (!PyErr_Occurred())
157             PyErr_Format(error_obj, "unknown dialect");
158     }
159     else
160         Py_INCREF(dialect_obj);
161     return dialect_obj;
162 }
163 
164 static PyObject *
get_string(PyObject * str)165 get_string(PyObject *str)
166 {
167     Py_XINCREF(str);
168     return str;
169 }
170 
171 static PyObject *
get_nullchar_as_None(char c)172 get_nullchar_as_None(char c)
173 {
174     if (c == '\0') {
175         Py_INCREF(Py_None);
176         return Py_None;
177     }
178     else
179         return PyString_FromStringAndSize((char*)&c, 1);
180 }
181 
182 static PyObject *
Dialect_get_lineterminator(DialectObj * self)183 Dialect_get_lineterminator(DialectObj *self)
184 {
185     return get_string(self->lineterminator);
186 }
187 
188 static PyObject *
Dialect_get_escapechar(DialectObj * self)189 Dialect_get_escapechar(DialectObj *self)
190 {
191     return get_nullchar_as_None(self->escapechar);
192 }
193 
194 static PyObject *
Dialect_get_quotechar(DialectObj * self)195 Dialect_get_quotechar(DialectObj *self)
196 {
197     return get_nullchar_as_None(self->quotechar);
198 }
199 
200 static PyObject *
Dialect_get_quoting(DialectObj * self)201 Dialect_get_quoting(DialectObj *self)
202 {
203     return PyInt_FromLong(self->quoting);
204 }
205 
206 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
208 {
209     if (src == NULL)
210         *target = dflt;
211     else {
212         int b = PyObject_IsTrue(src);
213         if (b < 0)
214             return -1;
215         *target = b;
216     }
217     return 0;
218 }
219 
220 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)221 _set_int(const char *name, int *target, PyObject *src, int dflt)
222 {
223     if (src == NULL)
224         *target = dflt;
225     else {
226         if (!PyInt_Check(src)) {
227             PyErr_Format(PyExc_TypeError,
228                          "\"%s\" must be an integer", name);
229             return -1;
230         }
231         *target = PyInt_AsLong(src);
232     }
233     return 0;
234 }
235 
236 static int
_set_char(const char * name,char * target,PyObject * src,char dflt)237 _set_char(const char *name, char *target, PyObject *src, char dflt)
238 {
239     if (src == NULL)
240         *target = dflt;
241     else {
242         *target = '\0';
243         if (src != Py_None) {
244             Py_ssize_t len;
245             if (!PyString_Check(src)) {
246                 PyErr_Format(PyExc_TypeError,
247                     "\"%s\" must be string, not %.200s", name,
248                     src->ob_type->tp_name);
249                 return -1;
250             }
251             len = PyString_GET_SIZE(src);
252             if (len > 1) {
253                 PyErr_Format(PyExc_TypeError,
254                     "\"%s\" must be an 1-character string",
255                     name);
256                 return -1;
257             }
258             if (len > 0)
259                 *target = *PyString_AS_STRING(src);
260         }
261     }
262     return 0;
263 }
264 
265 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)266 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
267 {
268     if (src == NULL)
269         *target = PyString_FromString(dflt);
270     else {
271         if (src == Py_None)
272             *target = NULL;
273         else if (!IS_BASESTRING(src)) {
274             PyErr_Format(PyExc_TypeError,
275                          "\"%s\" must be a string", name);
276             return -1;
277         }
278         else {
279             Py_XDECREF(*target);
280             Py_INCREF(src);
281             *target = src;
282         }
283     }
284     return 0;
285 }
286 
287 static int
dialect_check_quoting(int quoting)288 dialect_check_quoting(int quoting)
289 {
290     StyleDesc *qs = quote_styles;
291 
292     for (qs = quote_styles; qs->name; qs++) {
293         if (qs->style == quoting)
294             return 0;
295     }
296     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
297     return -1;
298 }
299 
300 #define D_OFF(x) offsetof(DialectObj, x)
301 
302 static struct PyMemberDef Dialect_memberlist[] = {
303     { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
304     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
305     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
306     { "strict",             T_INT, D_OFF(strict), READONLY },
307     { NULL }
308 };
309 
310 static PyGetSetDef Dialect_getsetlist[] = {
311     { "escapechar",             (getter)Dialect_get_escapechar},
312     { "lineterminator",         (getter)Dialect_get_lineterminator},
313     { "quotechar",              (getter)Dialect_get_quotechar},
314     { "quoting",                (getter)Dialect_get_quoting},
315     {NULL},
316 };
317 
318 static void
Dialect_dealloc(DialectObj * self)319 Dialect_dealloc(DialectObj *self)
320 {
321     Py_XDECREF(self->lineterminator);
322     Py_TYPE(self)->tp_free((PyObject *)self);
323 }
324 
325 static char *dialect_kws[] = {
326     "dialect",
327     "delimiter",
328     "doublequote",
329     "escapechar",
330     "lineterminator",
331     "quotechar",
332     "quoting",
333     "skipinitialspace",
334     "strict",
335     NULL
336 };
337 
338 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)339 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
340 {
341     DialectObj *self;
342     PyObject *ret = NULL;
343     PyObject *dialect = NULL;
344     PyObject *delimiter = NULL;
345     PyObject *doublequote = NULL;
346     PyObject *escapechar = NULL;
347     PyObject *lineterminator = NULL;
348     PyObject *quotechar = NULL;
349     PyObject *quoting = NULL;
350     PyObject *skipinitialspace = NULL;
351     PyObject *strict = NULL;
352 
353     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
354                                      "|OOOOOOOOO", dialect_kws,
355                                      &dialect,
356                                      &delimiter,
357                                      &doublequote,
358                                      &escapechar,
359                                      &lineterminator,
360                                      &quotechar,
361                                      &quoting,
362                                      &skipinitialspace,
363                                      &strict))
364         return NULL;
365 
366     if (dialect != NULL) {
367         if (IS_BASESTRING(dialect)) {
368             dialect = get_dialect_from_registry(dialect);
369             if (dialect == NULL)
370                 return NULL;
371         }
372         else
373             Py_INCREF(dialect);
374         /* Can we reuse this instance? */
375         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
376             delimiter == 0 &&
377             doublequote == 0 &&
378             escapechar == 0 &&
379             lineterminator == 0 &&
380             quotechar == 0 &&
381             quoting == 0 &&
382             skipinitialspace == 0 &&
383             strict == 0)
384             return dialect;
385     }
386 
387     self = (DialectObj *)type->tp_alloc(type, 0);
388     if (self == NULL) {
389         Py_XDECREF(dialect);
390         return NULL;
391     }
392     self->lineterminator = NULL;
393 
394     Py_XINCREF(delimiter);
395     Py_XINCREF(doublequote);
396     Py_XINCREF(escapechar);
397     Py_XINCREF(lineterminator);
398     Py_XINCREF(quotechar);
399     Py_XINCREF(quoting);
400     Py_XINCREF(skipinitialspace);
401     Py_XINCREF(strict);
402     if (dialect != NULL) {
403 #define DIALECT_GETATTR(v, n) \
404         if (v == NULL) \
405             v = PyObject_GetAttrString(dialect, n)
406         DIALECT_GETATTR(delimiter, "delimiter");
407         DIALECT_GETATTR(doublequote, "doublequote");
408         DIALECT_GETATTR(escapechar, "escapechar");
409         DIALECT_GETATTR(lineterminator, "lineterminator");
410         DIALECT_GETATTR(quotechar, "quotechar");
411         DIALECT_GETATTR(quoting, "quoting");
412         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
413         DIALECT_GETATTR(strict, "strict");
414         PyErr_Clear();
415     }
416 
417     /* check types and convert to C values */
418 #define DIASET(meth, name, target, src, dflt) \
419     if (meth(name, target, src, dflt)) \
420         goto err
421     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
422     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
423     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
424     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
425     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
426     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
427     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
428     DIASET(_set_bool, "strict", &self->strict, strict, 0);
429 
430     /* validate options */
431     if (dialect_check_quoting(self->quoting))
432         goto err;
433     if (self->delimiter == 0) {
434         PyErr_SetString(PyExc_TypeError,
435                         "\"delimiter\" must be an 1-character string");
436         goto err;
437     }
438     if (quotechar == Py_None && quoting == NULL)
439         self->quoting = QUOTE_NONE;
440     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
441         PyErr_SetString(PyExc_TypeError,
442                         "quotechar must be set if quoting enabled");
443         goto err;
444     }
445     if (self->lineterminator == 0) {
446         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
447         goto err;
448     }
449 
450     ret = (PyObject *)self;
451     Py_INCREF(self);
452 err:
453     Py_XDECREF(self);
454     Py_XDECREF(dialect);
455     Py_XDECREF(delimiter);
456     Py_XDECREF(doublequote);
457     Py_XDECREF(escapechar);
458     Py_XDECREF(lineterminator);
459     Py_XDECREF(quotechar);
460     Py_XDECREF(quoting);
461     Py_XDECREF(skipinitialspace);
462     Py_XDECREF(strict);
463     return ret;
464 }
465 
466 
467 PyDoc_STRVAR(Dialect_Type_doc,
468 "CSV dialect\n"
469 "\n"
470 "The Dialect type records CSV parsing and generation options.\n");
471 
472 static PyTypeObject Dialect_Type = {
473     PyVarObject_HEAD_INIT(NULL, 0)
474     "_csv.Dialect",                         /* tp_name */
475     sizeof(DialectObj),                     /* tp_basicsize */
476     0,                                      /* tp_itemsize */
477     /*  methods  */
478     (destructor)Dialect_dealloc,            /* tp_dealloc */
479     (printfunc)0,                           /* tp_print */
480     (getattrfunc)0,                         /* tp_getattr */
481     (setattrfunc)0,                         /* tp_setattr */
482     (cmpfunc)0,                             /* tp_compare */
483     (reprfunc)0,                            /* tp_repr */
484     0,                                      /* tp_as_number */
485     0,                                      /* tp_as_sequence */
486     0,                                      /* tp_as_mapping */
487     (hashfunc)0,                            /* tp_hash */
488     (ternaryfunc)0,                         /* tp_call */
489     (reprfunc)0,                                /* tp_str */
490     0,                                      /* tp_getattro */
491     0,                                      /* tp_setattro */
492     0,                                      /* tp_as_buffer */
493     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
494     Dialect_Type_doc,                       /* tp_doc */
495     0,                                      /* tp_traverse */
496     0,                                      /* tp_clear */
497     0,                                      /* tp_richcompare */
498     0,                                      /* tp_weaklistoffset */
499     0,                                      /* tp_iter */
500     0,                                      /* tp_iternext */
501     0,                                          /* tp_methods */
502     Dialect_memberlist,                     /* tp_members */
503     Dialect_getsetlist,                     /* tp_getset */
504     0,                                          /* tp_base */
505     0,                                          /* tp_dict */
506     0,                                          /* tp_descr_get */
507     0,                                          /* tp_descr_set */
508     0,                                          /* tp_dictoffset */
509     0,                                          /* tp_init */
510     0,                                          /* tp_alloc */
511     dialect_new,                                /* tp_new */
512     0,                                          /* tp_free */
513 };
514 
515 /*
516  * Return an instance of the dialect type, given a Python instance or kwarg
517  * description of the dialect
518  */
519 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)520 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
521 {
522     PyObject *ctor_args;
523     PyObject *dialect;
524 
525     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
526     if (ctor_args == NULL)
527         return NULL;
528     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
529     Py_DECREF(ctor_args);
530     return dialect;
531 }
532 
533 /*
534  * READER
535  */
536 static int
parse_save_field(ReaderObj * self)537 parse_save_field(ReaderObj *self)
538 {
539     PyObject *field;
540 
541     field = PyString_FromStringAndSize(self->field, self->field_len);
542     if (field == NULL)
543         return -1;
544     self->field_len = 0;
545     if (self->numeric_field) {
546         PyObject *tmp;
547 
548         self->numeric_field = 0;
549         tmp = PyNumber_Float(field);
550         if (tmp == NULL) {
551             Py_DECREF(field);
552             return -1;
553         }
554         Py_DECREF(field);
555         field = tmp;
556     }
557     PyList_Append(self->fields, field);
558     Py_DECREF(field);
559     return 0;
560 }
561 
562 static int
parse_grow_buff(ReaderObj * self)563 parse_grow_buff(ReaderObj *self)
564 {
565     if (self->field_size == 0) {
566         self->field_size = 4096;
567         if (self->field != NULL)
568             PyMem_Free(self->field);
569         self->field = PyMem_Malloc(self->field_size);
570     }
571     else {
572         if (self->field_size > INT_MAX / 2) {
573             PyErr_NoMemory();
574             return 0;
575         }
576         self->field_size *= 2;
577         self->field = PyMem_Realloc(self->field, self->field_size);
578     }
579     if (self->field == NULL) {
580         PyErr_NoMemory();
581         return 0;
582     }
583     return 1;
584 }
585 
586 static int
parse_add_char(ReaderObj * self,char c)587 parse_add_char(ReaderObj *self, char c)
588 {
589     if (self->field_len >= field_limit) {
590         PyErr_Format(error_obj, "field larger than field limit (%ld)",
591                      field_limit);
592         return -1;
593     }
594     if (self->field_len == self->field_size && !parse_grow_buff(self))
595         return -1;
596     self->field[self->field_len++] = c;
597     return 0;
598 }
599 
600 static int
parse_process_char(ReaderObj * self,char c)601 parse_process_char(ReaderObj *self, char c)
602 {
603     DialectObj *dialect = self->dialect;
604 
605     switch (self->state) {
606     case START_RECORD:
607         /* start of record */
608         if (c == '\0')
609             /* empty line - return [] */
610             break;
611         else if (c == '\n' || c == '\r') {
612             self->state = EAT_CRNL;
613             break;
614         }
615         /* normal character - handle as START_FIELD */
616         self->state = START_FIELD;
617         /* fallthru */
618     case START_FIELD:
619         /* expecting field */
620         if (c == '\n' || c == '\r' || c == '\0') {
621             /* save empty field - return [fields] */
622             if (parse_save_field(self) < 0)
623                 return -1;
624             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
625         }
626         else if (c == dialect->quotechar &&
627                  dialect->quoting != QUOTE_NONE) {
628             /* start quoted field */
629             self->state = IN_QUOTED_FIELD;
630         }
631         else if (c == dialect->escapechar) {
632             /* possible escaped character */
633             self->state = ESCAPED_CHAR;
634         }
635         else if (c == ' ' && dialect->skipinitialspace)
636             /* ignore space at start of field */
637             ;
638         else if (c == dialect->delimiter) {
639             /* save empty field */
640             if (parse_save_field(self) < 0)
641                 return -1;
642         }
643         else {
644             /* begin new unquoted field */
645             if (dialect->quoting == QUOTE_NONNUMERIC)
646                 self->numeric_field = 1;
647             if (parse_add_char(self, c) < 0)
648                 return -1;
649             self->state = IN_FIELD;
650         }
651         break;
652 
653     case ESCAPED_CHAR:
654         if (c == '\0')
655             c = '\n';
656         if (parse_add_char(self, c) < 0)
657             return -1;
658         self->state = IN_FIELD;
659         break;
660 
661     case IN_FIELD:
662         /* in unquoted field */
663         if (c == '\n' || c == '\r' || c == '\0') {
664             /* end of line - return [fields] */
665             if (parse_save_field(self) < 0)
666                 return -1;
667             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
668         }
669         else if (c == dialect->escapechar) {
670             /* possible escaped character */
671             self->state = ESCAPED_CHAR;
672         }
673         else if (c == dialect->delimiter) {
674             /* save field - wait for new field */
675             if (parse_save_field(self) < 0)
676                 return -1;
677             self->state = START_FIELD;
678         }
679         else {
680             /* normal character - save in field */
681             if (parse_add_char(self, c) < 0)
682                 return -1;
683         }
684         break;
685 
686     case IN_QUOTED_FIELD:
687         /* in quoted field */
688         if (c == '\0')
689             ;
690         else if (c == dialect->escapechar) {
691             /* Possible escape character */
692             self->state = ESCAPE_IN_QUOTED_FIELD;
693         }
694         else if (c == dialect->quotechar &&
695                  dialect->quoting != QUOTE_NONE) {
696             if (dialect->doublequote) {
697                 /* doublequote; " represented by "" */
698                 self->state = QUOTE_IN_QUOTED_FIELD;
699             }
700             else {
701                 /* end of quote part of field */
702                 self->state = IN_FIELD;
703             }
704         }
705         else {
706             /* normal character - save in field */
707             if (parse_add_char(self, c) < 0)
708                 return -1;
709         }
710         break;
711 
712     case ESCAPE_IN_QUOTED_FIELD:
713         if (c == '\0')
714             c = '\n';
715         if (parse_add_char(self, c) < 0)
716             return -1;
717         self->state = IN_QUOTED_FIELD;
718         break;
719 
720     case QUOTE_IN_QUOTED_FIELD:
721         /* doublequote - seen a quote in an quoted field */
722         if (dialect->quoting != QUOTE_NONE &&
723             c == dialect->quotechar) {
724             /* save "" as " */
725             if (parse_add_char(self, c) < 0)
726                 return -1;
727             self->state = IN_QUOTED_FIELD;
728         }
729         else if (c == dialect->delimiter) {
730             /* save field - wait for new field */
731             if (parse_save_field(self) < 0)
732                 return -1;
733             self->state = START_FIELD;
734         }
735         else if (c == '\n' || c == '\r' || c == '\0') {
736             /* end of line - return [fields] */
737             if (parse_save_field(self) < 0)
738                 return -1;
739             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
740         }
741         else if (!dialect->strict) {
742             if (parse_add_char(self, c) < 0)
743                 return -1;
744             self->state = IN_FIELD;
745         }
746         else {
747             /* illegal */
748             PyErr_Format(error_obj, "'%c' expected after '%c'",
749                             dialect->delimiter,
750                             dialect->quotechar);
751             return -1;
752         }
753         break;
754 
755     case EAT_CRNL:
756         if (c == '\n' || c == '\r')
757             ;
758         else if (c == '\0')
759             self->state = START_RECORD;
760         else {
761             PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
762             return -1;
763         }
764         break;
765 
766     }
767     return 0;
768 }
769 
770 static int
parse_reset(ReaderObj * self)771 parse_reset(ReaderObj *self)
772 {
773     Py_XDECREF(self->fields);
774     self->fields = PyList_New(0);
775     if (self->fields == NULL)
776         return -1;
777     self->field_len = 0;
778     self->state = START_RECORD;
779     self->numeric_field = 0;
780     return 0;
781 }
782 
783 static PyObject *
Reader_iternext(ReaderObj * self)784 Reader_iternext(ReaderObj *self)
785 {
786     PyObject *lineobj;
787     PyObject *fields = NULL;
788     char *line, c;
789     int linelen;
790 
791     if (parse_reset(self) < 0)
792         return NULL;
793     do {
794         lineobj = PyIter_Next(self->input_iter);
795         if (lineobj == NULL) {
796             /* End of input OR exception */
797             if (!PyErr_Occurred() && (self->field_len != 0 ||
798                                       self->state == IN_QUOTED_FIELD)) {
799                 if (self->dialect->strict)
800                     PyErr_SetString(error_obj, "unexpected end of data");
801                 else if (parse_save_field(self) >= 0 )
802                     break;
803             }
804             return NULL;
805         }
806         ++self->line_num;
807 
808         line = PyString_AsString(lineobj);
809         linelen = PyString_Size(lineobj);
810 
811         if (line == NULL || linelen < 0) {
812             Py_DECREF(lineobj);
813             return NULL;
814         }
815         while (linelen--) {
816             c = *line++;
817             if (c == '\0') {
818                 Py_DECREF(lineobj);
819                 PyErr_Format(error_obj,
820                              "line contains NULL byte");
821                 goto err;
822             }
823             if (parse_process_char(self, c) < 0) {
824                 Py_DECREF(lineobj);
825                 goto err;
826             }
827         }
828         Py_DECREF(lineobj);
829         if (parse_process_char(self, 0) < 0)
830             goto err;
831     } while (self->state != START_RECORD);
832 
833     fields = self->fields;
834     self->fields = NULL;
835 err:
836     return fields;
837 }
838 
839 static void
Reader_dealloc(ReaderObj * self)840 Reader_dealloc(ReaderObj *self)
841 {
842     PyObject_GC_UnTrack(self);
843     Py_XDECREF(self->dialect);
844     Py_XDECREF(self->input_iter);
845     Py_XDECREF(self->fields);
846     if (self->field != NULL)
847         PyMem_Free(self->field);
848     PyObject_GC_Del(self);
849 }
850 
851 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)852 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
853 {
854     Py_VISIT(self->dialect);
855     Py_VISIT(self->input_iter);
856     Py_VISIT(self->fields);
857     return 0;
858 }
859 
860 static int
Reader_clear(ReaderObj * self)861 Reader_clear(ReaderObj *self)
862 {
863     Py_CLEAR(self->dialect);
864     Py_CLEAR(self->input_iter);
865     Py_CLEAR(self->fields);
866     return 0;
867 }
868 
869 PyDoc_STRVAR(Reader_Type_doc,
870 "CSV reader\n"
871 "\n"
872 "Reader objects are responsible for reading and parsing tabular data\n"
873 "in CSV format.\n"
874 );
875 
876 static struct PyMethodDef Reader_methods[] = {
877     { NULL, NULL }
878 };
879 #define R_OFF(x) offsetof(ReaderObj, x)
880 
881 static struct PyMemberDef Reader_memberlist[] = {
882     { "dialect", T_OBJECT, R_OFF(dialect), RO },
883     { "line_num", T_ULONG, R_OFF(line_num), RO },
884     { NULL }
885 };
886 
887 
888 static PyTypeObject Reader_Type = {
889     PyVarObject_HEAD_INIT(NULL, 0)
890     "_csv.reader",                          /*tp_name*/
891     sizeof(ReaderObj),                      /*tp_basicsize*/
892     0,                                      /*tp_itemsize*/
893     /* methods */
894     (destructor)Reader_dealloc,             /*tp_dealloc*/
895     (printfunc)0,                           /*tp_print*/
896     (getattrfunc)0,                         /*tp_getattr*/
897     (setattrfunc)0,                         /*tp_setattr*/
898     (cmpfunc)0,                             /*tp_compare*/
899     (reprfunc)0,                            /*tp_repr*/
900     0,                                      /*tp_as_number*/
901     0,                                      /*tp_as_sequence*/
902     0,                                      /*tp_as_mapping*/
903     (hashfunc)0,                            /*tp_hash*/
904     (ternaryfunc)0,                         /*tp_call*/
905     (reprfunc)0,                                /*tp_str*/
906     0,                                      /*tp_getattro*/
907     0,                                      /*tp_setattro*/
908     0,                                      /*tp_as_buffer*/
909     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
910         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
911     Reader_Type_doc,                        /*tp_doc*/
912     (traverseproc)Reader_traverse,          /*tp_traverse*/
913     (inquiry)Reader_clear,                  /*tp_clear*/
914     0,                                      /*tp_richcompare*/
915     0,                                      /*tp_weaklistoffset*/
916     PyObject_SelfIter,                          /*tp_iter*/
917     (getiterfunc)Reader_iternext,           /*tp_iternext*/
918     Reader_methods,                         /*tp_methods*/
919     Reader_memberlist,                      /*tp_members*/
920     0,                                      /*tp_getset*/
921 
922 };
923 
924 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)925 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
926 {
927     PyObject * iterator, * dialect = NULL;
928     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
929 
930     if (!self)
931         return NULL;
932 
933     self->dialect = NULL;
934     self->fields = NULL;
935     self->input_iter = NULL;
936     self->field = NULL;
937     self->field_size = 0;
938     self->line_num = 0;
939 
940     if (parse_reset(self) < 0) {
941         Py_DECREF(self);
942         return NULL;
943     }
944 
945     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
946         Py_DECREF(self);
947         return NULL;
948     }
949     self->input_iter = PyObject_GetIter(iterator);
950     if (self->input_iter == NULL) {
951         PyErr_SetString(PyExc_TypeError,
952                         "argument 1 must be an iterator");
953         Py_DECREF(self);
954         return NULL;
955     }
956     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
957     if (self->dialect == NULL) {
958         Py_DECREF(self);
959         return NULL;
960     }
961 
962     PyObject_GC_Track(self);
963     return (PyObject *)self;
964 }
965 
966 /*
967  * WRITER
968  */
969 /* ---------------------------------------------------------------- */
970 static void
join_reset(WriterObj * self)971 join_reset(WriterObj *self)
972 {
973     self->rec_len = 0;
974     self->num_fields = 0;
975 }
976 
977 #define MEM_INCR 32768
978 
979 /* Calculate new record length or append field to record.  Return new
980  * record length.
981  */
982 static int
join_append_data(WriterObj * self,char * field,int quote_empty,int * quoted,int copy_phase)983 join_append_data(WriterObj *self, char *field, int quote_empty,
984                  int *quoted, int copy_phase)
985 {
986     DialectObj *dialect = self->dialect;
987     int i, rec_len;
988     char *lineterm;
989 
990 #define ADDCH(c) \
991     do {\
992         if (copy_phase) \
993             self->rec[rec_len] = c;\
994         rec_len++;\
995     } while(0)
996 
997     lineterm = PyString_AsString(dialect->lineterminator);
998     if (lineterm == NULL)
999         return -1;
1000 
1001     rec_len = self->rec_len;
1002 
1003     /* If this is not the first field we need a field separator */
1004     if (self->num_fields > 0)
1005         ADDCH(dialect->delimiter);
1006 
1007     /* Handle preceding quote */
1008     if (copy_phase && *quoted)
1009         ADDCH(dialect->quotechar);
1010 
1011     /* Copy/count field data */
1012     for (i = 0;; i++) {
1013         char c = field[i];
1014         int want_escape = 0;
1015 
1016         if (c == '\0')
1017             break;
1018 
1019         if (c == dialect->delimiter ||
1020             c == dialect->escapechar ||
1021             c == dialect->quotechar ||
1022             strchr(lineterm, c)) {
1023             if (dialect->quoting == QUOTE_NONE)
1024                 want_escape = 1;
1025             else {
1026                 if (c == dialect->quotechar) {
1027                     if (dialect->doublequote)
1028                         ADDCH(dialect->quotechar);
1029                     else
1030                         want_escape = 1;
1031                 }
1032                 if (!want_escape)
1033                     *quoted = 1;
1034             }
1035             if (want_escape) {
1036                 if (!dialect->escapechar) {
1037                     PyErr_Format(error_obj,
1038                                  "need to escape, but no escapechar set");
1039                     return -1;
1040                 }
1041                 ADDCH(dialect->escapechar);
1042             }
1043         }
1044         /* Copy field character into record buffer.
1045          */
1046         ADDCH(c);
1047     }
1048 
1049     /* If field is empty check if it needs to be quoted.
1050      */
1051     if (i == 0 && quote_empty) {
1052         if (dialect->quoting == QUOTE_NONE) {
1053             PyErr_Format(error_obj,
1054                          "single empty field record must be quoted");
1055             return -1;
1056         }
1057         else
1058             *quoted = 1;
1059     }
1060 
1061     if (*quoted) {
1062         if (copy_phase)
1063             ADDCH(dialect->quotechar);
1064         else
1065             rec_len += 2;
1066     }
1067     return rec_len;
1068 #undef ADDCH
1069 }
1070 
1071 static int
join_check_rec_size(WriterObj * self,int rec_len)1072 join_check_rec_size(WriterObj *self, int rec_len)
1073 {
1074 
1075     if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1076         PyErr_NoMemory();
1077         return 0;
1078     }
1079 
1080     if (rec_len > self->rec_size) {
1081         if (self->rec_size == 0) {
1082             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1083             if (self->rec != NULL)
1084                 PyMem_Free(self->rec);
1085             self->rec = PyMem_Malloc(self->rec_size);
1086         }
1087         else {
1088             char *old_rec = self->rec;
1089 
1090             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1091             self->rec = PyMem_Realloc(self->rec, self->rec_size);
1092             if (self->rec == NULL)
1093                 PyMem_Free(old_rec);
1094         }
1095         if (self->rec == NULL) {
1096             PyErr_NoMemory();
1097             return 0;
1098         }
1099     }
1100     return 1;
1101 }
1102 
1103 static int
join_append(WriterObj * self,char * field,int * quoted,int quote_empty)1104 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1105 {
1106     int rec_len;
1107 
1108     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1109     if (rec_len < 0)
1110         return 0;
1111 
1112     /* grow record buffer if necessary */
1113     if (!join_check_rec_size(self, rec_len))
1114         return 0;
1115 
1116     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1117     self->num_fields++;
1118 
1119     return 1;
1120 }
1121 
1122 static int
join_append_lineterminator(WriterObj * self)1123 join_append_lineterminator(WriterObj *self)
1124 {
1125     int terminator_len;
1126     char *terminator;
1127 
1128     terminator_len = PyString_Size(self->dialect->lineterminator);
1129     if (terminator_len == -1)
1130         return 0;
1131 
1132     /* grow record buffer if necessary */
1133     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1134         return 0;
1135 
1136     terminator = PyString_AsString(self->dialect->lineterminator);
1137     if (terminator == NULL)
1138         return 0;
1139     memmove(self->rec + self->rec_len, terminator, terminator_len);
1140     self->rec_len += terminator_len;
1141 
1142     return 1;
1143 }
1144 
1145 PyDoc_STRVAR(csv_writerow_doc,
1146 "writerow(sequence)\n"
1147 "\n"
1148 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1149 "elements will be converted to string.");
1150 
1151 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1152 csv_writerow(WriterObj *self, PyObject *seq)
1153 {
1154     DialectObj *dialect = self->dialect;
1155     int len, i;
1156 
1157     if (!PySequence_Check(seq))
1158         return PyErr_Format(error_obj, "sequence expected");
1159 
1160     len = PySequence_Length(seq);
1161     if (len < 0)
1162         return NULL;
1163 
1164     /* Join all fields in internal buffer.
1165      */
1166     join_reset(self);
1167     for (i = 0; i < len; i++) {
1168         PyObject *field;
1169         int append_ok;
1170         int quoted;
1171 
1172         field = PySequence_GetItem(seq, i);
1173         if (field == NULL)
1174             return NULL;
1175 
1176         switch (dialect->quoting) {
1177         case QUOTE_NONNUMERIC:
1178             quoted = !PyNumber_Check(field);
1179             break;
1180         case QUOTE_ALL:
1181             quoted = 1;
1182             break;
1183         default:
1184             quoted = 0;
1185             break;
1186         }
1187 
1188         if (PyString_Check(field)) {
1189             append_ok = join_append(self,
1190                                     PyString_AS_STRING(field),
1191                                     &quoted, len == 1);
1192             Py_DECREF(field);
1193         }
1194         else if (field == Py_None) {
1195             append_ok = join_append(self, "", &quoted, len == 1);
1196             Py_DECREF(field);
1197         }
1198         else {
1199             PyObject *str;
1200 
1201             if (PyFloat_Check(field)) {
1202                 str = PyObject_Repr(field);
1203             } else {
1204                 str = PyObject_Str(field);
1205             }
1206             Py_DECREF(field);
1207             if (str == NULL)
1208                 return NULL;
1209 
1210             append_ok = join_append(self, PyString_AS_STRING(str),
1211                                     &quoted, len == 1);
1212             Py_DECREF(str);
1213         }
1214         if (!append_ok)
1215             return NULL;
1216     }
1217 
1218     /* Add line terminator.
1219      */
1220     if (!join_append_lineterminator(self))
1221         return 0;
1222 
1223     return PyObject_CallFunction(self->writeline,
1224                                  "(s#)", self->rec, self->rec_len);
1225 }
1226 
1227 PyDoc_STRVAR(csv_writerows_doc,
1228 "writerows(sequence of sequences)\n"
1229 "\n"
1230 "Construct and write a series of sequences to a csv file.  Non-string\n"
1231 "elements will be converted to string.");
1232 
1233 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1234 csv_writerows(WriterObj *self, PyObject *seqseq)
1235 {
1236     PyObject *row_iter, *row_obj, *result;
1237 
1238     row_iter = PyObject_GetIter(seqseq);
1239     if (row_iter == NULL) {
1240         PyErr_SetString(PyExc_TypeError,
1241                         "writerows() argument must be iterable");
1242         return NULL;
1243     }
1244     while ((row_obj = PyIter_Next(row_iter))) {
1245         result = csv_writerow(self, row_obj);
1246         Py_DECREF(row_obj);
1247         if (!result) {
1248             Py_DECREF(row_iter);
1249             return NULL;
1250         }
1251         else
1252              Py_DECREF(result);
1253     }
1254     Py_DECREF(row_iter);
1255     if (PyErr_Occurred())
1256         return NULL;
1257     Py_INCREF(Py_None);
1258     return Py_None;
1259 }
1260 
1261 static struct PyMethodDef Writer_methods[] = {
1262     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1263     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1264     { NULL, NULL }
1265 };
1266 
1267 #define W_OFF(x) offsetof(WriterObj, x)
1268 
1269 static struct PyMemberDef Writer_memberlist[] = {
1270     { "dialect", T_OBJECT, W_OFF(dialect), RO },
1271     { NULL }
1272 };
1273 
1274 static void
Writer_dealloc(WriterObj * self)1275 Writer_dealloc(WriterObj *self)
1276 {
1277     PyObject_GC_UnTrack(self);
1278     Py_XDECREF(self->dialect);
1279     Py_XDECREF(self->writeline);
1280     if (self->rec != NULL)
1281         PyMem_Free(self->rec);
1282     PyObject_GC_Del(self);
1283 }
1284 
1285 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1286 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1287 {
1288     Py_VISIT(self->dialect);
1289     Py_VISIT(self->writeline);
1290     return 0;
1291 }
1292 
1293 static int
Writer_clear(WriterObj * self)1294 Writer_clear(WriterObj *self)
1295 {
1296     Py_CLEAR(self->dialect);
1297     Py_CLEAR(self->writeline);
1298     return 0;
1299 }
1300 
1301 PyDoc_STRVAR(Writer_Type_doc,
1302 "CSV writer\n"
1303 "\n"
1304 "Writer objects are responsible for generating tabular data\n"
1305 "in CSV format from sequence input.\n"
1306 );
1307 
1308 static PyTypeObject Writer_Type = {
1309     PyVarObject_HEAD_INIT(NULL, 0)
1310     "_csv.writer",                          /*tp_name*/
1311     sizeof(WriterObj),                      /*tp_basicsize*/
1312     0,                                      /*tp_itemsize*/
1313     /* methods */
1314     (destructor)Writer_dealloc,             /*tp_dealloc*/
1315     (printfunc)0,                           /*tp_print*/
1316     (getattrfunc)0,                         /*tp_getattr*/
1317     (setattrfunc)0,                         /*tp_setattr*/
1318     (cmpfunc)0,                             /*tp_compare*/
1319     (reprfunc)0,                            /*tp_repr*/
1320     0,                                      /*tp_as_number*/
1321     0,                                      /*tp_as_sequence*/
1322     0,                                      /*tp_as_mapping*/
1323     (hashfunc)0,                            /*tp_hash*/
1324     (ternaryfunc)0,                         /*tp_call*/
1325     (reprfunc)0,                            /*tp_str*/
1326     0,                                      /*tp_getattro*/
1327     0,                                      /*tp_setattro*/
1328     0,                                      /*tp_as_buffer*/
1329     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1330         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1331     Writer_Type_doc,
1332     (traverseproc)Writer_traverse,          /*tp_traverse*/
1333     (inquiry)Writer_clear,                  /*tp_clear*/
1334     0,                                      /*tp_richcompare*/
1335     0,                                      /*tp_weaklistoffset*/
1336     (getiterfunc)0,                         /*tp_iter*/
1337     (getiterfunc)0,                         /*tp_iternext*/
1338     Writer_methods,                         /*tp_methods*/
1339     Writer_memberlist,                      /*tp_members*/
1340     0,                                      /*tp_getset*/
1341 };
1342 
1343 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1344 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1345 {
1346     PyObject * output_file, * dialect = NULL;
1347     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1348 
1349     if (!self)
1350         return NULL;
1351 
1352     self->dialect = NULL;
1353     self->writeline = NULL;
1354 
1355     self->rec = NULL;
1356     self->rec_size = 0;
1357     self->rec_len = 0;
1358     self->num_fields = 0;
1359 
1360     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1361         Py_DECREF(self);
1362         return NULL;
1363     }
1364     self->writeline = PyObject_GetAttrString(output_file, "write");
1365     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1366         PyErr_SetString(PyExc_TypeError,
1367                         "argument 1 must have a \"write\" method");
1368         Py_DECREF(self);
1369         return NULL;
1370     }
1371     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1372     if (self->dialect == NULL) {
1373         Py_DECREF(self);
1374         return NULL;
1375     }
1376     PyObject_GC_Track(self);
1377     return (PyObject *)self;
1378 }
1379 
1380 /*
1381  * DIALECT REGISTRY
1382  */
1383 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1384 csv_list_dialects(PyObject *module, PyObject *args)
1385 {
1386     return PyDict_Keys(dialects);
1387 }
1388 
1389 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1390 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1391 {
1392     PyObject *name_obj, *dialect_obj = NULL;
1393     PyObject *dialect;
1394 
1395     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1396         return NULL;
1397     if (!IS_BASESTRING(name_obj)) {
1398         PyErr_SetString(PyExc_TypeError,
1399                         "dialect name must be a string or unicode");
1400         return NULL;
1401     }
1402     dialect = _call_dialect(dialect_obj, kwargs);
1403     if (dialect == NULL)
1404         return NULL;
1405     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1406         Py_DECREF(dialect);
1407         return NULL;
1408     }
1409     Py_DECREF(dialect);
1410     Py_INCREF(Py_None);
1411     return Py_None;
1412 }
1413 
1414 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1415 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1416 {
1417     if (PyDict_DelItem(dialects, name_obj) < 0)
1418         return PyErr_Format(error_obj, "unknown dialect");
1419     Py_INCREF(Py_None);
1420     return Py_None;
1421 }
1422 
1423 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1424 csv_get_dialect(PyObject *module, PyObject *name_obj)
1425 {
1426     return get_dialect_from_registry(name_obj);
1427 }
1428 
1429 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1430 csv_field_size_limit(PyObject *module, PyObject *args)
1431 {
1432     PyObject *new_limit = NULL;
1433     long old_limit = field_limit;
1434 
1435     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1436         return NULL;
1437     if (new_limit != NULL) {
1438         if (!PyInt_Check(new_limit)) {
1439             PyErr_Format(PyExc_TypeError,
1440                          "limit must be an integer");
1441             return NULL;
1442         }
1443         field_limit = PyInt_AsLong(new_limit);
1444     }
1445     return PyInt_FromLong(old_limit);
1446 }
1447 
1448 /*
1449  * MODULE
1450  */
1451 
/* Docstring of the _csv module itself; init_csv() passes it to
 * Py_InitModule3().
 */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1509 
/* Docstring attached to the module-level "reader" entry in csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
1524 
/* Docstring attached to the module-level "writer" entry in csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1538 
1539 PyDoc_STRVAR(csv_list_dialects_doc,
1540 "Return a list of all know dialect names.\n"
1541 "    names = csv.list_dialects()");
1542 
/* Docstring attached to the "get_dialect" entry in csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");
1546 
1547 PyDoc_STRVAR(csv_register_dialect_doc,
1548 "Create a mapping from a string name to a dialect class.\n"
1549 "    dialect = csv.register_dialect(name, dialect)");
1550 
/* Docstring attached to the "unregister_dialect" entry in csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");

/* Docstring attached to the "field_size_limit" entry in csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1561 
1562 static struct PyMethodDef csv_methods[] = {
1563     { "reader", (PyCFunction)csv_reader,
1564         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1565     { "writer", (PyCFunction)csv_writer,
1566         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1567     { "list_dialects", (PyCFunction)csv_list_dialects,
1568         METH_NOARGS, csv_list_dialects_doc},
1569     { "register_dialect", (PyCFunction)csv_register_dialect,
1570         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1571     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1572         METH_O, csv_unregister_dialect_doc},
1573     { "get_dialect", (PyCFunction)csv_get_dialect,
1574         METH_O, csv_get_dialect_doc},
1575     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1576         METH_VARARGS, csv_field_size_limit_doc},
1577     { NULL, NULL }
1578 };
1579 
1580 PyMODINIT_FUNC
init_csv(void)1581 init_csv(void)
1582 {
1583     PyObject *module;
1584     StyleDesc *style;
1585 
1586     if (PyType_Ready(&Dialect_Type) < 0)
1587         return;
1588 
1589     if (PyType_Ready(&Reader_Type) < 0)
1590         return;
1591 
1592     if (PyType_Ready(&Writer_Type) < 0)
1593         return;
1594 
1595     /* Create the module and add the functions */
1596     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1597     if (module == NULL)
1598         return;
1599 
1600     /* Add version to the module. */
1601     if (PyModule_AddStringConstant(module, "__version__",
1602                                    MODULE_VERSION) == -1)
1603         return;
1604 
1605     /* Add _dialects dictionary */
1606     dialects = PyDict_New();
1607     if (dialects == NULL)
1608         return;
1609     if (PyModule_AddObject(module, "_dialects", dialects))
1610         return;
1611 
1612     /* Add quote styles into dictionary */
1613     for (style = quote_styles; style->name; style++) {
1614         if (PyModule_AddIntConstant(module, style->name,
1615                                     style->style) == -1)
1616             return;
1617     }
1618 
1619     /* Add the Dialect type */
1620     Py_INCREF(&Dialect_Type);
1621     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1622         return;
1623 
1624     /* Add the CSV exception object to the module. */
1625     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1626     if (error_obj == NULL)
1627         return;
1628     PyModule_AddObject(module, "Error", error_obj);
1629 }
1630