• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
11 **** 2.2.
12 
13 */
14 
15 #define MODULE_VERSION "1.0"
16 
17 #include "Python.h"
18 #include "structmember.h"
19 
20 
/* begin 2.2 compatibility macros */

/* Docstring helpers; only defined when the Python headers lack them. */
#ifndef PyDoc_STRVAR
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

/* Declaration prefix for the module initialisation function. */
#ifndef PyMODINIT_FUNC
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif

/* Drop the reference held in op, storing NULL in op before the decref. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *tmp = (PyObject *)(op);                   \
            (op) = NULL;                                        \
            Py_DECREF(tmp);                                     \
        }                                                       \
    } while (0)
#endif

/* Traversal helper; expects `visit` and `arg` to be in scope and returns
 * from the enclosing function as soon as a visit reports non-zero. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                                    \
    do {                                                                \
        if (op) {                                                       \
            int vret = visit((PyObject *)(op), arg);                    \
            if (vret)                                                   \
                return vret;                                            \
        }                                                               \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63 
64 #define IS_BASESTRING(o) \
65     PyObject_TypeCheck(o, &PyBaseString_Type)
66 
67 static PyObject *error_obj;     /* CSV exception */
68 static PyObject *dialects;      /* Dialect registry */
69 static long field_limit = 128 * 1024;   /* max parsed field size */
70 
/* States of the reader's per-character parser (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;

/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
80 
81 typedef struct {
82     QuoteStyle style;
83     char *name;
84 } StyleDesc;
85 
86 static StyleDesc quote_styles[] = {
87     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
88     { QUOTE_ALL,        "QUOTE_ALL" },
89     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
90     { QUOTE_NONE,       "QUOTE_NONE" },
91     { 0 }
92 };
93 
94 typedef struct {
95     PyObject_HEAD
96 
97     int doublequote;            /* is " represented by ""? */
98     char delimiter;             /* field separator */
99     char quotechar;             /* quote character */
100     char escapechar;            /* escape character */
101     int skipinitialspace;       /* ignore spaces following delimiter? */
102     PyObject *lineterminator; /* string to write between records */
103     int quoting;                /* style of quoting to write */
104 
105     int strict;                 /* raise exception on bad CSV */
106 } DialectObj;
107 
108 staticforward PyTypeObject Dialect_Type;
109 
110 typedef struct {
111     PyObject_HEAD
112 
113     PyObject *input_iter;   /* iterate over this for input lines */
114 
115     DialectObj *dialect;    /* parsing dialect */
116 
117     PyObject *fields;           /* field list for current record */
118     ParserState state;          /* current CSV parse state */
119     char *field;                /* build current field in here */
120     int field_size;             /* size of allocated buffer */
121     int field_len;              /* length of current field */
122     int numeric_field;          /* treat field as numeric */
123     unsigned long line_num;     /* Source-file line number */
124 } ReaderObj;
125 
126 staticforward PyTypeObject Reader_Type;
127 
128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
129 
130 typedef struct {
131     PyObject_HEAD
132 
133     PyObject *writeline;    /* write output lines to this file */
134 
135     DialectObj *dialect;    /* parsing dialect */
136 
137     char *rec;                  /* buffer for parser.join */
138     int rec_size;               /* size of allocated record */
139     int rec_len;                /* length of record */
140     int num_fields;             /* number of fields in record */
141 } WriterObj;
142 
143 staticforward PyTypeObject Writer_Type;
144 
145 /*
146  * DIALECT class
147  */
148 
149 static PyObject *
get_dialect_from_registry(PyObject * name_obj)150 get_dialect_from_registry(PyObject * name_obj)
151 {
152     PyObject *dialect_obj;
153 
154     dialect_obj = PyDict_GetItem(dialects, name_obj);
155     if (dialect_obj == NULL) {
156         if (!PyErr_Occurred())
157             PyErr_Format(error_obj, "unknown dialect");
158     }
159     else
160         Py_INCREF(dialect_obj);
161     return dialect_obj;
162 }
163 
164 static PyObject *
get_string(PyObject * str)165 get_string(PyObject *str)
166 {
167     Py_XINCREF(str);
168     return str;
169 }
170 
171 static PyObject *
get_nullchar_as_None(char c)172 get_nullchar_as_None(char c)
173 {
174     if (c == '\0') {
175         Py_INCREF(Py_None);
176         return Py_None;
177     }
178     else
179         return PyString_FromStringAndSize((char*)&c, 1);
180 }
181 
182 static PyObject *
Dialect_get_lineterminator(DialectObj * self)183 Dialect_get_lineterminator(DialectObj *self)
184 {
185     return get_string(self->lineterminator);
186 }
187 
188 static PyObject *
Dialect_get_escapechar(DialectObj * self)189 Dialect_get_escapechar(DialectObj *self)
190 {
191     return get_nullchar_as_None(self->escapechar);
192 }
193 
194 static PyObject *
Dialect_get_quotechar(DialectObj * self)195 Dialect_get_quotechar(DialectObj *self)
196 {
197     return get_nullchar_as_None(self->quotechar);
198 }
199 
200 static PyObject *
Dialect_get_quoting(DialectObj * self)201 Dialect_get_quoting(DialectObj *self)
202 {
203     return PyInt_FromLong(self->quoting);
204 }
205 
206 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
208 {
209     if (src == NULL)
210         *target = dflt;
211     else {
212         int b = PyObject_IsTrue(src);
213         if (b < 0)
214             return -1;
215         *target = b;
216     }
217     return 0;
218 }
219 
220 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)221 _set_int(const char *name, int *target, PyObject *src, int dflt)
222 {
223     int value;
224     if (src == NULL)
225         *target = dflt;
226     else {
227         if (!_PyAnyInt_Check(src)) {
228             PyErr_Format(PyExc_TypeError,
229                          "\"%s\" must be an integer", name);
230             return -1;
231         }
232         value = PyInt_AsLong(src);
233         if (value == -1 && PyErr_Occurred())
234             return -1;
235         *target = value;
236     }
237     return 0;
238 }
239 
240 static int
_set_char(const char * name,char * target,PyObject * src,char dflt)241 _set_char(const char *name, char *target, PyObject *src, char dflt)
242 {
243     if (src == NULL)
244         *target = dflt;
245     else {
246         *target = '\0';
247         if (src != Py_None) {
248             Py_ssize_t len;
249             if (!PyString_Check(src)) {
250                 PyErr_Format(PyExc_TypeError,
251                     "\"%s\" must be string, not %.200s", name,
252                     src->ob_type->tp_name);
253                 return -1;
254             }
255             len = PyString_GET_SIZE(src);
256             if (len > 1) {
257                 PyErr_Format(PyExc_TypeError,
258                     "\"%s\" must be an 1-character string",
259                     name);
260                 return -1;
261             }
262             if (len > 0)
263                 *target = *PyString_AS_STRING(src);
264         }
265     }
266     return 0;
267 }
268 
269 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)270 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
271 {
272     if (src == NULL)
273         *target = PyString_FromString(dflt);
274     else {
275         if (src == Py_None)
276             *target = NULL;
277         else if (!IS_BASESTRING(src)) {
278             PyErr_Format(PyExc_TypeError,
279                          "\"%s\" must be a string", name);
280             return -1;
281         }
282         else {
283             Py_INCREF(src);
284             Py_XSETREF(*target, src);
285         }
286     }
287     return 0;
288 }
289 
290 static int
dialect_check_quoting(int quoting)291 dialect_check_quoting(int quoting)
292 {
293     StyleDesc *qs = quote_styles;
294 
295     for (qs = quote_styles; qs->name; qs++) {
296         if (qs->style == quoting)
297             return 0;
298     }
299     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
300     return -1;
301 }
302 
303 #define D_OFF(x) offsetof(DialectObj, x)
304 
305 static struct PyMemberDef Dialect_memberlist[] = {
306     { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
307     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
308     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
309     { "strict",             T_INT, D_OFF(strict), READONLY },
310     { NULL }
311 };
312 
313 static PyGetSetDef Dialect_getsetlist[] = {
314     { "escapechar",             (getter)Dialect_get_escapechar},
315     { "lineterminator",         (getter)Dialect_get_lineterminator},
316     { "quotechar",              (getter)Dialect_get_quotechar},
317     { "quoting",                (getter)Dialect_get_quoting},
318     {NULL},
319 };
320 
321 static void
Dialect_dealloc(DialectObj * self)322 Dialect_dealloc(DialectObj *self)
323 {
324     Py_XDECREF(self->lineterminator);
325     Py_TYPE(self)->tp_free((PyObject *)self);
326 }
327 
328 static char *dialect_kws[] = {
329     "dialect",
330     "delimiter",
331     "doublequote",
332     "escapechar",
333     "lineterminator",
334     "quotechar",
335     "quoting",
336     "skipinitialspace",
337     "strict",
338     NULL
339 };
340 
341 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)342 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
343 {
344     DialectObj *self;
345     PyObject *ret = NULL;
346     PyObject *dialect = NULL;
347     PyObject *delimiter = NULL;
348     PyObject *doublequote = NULL;
349     PyObject *escapechar = NULL;
350     PyObject *lineterminator = NULL;
351     PyObject *quotechar = NULL;
352     PyObject *quoting = NULL;
353     PyObject *skipinitialspace = NULL;
354     PyObject *strict = NULL;
355 
356     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
357                                      "|OOOOOOOOO", dialect_kws,
358                                      &dialect,
359                                      &delimiter,
360                                      &doublequote,
361                                      &escapechar,
362                                      &lineterminator,
363                                      &quotechar,
364                                      &quoting,
365                                      &skipinitialspace,
366                                      &strict))
367         return NULL;
368 
369     if (dialect != NULL) {
370         if (IS_BASESTRING(dialect)) {
371             dialect = get_dialect_from_registry(dialect);
372             if (dialect == NULL)
373                 return NULL;
374         }
375         else
376             Py_INCREF(dialect);
377         /* Can we reuse this instance? */
378         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
379             delimiter == 0 &&
380             doublequote == 0 &&
381             escapechar == 0 &&
382             lineterminator == 0 &&
383             quotechar == 0 &&
384             quoting == 0 &&
385             skipinitialspace == 0 &&
386             strict == 0)
387             return dialect;
388     }
389 
390     self = (DialectObj *)type->tp_alloc(type, 0);
391     if (self == NULL) {
392         Py_XDECREF(dialect);
393         return NULL;
394     }
395     self->lineterminator = NULL;
396 
397     Py_XINCREF(delimiter);
398     Py_XINCREF(doublequote);
399     Py_XINCREF(escapechar);
400     Py_XINCREF(lineterminator);
401     Py_XINCREF(quotechar);
402     Py_XINCREF(quoting);
403     Py_XINCREF(skipinitialspace);
404     Py_XINCREF(strict);
405     if (dialect != NULL) {
406 #define DIALECT_GETATTR(v, n) \
407         if (v == NULL) \
408             v = PyObject_GetAttrString(dialect, n)
409         DIALECT_GETATTR(delimiter, "delimiter");
410         DIALECT_GETATTR(doublequote, "doublequote");
411         DIALECT_GETATTR(escapechar, "escapechar");
412         DIALECT_GETATTR(lineterminator, "lineterminator");
413         DIALECT_GETATTR(quotechar, "quotechar");
414         DIALECT_GETATTR(quoting, "quoting");
415         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
416         DIALECT_GETATTR(strict, "strict");
417         PyErr_Clear();
418     }
419 
420     /* check types and convert to C values */
421 #define DIASET(meth, name, target, src, dflt) \
422     if (meth(name, target, src, dflt)) \
423         goto err
424     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
425     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
426     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
427     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
428     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
429     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
430     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
431     DIASET(_set_bool, "strict", &self->strict, strict, 0);
432 
433     /* validate options */
434     if (dialect_check_quoting(self->quoting))
435         goto err;
436     if (self->delimiter == 0) {
437         PyErr_SetString(PyExc_TypeError,
438                         "\"delimiter\" must be an 1-character string");
439         goto err;
440     }
441     if (quotechar == Py_None && quoting == NULL)
442         self->quoting = QUOTE_NONE;
443     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
444         PyErr_SetString(PyExc_TypeError,
445                         "quotechar must be set if quoting enabled");
446         goto err;
447     }
448     if (self->lineterminator == 0) {
449         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
450         goto err;
451     }
452 
453     ret = (PyObject *)self;
454     Py_INCREF(self);
455 err:
456     Py_XDECREF(self);
457     Py_XDECREF(dialect);
458     Py_XDECREF(delimiter);
459     Py_XDECREF(doublequote);
460     Py_XDECREF(escapechar);
461     Py_XDECREF(lineterminator);
462     Py_XDECREF(quotechar);
463     Py_XDECREF(quoting);
464     Py_XDECREF(skipinitialspace);
465     Py_XDECREF(strict);
466     return ret;
467 }
468 
469 
470 PyDoc_STRVAR(Dialect_Type_doc,
471 "CSV dialect\n"
472 "\n"
473 "The Dialect type records CSV parsing and generation options.\n");
474 
475 static PyTypeObject Dialect_Type = {
476     PyVarObject_HEAD_INIT(NULL, 0)
477     "_csv.Dialect",                         /* tp_name */
478     sizeof(DialectObj),                     /* tp_basicsize */
479     0,                                      /* tp_itemsize */
480     /*  methods  */
481     (destructor)Dialect_dealloc,            /* tp_dealloc */
482     (printfunc)0,                           /* tp_print */
483     (getattrfunc)0,                         /* tp_getattr */
484     (setattrfunc)0,                         /* tp_setattr */
485     (cmpfunc)0,                             /* tp_compare */
486     (reprfunc)0,                            /* tp_repr */
487     0,                                      /* tp_as_number */
488     0,                                      /* tp_as_sequence */
489     0,                                      /* tp_as_mapping */
490     (hashfunc)0,                            /* tp_hash */
491     (ternaryfunc)0,                         /* tp_call */
492     (reprfunc)0,                                /* tp_str */
493     0,                                      /* tp_getattro */
494     0,                                      /* tp_setattro */
495     0,                                      /* tp_as_buffer */
496     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
497     Dialect_Type_doc,                       /* tp_doc */
498     0,                                      /* tp_traverse */
499     0,                                      /* tp_clear */
500     0,                                      /* tp_richcompare */
501     0,                                      /* tp_weaklistoffset */
502     0,                                      /* tp_iter */
503     0,                                      /* tp_iternext */
504     0,                                          /* tp_methods */
505     Dialect_memberlist,                     /* tp_members */
506     Dialect_getsetlist,                     /* tp_getset */
507     0,                                          /* tp_base */
508     0,                                          /* tp_dict */
509     0,                                          /* tp_descr_get */
510     0,                                          /* tp_descr_set */
511     0,                                          /* tp_dictoffset */
512     0,                                          /* tp_init */
513     0,                                          /* tp_alloc */
514     dialect_new,                                /* tp_new */
515     0,                                          /* tp_free */
516 };
517 
518 /*
519  * Return an instance of the dialect type, given a Python instance or kwarg
520  * description of the dialect
521  */
522 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)523 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
524 {
525     PyObject *ctor_args;
526     PyObject *dialect;
527 
528     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
529     if (ctor_args == NULL)
530         return NULL;
531     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
532     Py_DECREF(ctor_args);
533     return dialect;
534 }
535 
536 /*
537  * READER
538  */
539 static int
parse_save_field(ReaderObj * self)540 parse_save_field(ReaderObj *self)
541 {
542     PyObject *field;
543 
544     field = PyString_FromStringAndSize(self->field, self->field_len);
545     if (field == NULL)
546         return -1;
547     self->field_len = 0;
548     if (self->numeric_field) {
549         PyObject *tmp;
550 
551         self->numeric_field = 0;
552         tmp = PyNumber_Float(field);
553         if (tmp == NULL) {
554             Py_DECREF(field);
555             return -1;
556         }
557         Py_DECREF(field);
558         field = tmp;
559     }
560     PyList_Append(self->fields, field);
561     Py_DECREF(field);
562     return 0;
563 }
564 
565 static int
parse_grow_buff(ReaderObj * self)566 parse_grow_buff(ReaderObj *self)
567 {
568     unsigned field_size_new;
569     char *field_new;
570 
571     assert((unsigned)self->field_size <= INT_MAX);
572 
573     field_size_new = self->field_size ? 2 * (unsigned)self->field_size : 4096;
574     if (field_size_new > INT_MAX) {
575         PyErr_NoMemory();
576         return 0;
577     }
578     field_new = (char *)PyMem_Realloc(self->field, field_size_new);
579     if (field_new == NULL) {
580         PyErr_NoMemory();
581         return 0;
582     }
583     self->field = field_new;
584     self->field_size = (int)field_size_new;
585     return 1;
586 }
587 
588 static int
parse_add_char(ReaderObj * self,char c)589 parse_add_char(ReaderObj *self, char c)
590 {
591     if (self->field_len >= field_limit) {
592         PyErr_Format(error_obj, "field larger than field limit (%ld)",
593                      field_limit);
594         return -1;
595     }
596     if (self->field_len == self->field_size && !parse_grow_buff(self))
597         return -1;
598     self->field[self->field_len++] = c;
599     return 0;
600 }
601 
602 static int
parse_process_char(ReaderObj * self,char c)603 parse_process_char(ReaderObj *self, char c)
604 {
605     DialectObj *dialect = self->dialect;
606 
607     switch (self->state) {
608     case START_RECORD:
609         /* start of record */
610         if (c == '\0')
611             /* empty line - return [] */
612             break;
613         else if (c == '\n' || c == '\r') {
614             self->state = EAT_CRNL;
615             break;
616         }
617         /* normal character - handle as START_FIELD */
618         self->state = START_FIELD;
619         /* fallthru */
620     case START_FIELD:
621         /* expecting field */
622         if (c == '\n' || c == '\r' || c == '\0') {
623             /* save empty field - return [fields] */
624             if (parse_save_field(self) < 0)
625                 return -1;
626             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
627         }
628         else if (c == dialect->quotechar &&
629                  dialect->quoting != QUOTE_NONE) {
630             /* start quoted field */
631             self->state = IN_QUOTED_FIELD;
632         }
633         else if (c == dialect->escapechar) {
634             /* possible escaped character */
635             self->state = ESCAPED_CHAR;
636         }
637         else if (c == ' ' && dialect->skipinitialspace)
638             /* ignore space at start of field */
639             ;
640         else if (c == dialect->delimiter) {
641             /* save empty field */
642             if (parse_save_field(self) < 0)
643                 return -1;
644         }
645         else {
646             /* begin new unquoted field */
647             if (dialect->quoting == QUOTE_NONNUMERIC)
648                 self->numeric_field = 1;
649             if (parse_add_char(self, c) < 0)
650                 return -1;
651             self->state = IN_FIELD;
652         }
653         break;
654 
655     case ESCAPED_CHAR:
656         if (c == '\0')
657             c = '\n';
658         if (parse_add_char(self, c) < 0)
659             return -1;
660         self->state = IN_FIELD;
661         break;
662 
663     case IN_FIELD:
664         /* in unquoted field */
665         if (c == '\n' || c == '\r' || c == '\0') {
666             /* end of line - return [fields] */
667             if (parse_save_field(self) < 0)
668                 return -1;
669             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
670         }
671         else if (c == dialect->escapechar) {
672             /* possible escaped character */
673             self->state = ESCAPED_CHAR;
674         }
675         else if (c == dialect->delimiter) {
676             /* save field - wait for new field */
677             if (parse_save_field(self) < 0)
678                 return -1;
679             self->state = START_FIELD;
680         }
681         else {
682             /* normal character - save in field */
683             if (parse_add_char(self, c) < 0)
684                 return -1;
685         }
686         break;
687 
688     case IN_QUOTED_FIELD:
689         /* in quoted field */
690         if (c == '\0')
691             ;
692         else if (c == dialect->escapechar) {
693             /* Possible escape character */
694             self->state = ESCAPE_IN_QUOTED_FIELD;
695         }
696         else if (c == dialect->quotechar &&
697                  dialect->quoting != QUOTE_NONE) {
698             if (dialect->doublequote) {
699                 /* doublequote; " represented by "" */
700                 self->state = QUOTE_IN_QUOTED_FIELD;
701             }
702             else {
703                 /* end of quote part of field */
704                 self->state = IN_FIELD;
705             }
706         }
707         else {
708             /* normal character - save in field */
709             if (parse_add_char(self, c) < 0)
710                 return -1;
711         }
712         break;
713 
714     case ESCAPE_IN_QUOTED_FIELD:
715         if (c == '\0')
716             c = '\n';
717         if (parse_add_char(self, c) < 0)
718             return -1;
719         self->state = IN_QUOTED_FIELD;
720         break;
721 
722     case QUOTE_IN_QUOTED_FIELD:
723         /* doublequote - seen a quote in a quoted field */
724         if (dialect->quoting != QUOTE_NONE &&
725             c == dialect->quotechar) {
726             /* save "" as " */
727             if (parse_add_char(self, c) < 0)
728                 return -1;
729             self->state = IN_QUOTED_FIELD;
730         }
731         else if (c == dialect->delimiter) {
732             /* save field - wait for new field */
733             if (parse_save_field(self) < 0)
734                 return -1;
735             self->state = START_FIELD;
736         }
737         else if (c == '\n' || c == '\r' || c == '\0') {
738             /* end of line - return [fields] */
739             if (parse_save_field(self) < 0)
740                 return -1;
741             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
742         }
743         else if (!dialect->strict) {
744             if (parse_add_char(self, c) < 0)
745                 return -1;
746             self->state = IN_FIELD;
747         }
748         else {
749             /* illegal */
750             PyErr_Format(error_obj, "'%c' expected after '%c'",
751                             dialect->delimiter,
752                             dialect->quotechar);
753             return -1;
754         }
755         break;
756 
757     case EAT_CRNL:
758         if (c == '\n' || c == '\r')
759             ;
760         else if (c == '\0')
761             self->state = START_RECORD;
762         else {
763             PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
764             return -1;
765         }
766         break;
767 
768     }
769     return 0;
770 }
771 
772 static int
parse_reset(ReaderObj * self)773 parse_reset(ReaderObj *self)
774 {
775     Py_XSETREF(self->fields, PyList_New(0));
776     if (self->fields == NULL)
777         return -1;
778     self->field_len = 0;
779     self->state = START_RECORD;
780     self->numeric_field = 0;
781     return 0;
782 }
783 
784 static PyObject *
Reader_iternext(ReaderObj * self)785 Reader_iternext(ReaderObj *self)
786 {
787     PyObject *lineobj;
788     PyObject *fields = NULL;
789     char *line, c;
790     int linelen;
791 
792     if (parse_reset(self) < 0)
793         return NULL;
794     do {
795         lineobj = PyIter_Next(self->input_iter);
796         if (lineobj == NULL) {
797             /* End of input OR exception */
798             if (!PyErr_Occurred() && (self->field_len != 0 ||
799                                       self->state == IN_QUOTED_FIELD)) {
800                 if (self->dialect->strict)
801                     PyErr_SetString(error_obj, "unexpected end of data");
802                 else if (parse_save_field(self) >= 0 )
803                     break;
804             }
805             return NULL;
806         }
807         ++self->line_num;
808 
809         line = PyString_AsString(lineobj);
810         linelen = PyString_Size(lineobj);
811 
812         if (line == NULL || linelen < 0) {
813             Py_DECREF(lineobj);
814             return NULL;
815         }
816         while (linelen--) {
817             c = *line++;
818             if (c == '\0') {
819                 Py_DECREF(lineobj);
820                 PyErr_Format(error_obj,
821                              "line contains NULL byte");
822                 goto err;
823             }
824             if (parse_process_char(self, c) < 0) {
825                 Py_DECREF(lineobj);
826                 goto err;
827             }
828         }
829         Py_DECREF(lineobj);
830         if (parse_process_char(self, 0) < 0)
831             goto err;
832     } while (self->state != START_RECORD);
833 
834     fields = self->fields;
835     self->fields = NULL;
836 err:
837     return fields;
838 }
839 
840 static void
Reader_dealloc(ReaderObj * self)841 Reader_dealloc(ReaderObj *self)
842 {
843     PyObject_GC_UnTrack(self);
844     Py_XDECREF(self->dialect);
845     Py_XDECREF(self->input_iter);
846     Py_XDECREF(self->fields);
847     if (self->field != NULL)
848         PyMem_Free(self->field);
849     PyObject_GC_Del(self);
850 }
851 
852 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)853 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
854 {
855     Py_VISIT(self->dialect);
856     Py_VISIT(self->input_iter);
857     Py_VISIT(self->fields);
858     return 0;
859 }
860 
861 static int
Reader_clear(ReaderObj * self)862 Reader_clear(ReaderObj *self)
863 {
864     Py_CLEAR(self->dialect);
865     Py_CLEAR(self->input_iter);
866     Py_CLEAR(self->fields);
867     return 0;
868 }
869 
870 PyDoc_STRVAR(Reader_Type_doc,
871 "CSV reader\n"
872 "\n"
873 "Reader objects are responsible for reading and parsing tabular data\n"
874 "in CSV format.\n"
875 );
876 
877 static struct PyMethodDef Reader_methods[] = {
878     { NULL, NULL }
879 };
880 #define R_OFF(x) offsetof(ReaderObj, x)
881 
882 static struct PyMemberDef Reader_memberlist[] = {
883     { "dialect", T_OBJECT, R_OFF(dialect), RO },
884     { "line_num", T_ULONG, R_OFF(line_num), RO },
885     { NULL }
886 };
887 
888 
889 static PyTypeObject Reader_Type = {
890     PyVarObject_HEAD_INIT(NULL, 0)
891     "_csv.reader",                          /*tp_name*/
892     sizeof(ReaderObj),                      /*tp_basicsize*/
893     0,                                      /*tp_itemsize*/
894     /* methods */
895     (destructor)Reader_dealloc,             /*tp_dealloc*/
896     (printfunc)0,                           /*tp_print*/
897     (getattrfunc)0,                         /*tp_getattr*/
898     (setattrfunc)0,                         /*tp_setattr*/
899     (cmpfunc)0,                             /*tp_compare*/
900     (reprfunc)0,                            /*tp_repr*/
901     0,                                      /*tp_as_number*/
902     0,                                      /*tp_as_sequence*/
903     0,                                      /*tp_as_mapping*/
904     (hashfunc)0,                            /*tp_hash*/
905     (ternaryfunc)0,                         /*tp_call*/
906     (reprfunc)0,                                /*tp_str*/
907     0,                                      /*tp_getattro*/
908     0,                                      /*tp_setattro*/
909     0,                                      /*tp_as_buffer*/
910     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
911         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
912     Reader_Type_doc,                        /*tp_doc*/
913     (traverseproc)Reader_traverse,          /*tp_traverse*/
914     (inquiry)Reader_clear,                  /*tp_clear*/
915     0,                                      /*tp_richcompare*/
916     0,                                      /*tp_weaklistoffset*/
917     PyObject_SelfIter,                          /*tp_iter*/
918     (getiterfunc)Reader_iternext,           /*tp_iternext*/
919     Reader_methods,                         /*tp_methods*/
920     Reader_memberlist,                      /*tp_members*/
921     0,                                      /*tp_getset*/
922 
923 };
924 
925 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)926 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
927 {
928     PyObject * iterator, * dialect = NULL;
929     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
930 
931     if (!self)
932         return NULL;
933 
934     self->dialect = NULL;
935     self->fields = NULL;
936     self->input_iter = NULL;
937     self->field = NULL;
938     self->field_size = 0;
939     self->line_num = 0;
940 
941     if (parse_reset(self) < 0) {
942         Py_DECREF(self);
943         return NULL;
944     }
945 
946     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
947         Py_DECREF(self);
948         return NULL;
949     }
950     self->input_iter = PyObject_GetIter(iterator);
951     if (self->input_iter == NULL) {
952         PyErr_SetString(PyExc_TypeError,
953                         "argument 1 must be an iterator");
954         Py_DECREF(self);
955         return NULL;
956     }
957     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
958     if (self->dialect == NULL) {
959         Py_DECREF(self);
960         return NULL;
961     }
962 
963     PyObject_GC_Track(self);
964     return (PyObject *)self;
965 }
966 
967 /*
968  * WRITER
969  */
970 /* ---------------------------------------------------------------- */
971 static void
join_reset(WriterObj * self)972 join_reset(WriterObj *self)
973 {
974     self->rec_len = 0;
975     self->num_fields = 0;
976 }
977 
/* Granularity, in bytes, used by join_check_rec_size() when it enlarges
 * the writer's record buffer: new sizes are multiples of this value. */
#define MEM_INCR 32768
979 
/* Calculate new record length or append field to record.  Return new
 * record length.
 *
 * The function is run twice per field by join_append():
 *   - copy_phase == 0: nothing is written; the bytes the field would
 *     occupy are only counted, and *quoted may be switched to 1 when the
 *     data turns out to require quoting.
 *   - copy_phase == 1: the same walk is repeated and the bytes are stored
 *     into self->rec starting at self->rec_len.
 *
 * `field` is scanned up to its first NUL byte.  `quote_empty` requests
 * quoting of an empty field (an error under QUOTE_NONE).
 *
 * self->rec_len is read but never modified here; the caller stores the
 * returned length.  Returns -1 with an exception set on failure: the line
 * terminator could not be obtained, escaping was needed without an
 * escapechar, an empty field could not be quoted, or (counting phase only)
 * the length would exceed INT_MAX.
 */
static int
join_append_data(WriterObj *self, char *field, int quote_empty,
                 int *quoted, int copy_phase)
{
    DialectObj *dialect = self->dialect;
    int i, rec_len;
    char *lineterm;

/* Advance rec_len by one; the overflow guard is active only while
 * counting, since the copy phase replays an already-validated length. */
#define INCLEN \
    do {\
        if (!copy_phase && rec_len == INT_MAX) { \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

/* Emit one byte: store it when copying, then advance rec_len. */
#define ADDCH(c)                                \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    lineterm = PyString_AsString(dialect->lineterminator);
    if (lineterm == NULL)
        return -1;

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote.  Only done while copying: during counting
     * *quoted may still change, so both quotes are accounted for at the
     * end of the function instead. */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    for (i = 0;; i++) {
        char c = field[i];
        int want_escape = 0;

        if (c == '\0')
            break;

        /* Special characters: delimiter, escapechar, quotechar and any
         * byte that occurs in the line terminator. */
        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar ||
            strchr(lineterm, c)) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    /* Embedded quote: double it, or escape it when
                     * doublequote is off. */
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted.
     */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                         "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}
1086 
1087 static int
join_check_rec_size(WriterObj * self,int rec_len)1088 join_check_rec_size(WriterObj *self, int rec_len)
1089 {
1090     unsigned rec_size_new;
1091     char *rec_new;
1092 
1093     assert(rec_len >= 0);
1094 
1095     if (rec_len > self->rec_size) {
1096         rec_size_new = (unsigned)(rec_len / MEM_INCR + 1) * MEM_INCR;
1097         if (rec_size_new > INT_MAX) {
1098             PyErr_NoMemory();
1099             return 0;
1100         }
1101         rec_new = (char *)PyMem_Realloc(self->rec, rec_size_new);
1102         if (rec_new == NULL) {
1103             PyErr_NoMemory();
1104             return 0;
1105         }
1106         self->rec = rec_new;
1107         self->rec_size = (int)rec_size_new;
1108     }
1109     return 1;
1110 }
1111 
1112 static int
join_append(WriterObj * self,char * field,int * quoted,int quote_empty)1113 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1114 {
1115     int rec_len;
1116 
1117     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1118     if (rec_len < 0)
1119         return 0;
1120 
1121     /* grow record buffer if necessary */
1122     if (!join_check_rec_size(self, rec_len))
1123         return 0;
1124 
1125     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1126     self->num_fields++;
1127 
1128     return 1;
1129 }
1130 
1131 static int
join_append_lineterminator(WriterObj * self)1132 join_append_lineterminator(WriterObj *self)
1133 {
1134     int terminator_len;
1135     char *terminator;
1136 
1137     terminator_len = PyString_Size(self->dialect->lineterminator);
1138     if (terminator_len == -1)
1139         return 0;
1140 
1141     /* grow record buffer if necessary */
1142     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1143         return 0;
1144 
1145     terminator = PyString_AsString(self->dialect->lineterminator);
1146     if (terminator == NULL)
1147         return 0;
1148     memmove(self->rec + self->rec_len, terminator, terminator_len);
1149     self->rec_len += terminator_len;
1150 
1151     return 1;
1152 }
1153 
/* Docstring for the writer's writerow() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(sequence)\n"
"\n"
"Construct and write a CSV record from a sequence of fields.  Non-string\n"
"elements will be converted to string.");
1159 
1160 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1161 csv_writerow(WriterObj *self, PyObject *seq)
1162 {
1163     DialectObj *dialect = self->dialect;
1164     int len, i;
1165 
1166     if (!PySequence_Check(seq))
1167         return PyErr_Format(error_obj, "sequence expected");
1168 
1169     len = PySequence_Length(seq);
1170     if (len < 0)
1171         return NULL;
1172 
1173     /* Join all fields in internal buffer.
1174      */
1175     join_reset(self);
1176     for (i = 0; i < len; i++) {
1177         PyObject *field;
1178         int append_ok;
1179         int quoted;
1180 
1181         field = PySequence_GetItem(seq, i);
1182         if (field == NULL)
1183             return NULL;
1184 
1185         switch (dialect->quoting) {
1186         case QUOTE_NONNUMERIC:
1187             quoted = !PyNumber_Check(field);
1188             break;
1189         case QUOTE_ALL:
1190             quoted = 1;
1191             break;
1192         default:
1193             quoted = 0;
1194             break;
1195         }
1196 
1197         if (PyString_Check(field)) {
1198             append_ok = join_append(self,
1199                                     PyString_AS_STRING(field),
1200                                     &quoted, len == 1);
1201             Py_DECREF(field);
1202         }
1203         else if (field == Py_None) {
1204             append_ok = join_append(self, "", &quoted, len == 1);
1205             Py_DECREF(field);
1206         }
1207         else {
1208             PyObject *str;
1209 
1210             if (PyFloat_Check(field)) {
1211                 str = PyObject_Repr(field);
1212             } else {
1213                 str = PyObject_Str(field);
1214             }
1215             Py_DECREF(field);
1216             if (str == NULL)
1217                 return NULL;
1218 
1219             append_ok = join_append(self, PyString_AS_STRING(str),
1220                                     &quoted, len == 1);
1221             Py_DECREF(str);
1222         }
1223         if (!append_ok)
1224             return NULL;
1225     }
1226 
1227     /* Add line terminator.
1228      */
1229     if (!join_append_lineterminator(self))
1230         return 0;
1231 
1232     return PyObject_CallFunction(self->writeline,
1233                                  "(s#)", self->rec, self->rec_len);
1234 }
1235 
/* Docstring for the writer's writerows() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(sequence of sequences)\n"
"\n"
"Construct and write a series of sequences to a csv file.  Non-string\n"
"elements will be converted to string.");
1241 
1242 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1243 csv_writerows(WriterObj *self, PyObject *seqseq)
1244 {
1245     PyObject *row_iter, *row_obj, *result;
1246 
1247     row_iter = PyObject_GetIter(seqseq);
1248     if (row_iter == NULL) {
1249         PyErr_SetString(PyExc_TypeError,
1250                         "writerows() argument must be iterable");
1251         return NULL;
1252     }
1253     while ((row_obj = PyIter_Next(row_iter))) {
1254         result = csv_writerow(self, row_obj);
1255         Py_DECREF(row_obj);
1256         if (!result) {
1257             Py_DECREF(row_iter);
1258             return NULL;
1259         }
1260         else
1261              Py_DECREF(result);
1262     }
1263     Py_DECREF(row_iter);
1264     if (PyErr_Occurred())
1265         return NULL;
1266     Py_INCREF(Py_None);
1267     return Py_None;
1268 }
1269 
/* Methods of writer objects.  Both take exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1275 
/* Byte offset of member x inside WriterObj, for use in the member table. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Attributes exposed on writer objects: only "dialect", flagged RO
 * (read-only). */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), RO },
    { NULL }
};
1282 
1283 static void
Writer_dealloc(WriterObj * self)1284 Writer_dealloc(WriterObj *self)
1285 {
1286     PyObject_GC_UnTrack(self);
1287     Py_XDECREF(self->dialect);
1288     Py_XDECREF(self->writeline);
1289     if (self->rec != NULL)
1290         PyMem_Free(self->rec);
1291     PyObject_GC_Del(self);
1292 }
1293 
/* GC traversal hook (installed as tp_traverse of Writer_Type): reports
 * the two object references a writer holds, the dialect and the write
 * callable.  Returns 0 unless a visit reports otherwise through Py_VISIT. */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1301 
/* GC clear hook (installed as tp_clear of Writer_Type): releases the
 * references to the dialect and the write callable, leaving both members
 * NULL.  Always returns 0. */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1309 
/* Docstring for writer objects; installed as tp_doc of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1316 
1317 static PyTypeObject Writer_Type = {
1318     PyVarObject_HEAD_INIT(NULL, 0)
1319     "_csv.writer",                          /*tp_name*/
1320     sizeof(WriterObj),                      /*tp_basicsize*/
1321     0,                                      /*tp_itemsize*/
1322     /* methods */
1323     (destructor)Writer_dealloc,             /*tp_dealloc*/
1324     (printfunc)0,                           /*tp_print*/
1325     (getattrfunc)0,                         /*tp_getattr*/
1326     (setattrfunc)0,                         /*tp_setattr*/
1327     (cmpfunc)0,                             /*tp_compare*/
1328     (reprfunc)0,                            /*tp_repr*/
1329     0,                                      /*tp_as_number*/
1330     0,                                      /*tp_as_sequence*/
1331     0,                                      /*tp_as_mapping*/
1332     (hashfunc)0,                            /*tp_hash*/
1333     (ternaryfunc)0,                         /*tp_call*/
1334     (reprfunc)0,                            /*tp_str*/
1335     0,                                      /*tp_getattro*/
1336     0,                                      /*tp_setattro*/
1337     0,                                      /*tp_as_buffer*/
1338     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1339         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1340     Writer_Type_doc,
1341     (traverseproc)Writer_traverse,          /*tp_traverse*/
1342     (inquiry)Writer_clear,                  /*tp_clear*/
1343     0,                                      /*tp_richcompare*/
1344     0,                                      /*tp_weaklistoffset*/
1345     (getiterfunc)0,                         /*tp_iter*/
1346     (getiterfunc)0,                         /*tp_iternext*/
1347     Writer_methods,                         /*tp_methods*/
1348     Writer_memberlist,                      /*tp_members*/
1349     0,                                      /*tp_getset*/
1350 };
1351 
1352 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1353 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1354 {
1355     PyObject * output_file, * dialect = NULL;
1356     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1357 
1358     if (!self)
1359         return NULL;
1360 
1361     self->dialect = NULL;
1362     self->writeline = NULL;
1363 
1364     self->rec = NULL;
1365     self->rec_size = 0;
1366     self->rec_len = 0;
1367     self->num_fields = 0;
1368 
1369     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1370         Py_DECREF(self);
1371         return NULL;
1372     }
1373     self->writeline = PyObject_GetAttrString(output_file, "write");
1374     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1375         PyErr_SetString(PyExc_TypeError,
1376                         "argument 1 must have a \"write\" method");
1377         Py_DECREF(self);
1378         return NULL;
1379     }
1380     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1381     if (self->dialect == NULL) {
1382         Py_DECREF(self);
1383         return NULL;
1384     }
1385     PyObject_GC_Track(self);
1386     return (PyObject *)self;
1387 }
1388 
1389 /*
1390  * DIALECT REGISTRY
1391  */
1392 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1393 csv_list_dialects(PyObject *module, PyObject *args)
1394 {
1395     return PyDict_Keys(dialects);
1396 }
1397 
1398 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1399 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1400 {
1401     PyObject *name_obj, *dialect_obj = NULL;
1402     PyObject *dialect;
1403 
1404     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1405         return NULL;
1406     if (!IS_BASESTRING(name_obj)) {
1407         PyErr_SetString(PyExc_TypeError,
1408                         "dialect name must be a string or unicode");
1409         return NULL;
1410     }
1411     dialect = _call_dialect(dialect_obj, kwargs);
1412     if (dialect == NULL)
1413         return NULL;
1414     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1415         Py_DECREF(dialect);
1416         return NULL;
1417     }
1418     Py_DECREF(dialect);
1419     Py_INCREF(Py_None);
1420     return Py_None;
1421 }
1422 
1423 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1424 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1425 {
1426     if (PyDict_DelItem(dialects, name_obj) < 0)
1427         return PyErr_Format(error_obj, "unknown dialect");
1428     Py_INCREF(Py_None);
1429     return Py_None;
1430 }
1431 
1432 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1433 csv_get_dialect(PyObject *module, PyObject *name_obj)
1434 {
1435     return get_dialect_from_registry(name_obj);
1436 }
1437 
1438 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1439 csv_field_size_limit(PyObject *module, PyObject *args)
1440 {
1441     PyObject *new_limit = NULL;
1442     long old_limit = field_limit, limit;
1443 
1444     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1445         return NULL;
1446     if (new_limit != NULL) {
1447         if (!_PyAnyInt_Check(new_limit)) {
1448             PyErr_Format(PyExc_TypeError,
1449                          "limit must be an integer");
1450             return NULL;
1451         }
1452         limit = PyInt_AsLong(new_limit);
1453         if (limit == -1 && PyErr_Occurred())
1454             return NULL;
1455         field_limit = limit;
1456     }
1457     return PyInt_FromLong(old_limit);
1458 }
1459 
1460 /*
1461  * MODULE
1462  */
1463 
/* Module docstring; passed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1521 
/* Docstring for the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1536 
/* Docstring for the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1550 
1551 PyDoc_STRVAR(csv_list_dialects_doc,
1552 "Return a list of all know dialect names.\n"
1553 "    names = csv.list_dialects()");
1554 
/* Docstring for the module-level get_dialect() function (see csv_methods). */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");
1558 
1559 PyDoc_STRVAR(csv_register_dialect_doc,
1560 "Create a mapping from a string name to a dialect class.\n"
1561 "    dialect = csv.register_dialect(name, dialect)");
1562 
/* Docstring for the module-level unregister_dialect() function (see
 * csv_methods). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");
1566 
/* Docstring for the module-level field_size_limit() function (see
 * csv_methods). */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1573 
/* Module-level function table, handed to Py_InitModule3() in init_csv().
 * The flags select each function's calling convention: positional plus
 * keyword arguments, positional only, a single object, or none at all. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }      /* sentinel */
};
1591 
1592 PyMODINIT_FUNC
init_csv(void)1593 init_csv(void)
1594 {
1595     PyObject *module;
1596     StyleDesc *style;
1597 
1598     if (PyType_Ready(&Dialect_Type) < 0)
1599         return;
1600 
1601     if (PyType_Ready(&Reader_Type) < 0)
1602         return;
1603 
1604     if (PyType_Ready(&Writer_Type) < 0)
1605         return;
1606 
1607     /* Create the module and add the functions */
1608     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1609     if (module == NULL)
1610         return;
1611 
1612     /* Add version to the module. */
1613     if (PyModule_AddStringConstant(module, "__version__",
1614                                    MODULE_VERSION) == -1)
1615         return;
1616 
1617     /* Add _dialects dictionary */
1618     dialects = PyDict_New();
1619     if (dialects == NULL)
1620         return;
1621     if (PyModule_AddObject(module, "_dialects", dialects))
1622         return;
1623 
1624     /* Add quote styles into dictionary */
1625     for (style = quote_styles; style->name; style++) {
1626         if (PyModule_AddIntConstant(module, style->name,
1627                                     style->style) == -1)
1628             return;
1629     }
1630 
1631     /* Add the Dialect type */
1632     Py_INCREF(&Dialect_Type);
1633     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1634         return;
1635 
1636     /* Add the CSV exception object to the module. */
1637     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1638     if (error_obj == NULL)
1639         return;
1640     PyModule_AddObject(module, "Error", error_obj);
1641 }
1642