• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
11 **** 2.2.
12 
13 */
14 
/* Version string of this C module.  Where it is consumed is not visible
   in this part of the file. */
15 #define MODULE_VERSION "1.0"
16 
17 #include "Python.h"
18 #include "structmember.h"
19 
20 
21 /* begin 2.2 compatibility macros */
/* Every definition in this section is guarded by #ifndef, so it only takes
   effect when the Python headers have not already defined the name. */
22 #ifndef PyDoc_STRVAR
23 /* Define macros for inline documentation. */
24 #define PyDoc_VAR(name) static char name[]
25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
/* Docstrings collapse to the empty string unless WITH_DOC_STRINGS is set. */
26 #ifdef WITH_DOC_STRINGS
27 #define PyDoc_STR(str) str
28 #else
29 #define PyDoc_STR(str) ""
30 #endif
31 #endif /* ifndef PyDoc_STRVAR */
32 
/* Fallback declaration prefix for the module init function: returns void,
   with C linkage when compiled as C++. */
33 #ifndef PyMODINIT_FUNC
34 #       if defined(__cplusplus)
35 #               define PyMODINIT_FUNC extern "C" void
36 #       else /* __cplusplus */
37 #               define PyMODINIT_FUNC void
38 #       endif /* __cplusplus */
39 #endif
40 
/* Fallback Py_CLEAR: the slot is set to NULL before the saved pointer is
   released.  Note that `op` is evaluated more than once. */
41 #ifndef Py_CLEAR
42 #define Py_CLEAR(op)                                            \
43     do {                                                        \
44         if (op) {                                               \
45             PyObject *tmp = (PyObject *)(op);                   \
46             (op) = NULL;                                        \
47             Py_DECREF(tmp);                                     \
48         }                                                       \
49     } while (0)
50 #endif
/* Fallback Py_VISIT: relies on identifiers named `visit` and `arg` being in
   scope at the point of use, and returns from the enclosing function as soon
   as the callback yields a non-zero value. */
51 #ifndef Py_VISIT
52 #define Py_VISIT(op)                                                    \
53     do {                                                                \
54         if (op) {                                                       \
55             int vret = visit((PyObject *)(op), arg);                    \
56             if (vret)                                                   \
57                 return vret;                                            \
58         }                                                               \
59     } while (0)
60 #endif
61 
62 /* end 2.2 compatibility macros */
63 
/* Non-zero when object `o` passes a type check against PyBaseString_Type.
   Used below to accept string-like arguments. */
64 #define IS_BASESTRING(o) \
65     PyObject_TypeCheck(o, &PyBaseString_Type)
66 
/* Module-wide state shared by the functions in this file. */
67 static PyObject *error_obj;     /* CSV exception; used as the exception type for parse/registry errors */
68 static PyObject *dialects;      /* Dialect registry; looked up with PyDict_GetItem */
69 static long field_limit = 128 * 1024;   /* max parsed field size; enforced by parse_add_char */
70 
/*
 * States of the reader's character-at-a-time parser (see
 * parse_process_char):
 *   START_RECORD            - nothing consumed yet for the current record
 *   START_FIELD             - expecting the first character of a field
 *   ESCAPED_CHAR            - previous character was the escape character
 *                             (outside quotes)
 *   IN_FIELD                - inside an unquoted field
 *   IN_QUOTED_FIELD         - inside a quoted field
 *   ESCAPE_IN_QUOTED_FIELD  - escape character seen inside a quoted field
 *   QUOTE_IN_QUOTED_FIELD   - quote character seen inside a quoted field
 *                             while doublequote is enabled
 *   EAT_CRNL                - record finished; consuming trailing CR/LF
 */
71 typedef enum {
72     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
73     IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
74     EAT_CRNL
75 } ParserState;
76 
/*
 * Quoting styles accepted for a dialect's "quoting" option.  In the reader
 * code visible here, QUOTE_NONE disables special handling of the quote
 * character and QUOTE_NONNUMERIC makes unquoted fields get converted with
 * PyNumber_Float.  How the writer uses each style is not visible in this
 * part of the file.
 */
77 typedef enum {
78     QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
79 } QuoteStyle;
80 
/* Pairs a quoting style value with its symbolic name. */
81 typedef struct {
82     QuoteStyle style;
83     char *name;
84 } StyleDesc;
85 
/* Table of all valid quoting styles.  The final entry has a NULL name and
   acts as the terminator that dialect_check_quoting() stops on. */
86 static StyleDesc quote_styles[] = {
87     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
88     { QUOTE_ALL,        "QUOTE_ALL" },
89     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
90     { QUOTE_NONE,       "QUOTE_NONE" },
91     { 0 }
92 };
93 
/*
 * Instance layout of the Dialect type: the CSV options after conversion to
 * C values by dialect_new().  A char option holding '\0' means "not set"
 * (the getters report it as None).
 */
94 typedef struct {
95     PyObject_HEAD
96 
97     int doublequote;            /* is " represented by ""? */
98     char delimiter;             /* field separator */
99     char quotechar;             /* quote character */
100     char escapechar;            /* escape character */
101     int skipinitialspace;       /* ignore spaces following delimiter? */
102     PyObject *lineterminator; /* string to write between records */
103     int quoting;                /* style of quoting to write */
104 
105     int strict;                 /* raise exception on bad CSV */
106 } DialectObj;
107 
/* Forward declaration; the type object is defined after dialect_new(). */
108 staticforward PyTypeObject Dialect_Type;
109 
/*
 * Instance layout of the reader type: the input iterator, the dialect in
 * use, and the state of the record currently being parsed.
 */
110 typedef struct {
111     PyObject_HEAD
112 
113     PyObject *input_iter;   /* iterate over this for input lines */
114 
115     DialectObj *dialect;    /* parsing dialect */
116 
117     PyObject *fields;           /* field list for current record */
118     ParserState state;          /* current CSV parse state */
119     char *field;                /* build current field in here */
120     int field_size;             /* size of allocated buffer */
121     int field_len;              /* length of current field */
122     int numeric_field;          /* treat field as numeric */
123     unsigned long line_num;     /* Source-file line number */
124 } ReaderObj;
125 
/* Forward declaration; the type object is defined after the reader code. */
126 staticforward PyTypeObject Reader_Type;
127 
/* Exact-type test: true only when v's type is Reader_Type itself. */
128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
129 
/*
 * Instance layout of the writer type: the output callable, the dialect in
 * use, and the buffer in which the current record is assembled.
 */
130 typedef struct {
131     PyObject_HEAD
132 
133     PyObject *writeline;    /* write output lines to this file */
134 
135     DialectObj *dialect;    /* parsing dialect */
136 
137     char *rec;                  /* buffer for parser.join */
138     int rec_size;               /* size of allocated record */
139     int rec_len;                /* length of record */
140     int num_fields;             /* number of fields in record */
141 } WriterObj;
142 
/* Forward declaration; the definition is not visible in this part of the
   file. */
143 staticforward PyTypeObject Writer_Type;
144 
145 /*
146  * DIALECT class
147  */
148 
149 static PyObject *
get_dialect_from_registry(PyObject * name_obj)150 get_dialect_from_registry(PyObject * name_obj)
151 {
152     PyObject *dialect_obj;
153 
154     dialect_obj = PyDict_GetItem(dialects, name_obj);
155     if (dialect_obj == NULL) {
156         if (!PyErr_Occurred())
157             PyErr_Format(error_obj, "unknown dialect");
158     }
159     else
160         Py_INCREF(dialect_obj);
161     return dialect_obj;
162 }
163 
164 static PyObject *
get_string(PyObject * str)165 get_string(PyObject *str)
166 {
167     Py_XINCREF(str);
168     return str;
169 }
170 
171 static PyObject *
get_nullchar_as_None(char c)172 get_nullchar_as_None(char c)
173 {
174     if (c == '\0') {
175         Py_INCREF(Py_None);
176         return Py_None;
177     }
178     else
179         return PyString_FromStringAndSize((char*)&c, 1);
180 }
181 
182 static PyObject *
Dialect_get_lineterminator(DialectObj * self)183 Dialect_get_lineterminator(DialectObj *self)
184 {
185     return get_string(self->lineterminator);
186 }
187 
188 static PyObject *
Dialect_get_escapechar(DialectObj * self)189 Dialect_get_escapechar(DialectObj *self)
190 {
191     return get_nullchar_as_None(self->escapechar);
192 }
193 
194 static PyObject *
Dialect_get_quotechar(DialectObj * self)195 Dialect_get_quotechar(DialectObj *self)
196 {
197     return get_nullchar_as_None(self->quotechar);
198 }
199 
200 static PyObject *
Dialect_get_quoting(DialectObj * self)201 Dialect_get_quoting(DialectObj *self)
202 {
203     return PyInt_FromLong(self->quoting);
204 }
205 
206 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
208 {
209     if (src == NULL)
210         *target = dflt;
211     else {
212         int b = PyObject_IsTrue(src);
213         if (b < 0)
214             return -1;
215         *target = b;
216     }
217     return 0;
218 }
219 
220 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)221 _set_int(const char *name, int *target, PyObject *src, int dflt)
222 {
223     if (src == NULL)
224         *target = dflt;
225     else {
226         if (!PyInt_Check(src)) {
227             PyErr_Format(PyExc_TypeError,
228                          "\"%s\" must be an integer", name);
229             return -1;
230         }
231         *target = PyInt_AsLong(src);
232     }
233     return 0;
234 }
235 
236 static int
_set_char(const char * name,char * target,PyObject * src,char dflt)237 _set_char(const char *name, char *target, PyObject *src, char dflt)
238 {
239     if (src == NULL)
240         *target = dflt;
241     else {
242         *target = '\0';
243         if (src != Py_None) {
244             Py_ssize_t len;
245             if (!PyString_Check(src)) {
246                 PyErr_Format(PyExc_TypeError,
247                     "\"%s\" must be string, not %.200s", name,
248                     src->ob_type->tp_name);
249                 return -1;
250             }
251             len = PyString_GET_SIZE(src);
252             if (len > 1) {
253                 PyErr_Format(PyExc_TypeError,
254                     "\"%s\" must be an 1-character string",
255                     name);
256                 return -1;
257             }
258             if (len > 0)
259                 *target = *PyString_AS_STRING(src);
260         }
261     }
262     return 0;
263 }
264 
265 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)266 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
267 {
268     if (src == NULL)
269         *target = PyString_FromString(dflt);
270     else {
271         if (src == Py_None)
272             *target = NULL;
273         else if (!IS_BASESTRING(src)) {
274             PyErr_Format(PyExc_TypeError,
275                          "\"%s\" must be a string", name);
276             return -1;
277         }
278         else {
279             Py_INCREF(src);
280             Py_XSETREF(*target, src);
281         }
282     }
283     return 0;
284 }
285 
286 static int
dialect_check_quoting(int quoting)287 dialect_check_quoting(int quoting)
288 {
289     StyleDesc *qs = quote_styles;
290 
291     for (qs = quote_styles; qs->name; qs++) {
292         if (qs->style == quoting)
293             return 0;
294     }
295     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296     return -1;
297 }
298 
/* Byte offset of field x within DialectObj. */
299 #define D_OFF(x) offsetof(DialectObj, x)
300 
/* Dialect attributes served directly from the C struct fields; all are
   flagged READONLY. */
301 static struct PyMemberDef Dialect_memberlist[] = {
302     { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
303     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
304     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
305     { "strict",             T_INT, D_OFF(strict), READONLY },
306     { NULL }
307 };
308 
/* Dialect attributes produced by getter functions.  No entry supplies a
   setter. */
309 static PyGetSetDef Dialect_getsetlist[] = {
310     { "escapechar",             (getter)Dialect_get_escapechar},
311     { "lineterminator",         (getter)Dialect_get_lineterminator},
312     { "quotechar",              (getter)Dialect_get_quotechar},
313     { "quoting",                (getter)Dialect_get_quoting},
314     {NULL},
315 };
316 
317 static void
Dialect_dealloc(DialectObj * self)318 Dialect_dealloc(DialectObj *self)
319 {
320     Py_XDECREF(self->lineterminator);
321     Py_TYPE(self)->tp_free((PyObject *)self);
322 }
323 
/* Keyword names accepted by dialect_new().  The order must match the order
   of the output pointers passed to PyArg_ParseTupleAndKeywords there. */
324 static char *dialect_kws[] = {
325     "dialect",
326     "delimiter",
327     "doublequote",
328     "escapechar",
329     "lineterminator",
330     "quotechar",
331     "quoting",
332     "skipinitialspace",
333     "strict",
334     NULL
335 };
336 
/*
 * tp_new for the Dialect type.
 *
 * Accepts an optional base `dialect` (a registered name or an arbitrary
 * object carrying dialect attributes) plus optional per-option overrides.
 * Each option is taken, in order of preference, from the explicit
 * argument, from the same-named attribute of `dialect`, or from a built-in
 * default.  The values are then converted to C form and validated.
 *
 * Returns a new reference to a Dialect instance, or NULL with an exception
 * set.  When `dialect` is already a Dialect_Type instance and no override
 * was supplied, that same instance is returned instead of a copy.
 */
337 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340     DialectObj *self;
341     PyObject *ret = NULL;
342     PyObject *dialect = NULL;
343     PyObject *delimiter = NULL;
344     PyObject *doublequote = NULL;
345     PyObject *escapechar = NULL;
346     PyObject *lineterminator = NULL;
347     PyObject *quotechar = NULL;
348     PyObject *quoting = NULL;
349     PyObject *skipinitialspace = NULL;
350     PyObject *strict = NULL;
351 
    /* All nine arguments are optional and parsed as plain objects; the
       references stored here are borrowed. */
352     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353                                      "|OOOOOOOOO", dialect_kws,
354                                      &dialect,
355                                      &delimiter,
356                                      &doublequote,
357                                      &escapechar,
358                                      &lineterminator,
359                                      &quotechar,
360                                      &quoting,
361                                      &skipinitialspace,
362                                      &strict))
363         return NULL;
364 
    /* From here on `dialect`, when non-NULL, is an owned reference: either
       the registry lookup result or the argument with an added reference. */
365     if (dialect != NULL) {
366         if (IS_BASESTRING(dialect)) {
367             dialect = get_dialect_from_registry(dialect);
368             if (dialect == NULL)
369                 return NULL;
370         }
371         else
372             Py_INCREF(dialect);
373         /* Can we reuse this instance? */
374         if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375             delimiter == 0 &&
376             doublequote == 0 &&
377             escapechar == 0 &&
378             lineterminator == 0 &&
379             quotechar == 0 &&
380             quoting == 0 &&
381             skipinitialspace == 0 &&
382             strict == 0)
383             return dialect;
384     }
385 
386     self = (DialectObj *)type->tp_alloc(type, 0);
387     if (self == NULL) {
388         Py_XDECREF(dialect);
389         return NULL;
390     }
391     self->lineterminator = NULL;
392 
    /* Take ownership of the explicitly passed options so that every option
       pointer can be released uniformly at the `err` label, whether it came
       from the caller or from PyObject_GetAttrString below. */
393     Py_XINCREF(delimiter);
394     Py_XINCREF(doublequote);
395     Py_XINCREF(escapechar);
396     Py_XINCREF(lineterminator);
397     Py_XINCREF(quotechar);
398     Py_XINCREF(quoting);
399     Py_XINCREF(skipinitialspace);
400     Py_XINCREF(strict);
401     if (dialect != NULL) {
        /* Fill each option that was not given explicitly from the base
           dialect's attribute of the same name. */
402 #define DIALECT_GETATTR(v, n) \
403         if (v == NULL) \
404             v = PyObject_GetAttrString(dialect, n)
405         DIALECT_GETATTR(delimiter, "delimiter");
406         DIALECT_GETATTR(doublequote, "doublequote");
407         DIALECT_GETATTR(escapechar, "escapechar");
408         DIALECT_GETATTR(lineterminator, "lineterminator");
409         DIALECT_GETATTR(quotechar, "quotechar");
410         DIALECT_GETATTR(quoting, "quoting");
411         DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412         DIALECT_GETATTR(strict, "strict");
        /* A failed attribute lookup leaves the option NULL, so the default
           applies; the pending exception is discarded. */
413         PyErr_Clear();
414     }
415 
416     /* check types and convert to C values */
417 #define DIASET(meth, name, target, src, dflt) \
418     if (meth(name, target, src, dflt)) \
419         goto err
420     DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421     DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422     DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423     DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424     DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425     DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426     DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427     DIASET(_set_bool, "strict", &self->strict, strict, 0);
428 
429     /* validate options */
430     if (dialect_check_quoting(self->quoting))
431         goto err;
432     if (self->delimiter == 0) {
433         PyErr_SetString(PyExc_TypeError,
434                         "\"delimiter\" must be an 1-character string");
435         goto err;
436     }
    /* quotechar=None with no quoting value available (neither passed nor
       found on the base dialect) means quoting is switched off. */
437     if (quotechar == Py_None && quoting == NULL)
438         self->quoting = QUOTE_NONE;
439     if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440         PyErr_SetString(PyExc_TypeError,
441                         "quotechar must be set if quoting enabled");
442         goto err;
443     }
444     if (self->lineterminator == 0) {
445         PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446         goto err;
447     }
448 
    /* Success: the extra reference taken here balances the unconditional
       Py_XDECREF(self) in the shared cleanup below. */
449     ret = (PyObject *)self;
450     Py_INCREF(self);
451 err:
452     Py_XDECREF(self);
453     Py_XDECREF(dialect);
454     Py_XDECREF(delimiter);
455     Py_XDECREF(doublequote);
456     Py_XDECREF(escapechar);
457     Py_XDECREF(lineterminator);
458     Py_XDECREF(quotechar);
459     Py_XDECREF(quoting);
460     Py_XDECREF(skipinitialspace);
461     Py_XDECREF(strict);
462     return ret;
463 }
464 
465 
/* Docstring attached to the Dialect type through its tp_doc slot. */
466 PyDoc_STRVAR(Dialect_Type_doc,
467 "CSV dialect\n"
468 "\n"
469 "The Dialect type records CSV parsing and generation options.\n");
470 
/*
 * Type object for _csv.Dialect.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE), is created through dialect_new, and exposes its
 * options through Dialect_memberlist and Dialect_getsetlist.
 * tp_alloc and tp_free are left as 0 here although dialect_new and
 * Dialect_dealloc call them; presumably they are filled in when the type
 * is readied, which is not visible in this part of the file.
 */
471 static PyTypeObject Dialect_Type = {
472     PyVarObject_HEAD_INIT(NULL, 0)
473     "_csv.Dialect",                         /* tp_name */
474     sizeof(DialectObj),                     /* tp_basicsize */
475     0,                                      /* tp_itemsize */
476     /*  methods  */
477     (destructor)Dialect_dealloc,            /* tp_dealloc */
478     (printfunc)0,                           /* tp_print */
479     (getattrfunc)0,                         /* tp_getattr */
480     (setattrfunc)0,                         /* tp_setattr */
481     (cmpfunc)0,                             /* tp_compare */
482     (reprfunc)0,                            /* tp_repr */
483     0,                                      /* tp_as_number */
484     0,                                      /* tp_as_sequence */
485     0,                                      /* tp_as_mapping */
486     (hashfunc)0,                            /* tp_hash */
487     (ternaryfunc)0,                         /* tp_call */
488     (reprfunc)0,                                /* tp_str */
489     0,                                      /* tp_getattro */
490     0,                                      /* tp_setattro */
491     0,                                      /* tp_as_buffer */
492     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493     Dialect_Type_doc,                       /* tp_doc */
494     0,                                      /* tp_traverse */
495     0,                                      /* tp_clear */
496     0,                                      /* tp_richcompare */
497     0,                                      /* tp_weaklistoffset */
498     0,                                      /* tp_iter */
499     0,                                      /* tp_iternext */
500     0,                                          /* tp_methods */
501     Dialect_memberlist,                     /* tp_members */
502     Dialect_getsetlist,                     /* tp_getset */
503     0,                                          /* tp_base */
504     0,                                          /* tp_dict */
505     0,                                          /* tp_descr_get */
506     0,                                          /* tp_descr_set */
507     0,                                          /* tp_dictoffset */
508     0,                                          /* tp_init */
509     0,                                          /* tp_alloc */
510     dialect_new,                                /* tp_new */
511     0,                                          /* tp_free */
512 };
513 
514 /*
515  * Return an instance of the dialect type, given a Python instance or kwarg
516  * description of the dialect
517  */
518 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520 {
521     PyObject *ctor_args;
522     PyObject *dialect;
523 
524     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
525     if (ctor_args == NULL)
526         return NULL;
527     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
528     Py_DECREF(ctor_args);
529     return dialect;
530 }
531 
532 /*
533  * READER
534  */
535 static int
parse_save_field(ReaderObj * self)536 parse_save_field(ReaderObj *self)
537 {
538     PyObject *field;
539 
540     field = PyString_FromStringAndSize(self->field, self->field_len);
541     if (field == NULL)
542         return -1;
543     self->field_len = 0;
544     if (self->numeric_field) {
545         PyObject *tmp;
546 
547         self->numeric_field = 0;
548         tmp = PyNumber_Float(field);
549         if (tmp == NULL) {
550             Py_DECREF(field);
551             return -1;
552         }
553         Py_DECREF(field);
554         field = tmp;
555     }
556     PyList_Append(self->fields, field);
557     Py_DECREF(field);
558     return 0;
559 }
560 
561 static int
parse_grow_buff(ReaderObj * self)562 parse_grow_buff(ReaderObj *self)
563 {
564     if (self->field_size == 0) {
565         self->field_size = 4096;
566         if (self->field != NULL)
567             PyMem_Free(self->field);
568         self->field = PyMem_Malloc(self->field_size);
569     }
570     else {
571         if (self->field_size > INT_MAX / 2) {
572             PyErr_NoMemory();
573             return 0;
574         }
575         self->field_size *= 2;
576         self->field = PyMem_Realloc(self->field, self->field_size);
577     }
578     if (self->field == NULL) {
579         PyErr_NoMemory();
580         return 0;
581     }
582     return 1;
583 }
584 
585 static int
parse_add_char(ReaderObj * self,char c)586 parse_add_char(ReaderObj *self, char c)
587 {
588     if (self->field_len >= field_limit) {
589         PyErr_Format(error_obj, "field larger than field limit (%ld)",
590                      field_limit);
591         return -1;
592     }
593     if (self->field_len == self->field_size && !parse_grow_buff(self))
594         return -1;
595     self->field[self->field_len++] = c;
596     return 0;
597 }
598 
/*
 * Advance the reader's state machine by one input character.
 *
 * c is either a character of the current input line or '\0', which the
 * caller (Reader_iternext) passes once after each line as an end-of-line
 * marker; real NUL bytes in the input are rejected by the caller before
 * they get here.
 *
 * Completed fields are appended to self->fields through parse_save_field().
 * A record is complete when the state is back at START_RECORD after the
 * end-of-line marker has been processed.
 *
 * Returns 0 on success, -1 with an exception set on error.
 */
599 static int
parse_process_char(ReaderObj * self,char c)600 parse_process_char(ReaderObj *self, char c)
601 {
602     DialectObj *dialect = self->dialect;
603 
604     switch (self->state) {
605     case START_RECORD:
606         /* start of record */
607         if (c == '\0')
608             /* empty line - return [] */
609             break;
610         else if (c == '\n' || c == '\r') {
611             self->state = EAT_CRNL;
612             break;
613         }
614         /* normal character - handle as START_FIELD */
615         self->state = START_FIELD;
616         /* fallthru */
617     case START_FIELD:
618         /* expecting field */
619         if (c == '\n' || c == '\r' || c == '\0') {
620             /* save empty field - return [fields] */
621             if (parse_save_field(self) < 0)
622                 return -1;
623             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
624         }
625         else if (c == dialect->quotechar &&
626                  dialect->quoting != QUOTE_NONE) {
627             /* start quoted field */
628             self->state = IN_QUOTED_FIELD;
629         }
630         else if (c == dialect->escapechar) {
631             /* possible escaped character */
632             self->state = ESCAPED_CHAR;
633         }
634         else if (c == ' ' && dialect->skipinitialspace)
635             /* ignore space at start of field */
636             ;
637         else if (c == dialect->delimiter) {
638             /* save empty field */
639             if (parse_save_field(self) < 0)
640                 return -1;
641         }
642         else {
643             /* begin new unquoted field */
            /* under QUOTE_NONNUMERIC an unquoted field is flagged so that
               parse_save_field() converts it with PyNumber_Float */
644             if (dialect->quoting == QUOTE_NONNUMERIC)
645                 self->numeric_field = 1;
646             if (parse_add_char(self, c) < 0)
647                 return -1;
648             self->state = IN_FIELD;
649         }
650         break;
651 
652     case ESCAPED_CHAR:
        /* escape character was the last thing on the line: the end-of-line
           marker is stored as a newline */
653         if (c == '\0')
654             c = '\n';
655         if (parse_add_char(self, c) < 0)
656             return -1;
657         self->state = IN_FIELD;
658         break;
659 
660     case IN_FIELD:
661         /* in unquoted field */
662         if (c == '\n' || c == '\r' || c == '\0') {
663             /* end of line - return [fields] */
664             if (parse_save_field(self) < 0)
665                 return -1;
666             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
667         }
668         else if (c == dialect->escapechar) {
669             /* possible escaped character */
670             self->state = ESCAPED_CHAR;
671         }
672         else if (c == dialect->delimiter) {
673             /* save field - wait for new field */
674             if (parse_save_field(self) < 0)
675                 return -1;
676             self->state = START_FIELD;
677         }
678         else {
679             /* normal character - save in field */
680             if (parse_add_char(self, c) < 0)
681                 return -1;
682         }
683         break;
684 
685     case IN_QUOTED_FIELD:
686         /* in quoted field */
        /* end-of-line marker inside quotes: stay in this state, so the
           caller's loop goes on to fetch another input line */
687         if (c == '\0')
688             ;
689         else if (c == dialect->escapechar) {
690             /* Possible escape character */
691             self->state = ESCAPE_IN_QUOTED_FIELD;
692         }
693         else if (c == dialect->quotechar &&
694                  dialect->quoting != QUOTE_NONE) {
695             if (dialect->doublequote) {
696                 /* doublequote; " represented by "" */
697                 self->state = QUOTE_IN_QUOTED_FIELD;
698             }
699             else {
700                 /* end of quote part of field */
701                 self->state = IN_FIELD;
702             }
703         }
704         else {
705             /* normal character - save in field */
706             if (parse_add_char(self, c) < 0)
707                 return -1;
708         }
709         break;
710 
711     case ESCAPE_IN_QUOTED_FIELD:
        /* as in ESCAPED_CHAR, an escaped end-of-line marker becomes a
           newline */
712         if (c == '\0')
713             c = '\n';
714         if (parse_add_char(self, c) < 0)
715             return -1;
716         self->state = IN_QUOTED_FIELD;
717         break;
718 
719     case QUOTE_IN_QUOTED_FIELD:
720         /* doublequote - seen a quote in a quoted field */
721         if (dialect->quoting != QUOTE_NONE &&
722             c == dialect->quotechar) {
723             /* save "" as " */
724             if (parse_add_char(self, c) < 0)
725                 return -1;
726             self->state = IN_QUOTED_FIELD;
727         }
728         else if (c == dialect->delimiter) {
729             /* save field - wait for new field */
730             if (parse_save_field(self) < 0)
731                 return -1;
732             self->state = START_FIELD;
733         }
734         else if (c == '\n' || c == '\r' || c == '\0') {
735             /* end of line - return [fields] */
736             if (parse_save_field(self) < 0)
737                 return -1;
738             self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
739         }
740         else if (!dialect->strict) {
            /* non-strict mode: keep the stray character and carry on as an
               unquoted field */
741             if (parse_add_char(self, c) < 0)
742                 return -1;
743             self->state = IN_FIELD;
744         }
745         else {
746             /* illegal */
747             PyErr_Format(error_obj, "'%c' expected after '%c'",
748                             dialect->delimiter,
749                             dialect->quotechar);
750             return -1;
751         }
752         break;
753 
754     case EAT_CRNL:
        /* after a line ending only more CR/LF characters or the
           end-of-line marker are acceptable */
755         if (c == '\n' || c == '\r')
756             ;
757         else if (c == '\0')
758             self->state = START_RECORD;
759         else {
760             PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
761             return -1;
762         }
763         break;
764 
765     }
766     return 0;
767 }
768 
769 static int
parse_reset(ReaderObj * self)770 parse_reset(ReaderObj *self)
771 {
772     Py_XSETREF(self->fields, PyList_New(0));
773     if (self->fields == NULL)
774         return -1;
775     self->field_len = 0;
776     self->state = START_RECORD;
777     self->numeric_field = 0;
778     return 0;
779 }
780 
781 static PyObject *
Reader_iternext(ReaderObj * self)782 Reader_iternext(ReaderObj *self)
783 {
784     PyObject *lineobj;
785     PyObject *fields = NULL;
786     char *line, c;
787     int linelen;
788 
789     if (parse_reset(self) < 0)
790         return NULL;
791     do {
792         lineobj = PyIter_Next(self->input_iter);
793         if (lineobj == NULL) {
794             /* End of input OR exception */
795             if (!PyErr_Occurred() && (self->field_len != 0 ||
796                                       self->state == IN_QUOTED_FIELD)) {
797                 if (self->dialect->strict)
798                     PyErr_SetString(error_obj, "unexpected end of data");
799                 else if (parse_save_field(self) >= 0 )
800                     break;
801             }
802             return NULL;
803         }
804         ++self->line_num;
805 
806         line = PyString_AsString(lineobj);
807         linelen = PyString_Size(lineobj);
808 
809         if (line == NULL || linelen < 0) {
810             Py_DECREF(lineobj);
811             return NULL;
812         }
813         while (linelen--) {
814             c = *line++;
815             if (c == '\0') {
816                 Py_DECREF(lineobj);
817                 PyErr_Format(error_obj,
818                              "line contains NULL byte");
819                 goto err;
820             }
821             if (parse_process_char(self, c) < 0) {
822                 Py_DECREF(lineobj);
823                 goto err;
824             }
825         }
826         Py_DECREF(lineobj);
827         if (parse_process_char(self, 0) < 0)
828             goto err;
829     } while (self->state != START_RECORD);
830 
831     fields = self->fields;
832     self->fields = NULL;
833 err:
834     return fields;
835 }
836 
837 static void
Reader_dealloc(ReaderObj * self)838 Reader_dealloc(ReaderObj *self)
839 {
840     PyObject_GC_UnTrack(self);
841     Py_XDECREF(self->dialect);
842     Py_XDECREF(self->input_iter);
843     Py_XDECREF(self->fields);
844     if (self->field != NULL)
845         PyMem_Free(self->field);
846     PyObject_GC_Del(self);
847 }
848 
849 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)850 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
851 {
852     Py_VISIT(self->dialect);
853     Py_VISIT(self->input_iter);
854     Py_VISIT(self->fields);
855     return 0;
856 }
857 
858 static int
Reader_clear(ReaderObj * self)859 Reader_clear(ReaderObj *self)
860 {
861     Py_CLEAR(self->dialect);
862     Py_CLEAR(self->input_iter);
863     Py_CLEAR(self->fields);
864     return 0;
865 }
866 
/* Docstring attached to the reader type through its tp_doc slot. */
867 PyDoc_STRVAR(Reader_Type_doc,
868 "CSV reader\n"
869 "\n"
870 "Reader objects are responsible for reading and parsing tabular data\n"
871 "in CSV format.\n"
872 );
873 
/* The reader type defines no methods; the table holds only the
   terminating entry. */
874 static struct PyMethodDef Reader_methods[] = {
875     { NULL, NULL }
876 };
/* Byte offset of field x within ReaderObj. */
877 #define R_OFF(x) offsetof(ReaderObj, x)
878 
/* Reader attributes served directly from the C struct fields, both flagged
   RO: the dialect object and the count of input lines consumed so far. */
879 static struct PyMemberDef Reader_memberlist[] = {
880     { "dialect", T_OBJECT, R_OFF(dialect), RO },
881     { "line_num", T_ULONG, R_OFF(line_num), RO },
882     { NULL }
883 };
884 
885 
/*
 * Type object for _csv.reader.  Instances take part in cyclic garbage
 * collection (Py_TPFLAGS_HAVE_GC with Reader_traverse / Reader_clear) and
 * are their own iterators: tp_iter returns the object itself and
 * tp_iternext yields one parsed record per call.  No tp_new is listed;
 * instances are created by csv_reader().
 */
886 static PyTypeObject Reader_Type = {
887     PyVarObject_HEAD_INIT(NULL, 0)
888     "_csv.reader",                          /*tp_name*/
889     sizeof(ReaderObj),                      /*tp_basicsize*/
890     0,                                      /*tp_itemsize*/
891     /* methods */
892     (destructor)Reader_dealloc,             /*tp_dealloc*/
893     (printfunc)0,                           /*tp_print*/
894     (getattrfunc)0,                         /*tp_getattr*/
895     (setattrfunc)0,                         /*tp_setattr*/
896     (cmpfunc)0,                             /*tp_compare*/
897     (reprfunc)0,                            /*tp_repr*/
898     0,                                      /*tp_as_number*/
899     0,                                      /*tp_as_sequence*/
900     0,                                      /*tp_as_mapping*/
901     (hashfunc)0,                            /*tp_hash*/
902     (ternaryfunc)0,                         /*tp_call*/
903     (reprfunc)0,                                /*tp_str*/
904     0,                                      /*tp_getattro*/
905     0,                                      /*tp_setattro*/
906     0,                                      /*tp_as_buffer*/
907     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
908         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
909     Reader_Type_doc,                        /*tp_doc*/
910     (traverseproc)Reader_traverse,          /*tp_traverse*/
911     (inquiry)Reader_clear,                  /*tp_clear*/
912     0,                                      /*tp_richcompare*/
913     0,                                      /*tp_weaklistoffset*/
914     PyObject_SelfIter,                          /*tp_iter*/
915     (getiterfunc)Reader_iternext,           /*tp_iternext*/
916     Reader_methods,                         /*tp_methods*/
917     Reader_memberlist,                      /*tp_members*/
918     0,                                      /*tp_getset*/
919 
920 };
921 
922 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)923 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
924 {
925     PyObject * iterator, * dialect = NULL;
926     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
927 
928     if (!self)
929         return NULL;
930 
931     self->dialect = NULL;
932     self->fields = NULL;
933     self->input_iter = NULL;
934     self->field = NULL;
935     self->field_size = 0;
936     self->line_num = 0;
937 
938     if (parse_reset(self) < 0) {
939         Py_DECREF(self);
940         return NULL;
941     }
942 
943     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
944         Py_DECREF(self);
945         return NULL;
946     }
947     self->input_iter = PyObject_GetIter(iterator);
948     if (self->input_iter == NULL) {
949         PyErr_SetString(PyExc_TypeError,
950                         "argument 1 must be an iterator");
951         Py_DECREF(self);
952         return NULL;
953     }
954     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
955     if (self->dialect == NULL) {
956         Py_DECREF(self);
957         return NULL;
958     }
959 
960     PyObject_GC_Track(self);
961     return (PyObject *)self;
962 }
963 
964 /*
965  * WRITER
966  */
967 /* ---------------------------------------------------------------- */
968 static void
join_reset(WriterObj * self)969 join_reset(WriterObj *self)
970 {
971     self->rec_len = 0;
972     self->num_fields = 0;
973 }
974 
975 #define MEM_INCR 32768
976 
977 /* Calculate new record length or append field to record.  Return new
978  * record length.
979  */
980 static int
join_append_data(WriterObj * self,char * field,int quote_empty,int * quoted,int copy_phase)981 join_append_data(WriterObj *self, char *field, int quote_empty,
982                  int *quoted, int copy_phase)
983 {
984     DialectObj *dialect = self->dialect;
985     int i, rec_len;
986     char *lineterm;
987 
988 #define INCLEN \
989     do {\
990         if (!copy_phase && rec_len == INT_MAX) { \
991             goto overflow; \
992         } \
993         rec_len++; \
994     } while(0)
995 
996 #define ADDCH(c)                                \
997     do {\
998         if (copy_phase) \
999             self->rec[rec_len] = c;\
1000         INCLEN;\
1001     } while(0)
1002 
1003     lineterm = PyString_AsString(dialect->lineterminator);
1004     if (lineterm == NULL)
1005         return -1;
1006 
1007     rec_len = self->rec_len;
1008 
1009     /* If this is not the first field we need a field separator */
1010     if (self->num_fields > 0)
1011         ADDCH(dialect->delimiter);
1012 
1013     /* Handle preceding quote */
1014     if (copy_phase && *quoted)
1015         ADDCH(dialect->quotechar);
1016 
1017     /* Copy/count field data */
1018     for (i = 0;; i++) {
1019         char c = field[i];
1020         int want_escape = 0;
1021 
1022         if (c == '\0')
1023             break;
1024 
1025         if (c == dialect->delimiter ||
1026             c == dialect->escapechar ||
1027             c == dialect->quotechar ||
1028             strchr(lineterm, c)) {
1029             if (dialect->quoting == QUOTE_NONE)
1030                 want_escape = 1;
1031             else {
1032                 if (c == dialect->quotechar) {
1033                     if (dialect->doublequote)
1034                         ADDCH(dialect->quotechar);
1035                     else
1036                         want_escape = 1;
1037                 }
1038                 if (!want_escape)
1039                     *quoted = 1;
1040             }
1041             if (want_escape) {
1042                 if (!dialect->escapechar) {
1043                     PyErr_Format(error_obj,
1044                                  "need to escape, but no escapechar set");
1045                     return -1;
1046                 }
1047                 ADDCH(dialect->escapechar);
1048             }
1049         }
1050         /* Copy field character into record buffer.
1051          */
1052         ADDCH(c);
1053     }
1054 
1055     /* If field is empty check if it needs to be quoted.
1056      */
1057     if (i == 0 && quote_empty) {
1058         if (dialect->quoting == QUOTE_NONE) {
1059             PyErr_Format(error_obj,
1060                          "single empty field record must be quoted");
1061             return -1;
1062         }
1063         else
1064             *quoted = 1;
1065     }
1066 
1067     if (*quoted) {
1068         if (copy_phase)
1069             ADDCH(dialect->quotechar);
1070         else {
1071             INCLEN; /* starting quote */
1072             INCLEN; /* ending quote */
1073         }
1074     }
1075     return rec_len;
1076 
1077   overflow:
1078     PyErr_NoMemory();
1079     return -1;
1080 #undef ADDCH
1081 #undef INCLEN
1082 }
1083 
1084 static int
join_check_rec_size(WriterObj * self,int rec_len)1085 join_check_rec_size(WriterObj *self, int rec_len)
1086 {
1087 
1088     if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1089         PyErr_NoMemory();
1090         return 0;
1091     }
1092 
1093     if (rec_len > self->rec_size) {
1094         if (self->rec_size == 0) {
1095             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1096             if (self->rec != NULL)
1097                 PyMem_Free(self->rec);
1098             self->rec = PyMem_Malloc(self->rec_size);
1099         }
1100         else {
1101             char *old_rec = self->rec;
1102 
1103             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1104             self->rec = PyMem_Realloc(self->rec, self->rec_size);
1105             if (self->rec == NULL)
1106                 PyMem_Free(old_rec);
1107         }
1108         if (self->rec == NULL) {
1109             PyErr_NoMemory();
1110             return 0;
1111         }
1112     }
1113     return 1;
1114 }
1115 
1116 static int
join_append(WriterObj * self,char * field,int * quoted,int quote_empty)1117 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1118 {
1119     int rec_len;
1120 
1121     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1122     if (rec_len < 0)
1123         return 0;
1124 
1125     /* grow record buffer if necessary */
1126     if (!join_check_rec_size(self, rec_len))
1127         return 0;
1128 
1129     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1130     self->num_fields++;
1131 
1132     return 1;
1133 }
1134 
1135 static int
join_append_lineterminator(WriterObj * self)1136 join_append_lineterminator(WriterObj *self)
1137 {
1138     int terminator_len;
1139     char *terminator;
1140 
1141     terminator_len = PyString_Size(self->dialect->lineterminator);
1142     if (terminator_len == -1)
1143         return 0;
1144 
1145     /* grow record buffer if necessary */
1146     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1147         return 0;
1148 
1149     terminator = PyString_AsString(self->dialect->lineterminator);
1150     if (terminator == NULL)
1151         return 0;
1152     memmove(self->rec + self->rec_len, terminator, terminator_len);
1153     self->rec_len += terminator_len;
1154 
1155     return 1;
1156 }
1157 
1158 PyDoc_STRVAR(csv_writerow_doc,
1159 "writerow(sequence)\n"
1160 "\n"
1161 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1162 "elements will be converted to string.");
1163 
1164 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1165 csv_writerow(WriterObj *self, PyObject *seq)
1166 {
1167     DialectObj *dialect = self->dialect;
1168     int len, i;
1169 
1170     if (!PySequence_Check(seq))
1171         return PyErr_Format(error_obj, "sequence expected");
1172 
1173     len = PySequence_Length(seq);
1174     if (len < 0)
1175         return NULL;
1176 
1177     /* Join all fields in internal buffer.
1178      */
1179     join_reset(self);
1180     for (i = 0; i < len; i++) {
1181         PyObject *field;
1182         int append_ok;
1183         int quoted;
1184 
1185         field = PySequence_GetItem(seq, i);
1186         if (field == NULL)
1187             return NULL;
1188 
1189         switch (dialect->quoting) {
1190         case QUOTE_NONNUMERIC:
1191             quoted = !PyNumber_Check(field);
1192             break;
1193         case QUOTE_ALL:
1194             quoted = 1;
1195             break;
1196         default:
1197             quoted = 0;
1198             break;
1199         }
1200 
1201         if (PyString_Check(field)) {
1202             append_ok = join_append(self,
1203                                     PyString_AS_STRING(field),
1204                                     &quoted, len == 1);
1205             Py_DECREF(field);
1206         }
1207         else if (field == Py_None) {
1208             append_ok = join_append(self, "", &quoted, len == 1);
1209             Py_DECREF(field);
1210         }
1211         else {
1212             PyObject *str;
1213 
1214             if (PyFloat_Check(field)) {
1215                 str = PyObject_Repr(field);
1216             } else {
1217                 str = PyObject_Str(field);
1218             }
1219             Py_DECREF(field);
1220             if (str == NULL)
1221                 return NULL;
1222 
1223             append_ok = join_append(self, PyString_AS_STRING(str),
1224                                     &quoted, len == 1);
1225             Py_DECREF(str);
1226         }
1227         if (!append_ok)
1228             return NULL;
1229     }
1230 
1231     /* Add line terminator.
1232      */
1233     if (!join_append_lineterminator(self))
1234         return 0;
1235 
1236     return PyObject_CallFunction(self->writeline,
1237                                  "(s#)", self->rec, self->rec_len);
1238 }
1239 
1240 PyDoc_STRVAR(csv_writerows_doc,
1241 "writerows(sequence of sequences)\n"
1242 "\n"
1243 "Construct and write a series of sequences to a csv file.  Non-string\n"
1244 "elements will be converted to string.");
1245 
1246 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1247 csv_writerows(WriterObj *self, PyObject *seqseq)
1248 {
1249     PyObject *row_iter, *row_obj, *result;
1250 
1251     row_iter = PyObject_GetIter(seqseq);
1252     if (row_iter == NULL) {
1253         PyErr_SetString(PyExc_TypeError,
1254                         "writerows() argument must be iterable");
1255         return NULL;
1256     }
1257     while ((row_obj = PyIter_Next(row_iter))) {
1258         result = csv_writerow(self, row_obj);
1259         Py_DECREF(row_obj);
1260         if (!result) {
1261             Py_DECREF(row_iter);
1262             return NULL;
1263         }
1264         else
1265              Py_DECREF(result);
1266     }
1267     Py_DECREF(row_iter);
1268     if (PyErr_Occurred())
1269         return NULL;
1270     Py_INCREF(Py_None);
1271     return Py_None;
1272 }
1273 
1274 static struct PyMethodDef Writer_methods[] = {
1275     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1276     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1277     { NULL, NULL }
1278 };
1279 
1280 #define W_OFF(x) offsetof(WriterObj, x)
1281 
1282 static struct PyMemberDef Writer_memberlist[] = {
1283     { "dialect", T_OBJECT, W_OFF(dialect), RO },
1284     { NULL }
1285 };
1286 
1287 static void
Writer_dealloc(WriterObj * self)1288 Writer_dealloc(WriterObj *self)
1289 {
1290     PyObject_GC_UnTrack(self);
1291     Py_XDECREF(self->dialect);
1292     Py_XDECREF(self->writeline);
1293     if (self->rec != NULL)
1294         PyMem_Free(self->rec);
1295     PyObject_GC_Del(self);
1296 }
1297 
/* tp_traverse hook for writer objects: passes the dialect and writeline
 * references to the collector's visit callback.  The parameters must be
 * named `visit` and `arg` because Py_VISIT refers to them by name. */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1305 
/* tp_clear hook for writer objects: releases the dialect and writeline
 * references and leaves both pointers NULL.  The record buffer is not
 * touched here. */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1313 
1314 PyDoc_STRVAR(Writer_Type_doc,
1315 "CSV writer\n"
1316 "\n"
1317 "Writer objects are responsible for generating tabular data\n"
1318 "in CSV format from sequence input.\n"
1319 );
1320 
1321 static PyTypeObject Writer_Type = {
1322     PyVarObject_HEAD_INIT(NULL, 0)
1323     "_csv.writer",                          /*tp_name*/
1324     sizeof(WriterObj),                      /*tp_basicsize*/
1325     0,                                      /*tp_itemsize*/
1326     /* methods */
1327     (destructor)Writer_dealloc,             /*tp_dealloc*/
1328     (printfunc)0,                           /*tp_print*/
1329     (getattrfunc)0,                         /*tp_getattr*/
1330     (setattrfunc)0,                         /*tp_setattr*/
1331     (cmpfunc)0,                             /*tp_compare*/
1332     (reprfunc)0,                            /*tp_repr*/
1333     0,                                      /*tp_as_number*/
1334     0,                                      /*tp_as_sequence*/
1335     0,                                      /*tp_as_mapping*/
1336     (hashfunc)0,                            /*tp_hash*/
1337     (ternaryfunc)0,                         /*tp_call*/
1338     (reprfunc)0,                            /*tp_str*/
1339     0,                                      /*tp_getattro*/
1340     0,                                      /*tp_setattro*/
1341     0,                                      /*tp_as_buffer*/
1342     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1343         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1344     Writer_Type_doc,
1345     (traverseproc)Writer_traverse,          /*tp_traverse*/
1346     (inquiry)Writer_clear,                  /*tp_clear*/
1347     0,                                      /*tp_richcompare*/
1348     0,                                      /*tp_weaklistoffset*/
1349     (getiterfunc)0,                         /*tp_iter*/
1350     (getiterfunc)0,                         /*tp_iternext*/
1351     Writer_methods,                         /*tp_methods*/
1352     Writer_memberlist,                      /*tp_members*/
1353     0,                                      /*tp_getset*/
1354 };
1355 
1356 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1357 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1358 {
1359     PyObject * output_file, * dialect = NULL;
1360     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1361 
1362     if (!self)
1363         return NULL;
1364 
1365     self->dialect = NULL;
1366     self->writeline = NULL;
1367 
1368     self->rec = NULL;
1369     self->rec_size = 0;
1370     self->rec_len = 0;
1371     self->num_fields = 0;
1372 
1373     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1374         Py_DECREF(self);
1375         return NULL;
1376     }
1377     self->writeline = PyObject_GetAttrString(output_file, "write");
1378     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1379         PyErr_SetString(PyExc_TypeError,
1380                         "argument 1 must have a \"write\" method");
1381         Py_DECREF(self);
1382         return NULL;
1383     }
1384     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1385     if (self->dialect == NULL) {
1386         Py_DECREF(self);
1387         return NULL;
1388     }
1389     PyObject_GC_Track(self);
1390     return (PyObject *)self;
1391 }
1392 
1393 /*
1394  * DIALECT REGISTRY
1395  */
/* Implements _csv.list_dialects(): returns the result of PyDict_Keys()
 * on the module-level `dialects` registry.  Neither `module` nor `args`
 * is used. */
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
    return PyDict_Keys(dialects);
}
1401 
1402 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1403 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1404 {
1405     PyObject *name_obj, *dialect_obj = NULL;
1406     PyObject *dialect;
1407 
1408     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1409         return NULL;
1410     if (!IS_BASESTRING(name_obj)) {
1411         PyErr_SetString(PyExc_TypeError,
1412                         "dialect name must be a string or unicode");
1413         return NULL;
1414     }
1415     dialect = _call_dialect(dialect_obj, kwargs);
1416     if (dialect == NULL)
1417         return NULL;
1418     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1419         Py_DECREF(dialect);
1420         return NULL;
1421     }
1422     Py_DECREF(dialect);
1423     Py_INCREF(Py_None);
1424     return Py_None;
1425 }
1426 
1427 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1428 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1429 {
1430     if (PyDict_DelItem(dialects, name_obj) < 0)
1431         return PyErr_Format(error_obj, "unknown dialect");
1432     Py_INCREF(Py_None);
1433     return Py_None;
1434 }
1435 
/* Implements _csv.get_dialect(name): forwards name_obj to
 * get_dialect_from_registry() and returns its result unchanged.
 * `module` is unused. */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1441 
1442 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1443 csv_field_size_limit(PyObject *module, PyObject *args)
1444 {
1445     PyObject *new_limit = NULL;
1446     long old_limit = field_limit;
1447 
1448     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1449         return NULL;
1450     if (new_limit != NULL) {
1451         if (!PyInt_Check(new_limit)) {
1452             PyErr_Format(PyExc_TypeError,
1453                          "limit must be an integer");
1454             return NULL;
1455         }
1456         field_limit = PyInt_AsLong(new_limit);
1457     }
1458     return PyInt_FromLong(old_limit);
1459 }
1460 
1461 /*
1462  * MODULE
1463  */
1464 
/* Module docstring; passed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the \n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the \n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret whitespace which\n"
"        immediately follows a delimiter.  It defaults to False, which\n"
"        means that whitespace immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should \n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape \n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1522 
/* Docstring attached to the module-level "reader" entry of csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1537 
/* Docstring attached to the module-level "writer" entry of csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1551 
1552 PyDoc_STRVAR(csv_list_dialects_doc,
1553 "Return a list of all know dialect names.\n"
1554 "    names = csv.list_dialects()");
1555 
/* Docstring attached to the "get_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
"    dialect = csv.get_dialect(name)");
1559 
1560 PyDoc_STRVAR(csv_register_dialect_doc,
1561 "Create a mapping from a string name to a dialect class.\n"
1562 "    dialect = csv.register_dialect(name, dialect)");
1563 
/* Docstring attached to the "unregister_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
"    csv.unregister_dialect(name)");
1567 
/* Docstring attached to the "field_size_limit" entry of csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
"    csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1574 
1575 static struct PyMethodDef csv_methods[] = {
1576     { "reader", (PyCFunction)csv_reader,
1577         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1578     { "writer", (PyCFunction)csv_writer,
1579         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1580     { "list_dialects", (PyCFunction)csv_list_dialects,
1581         METH_NOARGS, csv_list_dialects_doc},
1582     { "register_dialect", (PyCFunction)csv_register_dialect,
1583         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1584     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1585         METH_O, csv_unregister_dialect_doc},
1586     { "get_dialect", (PyCFunction)csv_get_dialect,
1587         METH_O, csv_get_dialect_doc},
1588     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1589         METH_VARARGS, csv_field_size_limit_doc},
1590     { NULL, NULL }
1591 };
1592 
1593 PyMODINIT_FUNC
init_csv(void)1594 init_csv(void)
1595 {
1596     PyObject *module;
1597     StyleDesc *style;
1598 
1599     if (PyType_Ready(&Dialect_Type) < 0)
1600         return;
1601 
1602     if (PyType_Ready(&Reader_Type) < 0)
1603         return;
1604 
1605     if (PyType_Ready(&Writer_Type) < 0)
1606         return;
1607 
1608     /* Create the module and add the functions */
1609     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1610     if (module == NULL)
1611         return;
1612 
1613     /* Add version to the module. */
1614     if (PyModule_AddStringConstant(module, "__version__",
1615                                    MODULE_VERSION) == -1)
1616         return;
1617 
1618     /* Add _dialects dictionary */
1619     dialects = PyDict_New();
1620     if (dialects == NULL)
1621         return;
1622     if (PyModule_AddObject(module, "_dialects", dialects))
1623         return;
1624 
1625     /* Add quote styles into dictionary */
1626     for (style = quote_styles; style->name; style++) {
1627         if (PyModule_AddIntConstant(module, style->name,
1628                                     style->style) == -1)
1629             return;
1630     }
1631 
1632     /* Add the Dialect type */
1633     Py_INCREF(&Dialect_Type);
1634     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1635         return;
1636 
1637     /* Add the CSV exception object to the module. */
1638     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1639     if (error_obj == NULL)
1640         return;
1641     PyModule_AddObject(module, "Error", error_obj);
1642 }
1643