1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
11 **** 2.2.
12
13 */
14
15 #define MODULE_VERSION "1.0"
16
17 #include "Python.h"
18 #include "structmember.h"
19
20
/* begin 2.2 compatibility macros */
/* Each macro below is only defined when the included headers have not
 * already provided it, so newer headers take precedence. */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str)          str
#else
/* Doc strings compiled out: every doc string collapses to "". */
#define PyDoc_STR(str)          ""
#endif
#endif  /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

#ifndef Py_CLEAR
/* Drop the reference held in `op`, storing NULL in `op` *before* the
 * DECREF runs. */
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *tmp = (PyObject *)(op);                   \
            (op) = NULL;                                        \
            Py_DECREF(tmp);                                     \
        }                                                       \
    } while (0)
#endif
#ifndef Py_VISIT
/* Call `visit(op, arg)` for a non-NULL `op` and return from the
 * enclosing function if the call yields non-zero.  The names `visit`
 * and `arg` must be in scope where the macro is expanded. */
#define Py_VISIT(op)                                                    \
    do {                                                                \
        if (op) {                                                       \
            int vret = visit((PyObject *)(op), arg);                    \
            if (vret)                                                   \
                return vret;                                            \
        }                                                               \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
/* True when `o` is an instance of PyBaseString_Type (or a subtype). */
#define IS_BASESTRING(o) \
    PyObject_TypeCheck(o, &PyBaseString_Type)

/* Module-wide state shared by the functions in this file. */
static PyObject *error_obj;     /* CSV exception */
static PyObject *dialects;      /* Dialect registry */
static long field_limit = 128 * 1024;   /* max parsed field size */
70
/* States of the reader's character-at-a-time parser; the current state
 * is kept in ReaderObj.state and advanced by parse_process_char(). */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
76
/* Accepted values for a dialect's "quoting" option (checked by
 * dialect_check_quoting()). */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
80
/* Pairs a QuoteStyle value with its textual name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of every known quoting style.  The final `{ 0 }` entry has a
 * NULL name, which is what dialect_check_quoting() uses as its
 * end-of-table marker. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
93
/* Instance layout of _csv.Dialect: the C-level copy of every dialect
 * option, filled in and validated by dialect_new(). */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    char delimiter;             /* field separator */
    char quotechar;             /* quote character */
    char escapechar;            /* escape character */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator; /* string to write between records */
    int quoting;                /* style of quoting to write */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

staticforward PyTypeObject Dialect_Type;
109
/* Instance layout of _csv.reader: the input iterator, the dialect, and
 * the parser's working state for the record currently being read. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;           /* field list for current record */
    ParserState state;          /* current CSV parse state */
    char *field;                /* build current field in here */
    int field_size;             /* size of allocated buffer */
    int field_len;              /* length of current field */
    int numeric_field;          /* treat field as numeric */
    unsigned long line_num;     /* Source-file line number */
} ReaderObj;

staticforward PyTypeObject Reader_Type;

/* Exact-type test: subclasses of Reader_Type do not match. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
129
/* Instance layout of the CSV writer: the output callable, the dialect,
 * and the buffer in which one output record is assembled. */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* dialect used when formatting records */

    char *rec;                  /* buffer for parser.join */
    int rec_size;               /* size of allocated record */
    int rec_len;                /* length of record */
    int num_fields;             /* number of fields in record */
} WriterObj;

staticforward PyTypeObject Writer_Type;
144
145 /*
146 * DIALECT class
147 */
148
149 static PyObject *
get_dialect_from_registry(PyObject * name_obj)150 get_dialect_from_registry(PyObject * name_obj)
151 {
152 PyObject *dialect_obj;
153
154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
162 }
163
164 static PyObject *
get_string(PyObject * str)165 get_string(PyObject *str)
166 {
167 Py_XINCREF(str);
168 return str;
169 }
170
171 static PyObject *
get_nullchar_as_None(char c)172 get_nullchar_as_None(char c)
173 {
174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
180 }
181
182 static PyObject *
Dialect_get_lineterminator(DialectObj * self)183 Dialect_get_lineterminator(DialectObj *self)
184 {
185 return get_string(self->lineterminator);
186 }
187
188 static PyObject *
Dialect_get_escapechar(DialectObj * self)189 Dialect_get_escapechar(DialectObj *self)
190 {
191 return get_nullchar_as_None(self->escapechar);
192 }
193
194 static PyObject *
Dialect_get_quotechar(DialectObj * self)195 Dialect_get_quotechar(DialectObj *self)
196 {
197 return get_nullchar_as_None(self->quotechar);
198 }
199
200 static PyObject *
Dialect_get_quoting(DialectObj * self)201 Dialect_get_quoting(DialectObj *self)
202 {
203 return PyInt_FromLong(self->quoting);
204 }
205
206 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
208 {
209 if (src == NULL)
210 *target = dflt;
211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
217 return 0;
218 }
219
220 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)221 _set_int(const char *name, int *target, PyObject *src, int dflt)
222 {
223 int value;
224 if (src == NULL)
225 *target = dflt;
226 else {
227 if (!_PyAnyInt_Check(src)) {
228 PyErr_Format(PyExc_TypeError,
229 "\"%s\" must be an integer", name);
230 return -1;
231 }
232 value = PyInt_AsLong(src);
233 if (value == -1 && PyErr_Occurred())
234 return -1;
235 *target = value;
236 }
237 return 0;
238 }
239
240 static int
_set_char(const char * name,char * target,PyObject * src,char dflt)241 _set_char(const char *name, char *target, PyObject *src, char dflt)
242 {
243 if (src == NULL)
244 *target = dflt;
245 else {
246 *target = '\0';
247 if (src != Py_None) {
248 Py_ssize_t len;
249 if (!PyString_Check(src)) {
250 PyErr_Format(PyExc_TypeError,
251 "\"%s\" must be string, not %.200s", name,
252 src->ob_type->tp_name);
253 return -1;
254 }
255 len = PyString_GET_SIZE(src);
256 if (len > 1) {
257 PyErr_Format(PyExc_TypeError,
258 "\"%s\" must be an 1-character string",
259 name);
260 return -1;
261 }
262 if (len > 0)
263 *target = *PyString_AS_STRING(src);
264 }
265 }
266 return 0;
267 }
268
269 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)270 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
271 {
272 if (src == NULL)
273 *target = PyString_FromString(dflt);
274 else {
275 if (src == Py_None)
276 *target = NULL;
277 else if (!IS_BASESTRING(src)) {
278 PyErr_Format(PyExc_TypeError,
279 "\"%s\" must be a string", name);
280 return -1;
281 }
282 else {
283 Py_INCREF(src);
284 Py_XSETREF(*target, src);
285 }
286 }
287 return 0;
288 }
289
290 static int
dialect_check_quoting(int quoting)291 dialect_check_quoting(int quoting)
292 {
293 StyleDesc *qs = quote_styles;
294
295 for (qs = quote_styles; qs->name; qs++) {
296 if (qs->style == quoting)
297 return 0;
298 }
299 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
300 return -1;
301 }
302
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only attributes served directly from DialectObj fields; the
 * remaining dialect attributes go through Dialect_getsetlist. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};
312
/* Attributes computed by getter functions.  Only a getter is supplied
 * for each entry; no setter is listed. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "escapechar",     (getter)Dialect_get_escapechar},
    { "lineterminator", (getter)Dialect_get_lineterminator},
    { "quotechar",      (getter)Dialect_get_quotechar},
    { "quoting",        (getter)Dialect_get_quoting},
    {NULL},
};
320
321 static void
Dialect_dealloc(DialectObj * self)322 Dialect_dealloc(DialectObj *self)
323 {
324 Py_XDECREF(self->lineterminator);
325 Py_TYPE(self)->tp_free((PyObject *)self);
326 }
327
/* Keyword names accepted by dialect_new().  The order must match the
 * order of the output pointers passed to PyArg_ParseTupleAndKeywords()
 * there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
340
/* tp_new for _csv.Dialect.
 *
 * Accepts an optional `dialect` (a registered name or an object whose
 * attributes supply option values) plus one keyword per option.
 * Explicit keyword values take precedence; options not given are read
 * from `dialect` when it has the attribute, and otherwise fall back to
 * built-in defaults.
 *
 * Returns a new reference to a Dialect, or NULL with an exception set.
 * When `dialect` is already a Dialect_Type instance and no option is
 * overridden, that same instance is returned instead of a copy.
 */
static PyObject *
dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    DialectObj *self;
    PyObject *ret = NULL;
    PyObject *dialect = NULL;
    PyObject *delimiter = NULL;
    PyObject *doublequote = NULL;
    PyObject *escapechar = NULL;
    PyObject *lineterminator = NULL;
    PyObject *quotechar = NULL;
    PyObject *quoting = NULL;
    PyObject *skipinitialspace = NULL;
    PyObject *strict = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|OOOOOOOOO", dialect_kws,
                                     &dialect,
                                     &delimiter,
                                     &doublequote,
                                     &escapechar,
                                     &lineterminator,
                                     &quotechar,
                                     &quoting,
                                     &skipinitialspace,
                                     &strict))
        return NULL;

    /* From here on `dialect`, when non-NULL, is an owned reference:
     * either the registry lookup result or an INCREF'd argument. */
    if (dialect != NULL) {
        if (IS_BASESTRING(dialect)) {
            dialect = get_dialect_from_registry(dialect);
            if (dialect == NULL)
                return NULL;
        }
        else
            Py_INCREF(dialect);
        /* Can we reuse this instance? */
        if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
            delimiter == 0 &&
            doublequote == 0 &&
            escapechar == 0 &&
            lineterminator == 0 &&
            quotechar == 0 &&
            quoting == 0 &&
            skipinitialspace == 0 &&
            strict == 0)
            return dialect;
    }

    self = (DialectObj *)type->tp_alloc(type, 0);
    if (self == NULL) {
        Py_XDECREF(dialect);
        return NULL;
    }
    self->lineterminator = NULL;

    /* The parsed arguments are borrowed; take references so that every
     * option variable can be released uniformly at `err`, whether it
     * came from the caller or from a getattr below. */
    Py_XINCREF(delimiter);
    Py_XINCREF(doublequote);
    Py_XINCREF(escapechar);
    Py_XINCREF(lineterminator);
    Py_XINCREF(quotechar);
    Py_XINCREF(quoting);
    Py_XINCREF(skipinitialspace);
    Py_XINCREF(strict);
    if (dialect != NULL) {
        /* Fill in any option the caller did not pass explicitly. */
#define DIALECT_GETATTR(v, n) \
        if (v == NULL) \
            v = PyObject_GetAttrString(dialect, n)
        DIALECT_GETATTR(delimiter, "delimiter");
        DIALECT_GETATTR(doublequote, "doublequote");
        DIALECT_GETATTR(escapechar, "escapechar");
        DIALECT_GETATTR(lineterminator, "lineterminator");
        DIALECT_GETATTR(quotechar, "quotechar");
        DIALECT_GETATTR(quoting, "quoting");
        DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
        DIALECT_GETATTR(strict, "strict");
        /* A failed lookup leaves the variable NULL, so the default is
         * used; discard whatever exception the lookups raised. */
        PyErr_Clear();
    }

    /* check types and convert to C values */
#define DIASET(meth, name, target, src, dflt) \
    if (meth(name, target, src, dflt)) \
        goto err
    DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
    DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
    DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
    DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
    DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
    DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
    DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
    DIASET(_set_bool, "strict", &self->strict, strict, 0);

    /* validate options */
    if (dialect_check_quoting(self->quoting))
        goto err;
    if (self->delimiter == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "\"delimiter\" must be an 1-character string");
        goto err;
    }
    /* quotechar=None with no quoting value available means "no quoting". */
    if (quotechar == Py_None && quoting == NULL)
        self->quoting = QUOTE_NONE;
    if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "quotechar must be set if quoting enabled");
        goto err;
    }
    if (self->lineterminator == 0) {
        PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
        goto err;
    }

    /* Success: the extra reference taken here survives the DECREF of
     * `self` in the shared cleanup below.  On failure `ret` stays NULL
     * and that DECREF destroys the half-built object. */
    ret = (PyObject *)self;
    Py_INCREF(self);
err:
    Py_XDECREF(self);
    Py_XDECREF(dialect);
    Py_XDECREF(delimiter);
    Py_XDECREF(doublequote);
    Py_XDECREF(escapechar);
    Py_XDECREF(lineterminator);
    Py_XDECREF(quotechar);
    Py_XDECREF(quoting);
    Py_XDECREF(skipinitialspace);
    Py_XDECREF(strict);
    return ret;
}
468
469
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");

/* Type object for _csv.Dialect.  Instances are created by dialect_new
 * and destroyed by Dialect_dealloc; attributes are exposed through
 * Dialect_memberlist and Dialect_getsetlist.  Slots given as 0 are not
 * set in this initializer. */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    (cmpfunc)0,                             /* tp_compare */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                            /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                          /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    dialect_new,                                /* tp_new */
    0,                                          /* tp_free */
};
517
518 /*
519 * Return an instance of the dialect type, given a Python instance or kwarg
520 * description of the dialect
521 */
522 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)523 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
524 {
525 PyObject *ctor_args;
526 PyObject *dialect;
527
528 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
529 if (ctor_args == NULL)
530 return NULL;
531 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
532 Py_DECREF(ctor_args);
533 return dialect;
534 }
535
536 /*
537 * READER
538 */
539 static int
parse_save_field(ReaderObj * self)540 parse_save_field(ReaderObj *self)
541 {
542 PyObject *field;
543
544 field = PyString_FromStringAndSize(self->field, self->field_len);
545 if (field == NULL)
546 return -1;
547 self->field_len = 0;
548 if (self->numeric_field) {
549 PyObject *tmp;
550
551 self->numeric_field = 0;
552 tmp = PyNumber_Float(field);
553 if (tmp == NULL) {
554 Py_DECREF(field);
555 return -1;
556 }
557 Py_DECREF(field);
558 field = tmp;
559 }
560 PyList_Append(self->fields, field);
561 Py_DECREF(field);
562 return 0;
563 }
564
565 static int
parse_grow_buff(ReaderObj * self)566 parse_grow_buff(ReaderObj *self)
567 {
568 unsigned field_size_new;
569 char *field_new;
570
571 assert((unsigned)self->field_size <= INT_MAX);
572
573 field_size_new = self->field_size ? 2 * (unsigned)self->field_size : 4096;
574 if (field_size_new > INT_MAX) {
575 PyErr_NoMemory();
576 return 0;
577 }
578 field_new = (char *)PyMem_Realloc(self->field, field_size_new);
579 if (field_new == NULL) {
580 PyErr_NoMemory();
581 return 0;
582 }
583 self->field = field_new;
584 self->field_size = (int)field_size_new;
585 return 1;
586 }
587
588 static int
parse_add_char(ReaderObj * self,char c)589 parse_add_char(ReaderObj *self, char c)
590 {
591 if (self->field_len >= field_limit) {
592 PyErr_Format(error_obj, "field larger than field limit (%ld)",
593 field_limit);
594 return -1;
595 }
596 if (self->field_len == self->field_size && !parse_grow_buff(self))
597 return -1;
598 self->field[self->field_len++] = c;
599 return 0;
600 }
601
/* Feed one character to the reader's state machine.
 *
 * `c` is either a character from the current input line or '\0', which
 * Reader_iternext() passes once after the last character of each line
 * as an end-of-line marker.  Depending on self->state the character is
 * appended to the current field, completes a field (parse_save_field),
 * or only changes the state.  A record is complete when the state is
 * back at START_RECORD.
 *
 * Returns 0 on success, -1 with an exception set on error.
 */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* escape character at end of line: store a newline instead */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: stay in this state so the
             * field continues on the next input line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* escape character at end of line: store a newline instead */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the stray character and carry on as
             * an unquoted field */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line terminator: only more terminator characters or
         * the end-of-line marker are acceptable */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
771
772 static int
parse_reset(ReaderObj * self)773 parse_reset(ReaderObj *self)
774 {
775 Py_XSETREF(self->fields, PyList_New(0));
776 if (self->fields == NULL)
777 return -1;
778 self->field_len = 0;
779 self->state = START_RECORD;
780 self->numeric_field = 0;
781 return 0;
782 }
783
784 static PyObject *
Reader_iternext(ReaderObj * self)785 Reader_iternext(ReaderObj *self)
786 {
787 PyObject *lineobj;
788 PyObject *fields = NULL;
789 char *line, c;
790 int linelen;
791
792 if (parse_reset(self) < 0)
793 return NULL;
794 do {
795 lineobj = PyIter_Next(self->input_iter);
796 if (lineobj == NULL) {
797 /* End of input OR exception */
798 if (!PyErr_Occurred() && (self->field_len != 0 ||
799 self->state == IN_QUOTED_FIELD)) {
800 if (self->dialect->strict)
801 PyErr_SetString(error_obj, "unexpected end of data");
802 else if (parse_save_field(self) >= 0 )
803 break;
804 }
805 return NULL;
806 }
807 ++self->line_num;
808
809 line = PyString_AsString(lineobj);
810 linelen = PyString_Size(lineobj);
811
812 if (line == NULL || linelen < 0) {
813 Py_DECREF(lineobj);
814 return NULL;
815 }
816 while (linelen--) {
817 c = *line++;
818 if (c == '\0') {
819 Py_DECREF(lineobj);
820 PyErr_Format(error_obj,
821 "line contains NULL byte");
822 goto err;
823 }
824 if (parse_process_char(self, c) < 0) {
825 Py_DECREF(lineobj);
826 goto err;
827 }
828 }
829 Py_DECREF(lineobj);
830 if (parse_process_char(self, 0) < 0)
831 goto err;
832 } while (self->state != START_RECORD);
833
834 fields = self->fields;
835 self->fields = NULL;
836 err:
837 return fields;
838 }
839
840 static void
Reader_dealloc(ReaderObj * self)841 Reader_dealloc(ReaderObj *self)
842 {
843 PyObject_GC_UnTrack(self);
844 Py_XDECREF(self->dialect);
845 Py_XDECREF(self->input_iter);
846 Py_XDECREF(self->fields);
847 if (self->field != NULL)
848 PyMem_Free(self->field);
849 PyObject_GC_Del(self);
850 }
851
852 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)853 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
854 {
855 Py_VISIT(self->dialect);
856 Py_VISIT(self->input_iter);
857 Py_VISIT(self->fields);
858 return 0;
859 }
860
861 static int
Reader_clear(ReaderObj * self)862 Reader_clear(ReaderObj *self)
863 {
864 Py_CLEAR(self->dialect);
865 Py_CLEAR(self->input_iter);
866 Py_CLEAR(self->fields);
867 return 0;
868 }
869
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);

/* The reader defines no methods of its own; the table holds only the
 * terminating sentinel. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes served directly from ReaderObj fields. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), RO },
    { "line_num", T_ULONG, R_OFF(line_num), RO },
    { NULL }
};
887
888
889 static PyTypeObject Reader_Type = {
890 PyVarObject_HEAD_INIT(NULL, 0)
891 "_csv.reader", /*tp_name*/
892 sizeof(ReaderObj), /*tp_basicsize*/
893 0, /*tp_itemsize*/
894 /* methods */
895 (destructor)Reader_dealloc, /*tp_dealloc*/
896 (printfunc)0, /*tp_print*/
897 (getattrfunc)0, /*tp_getattr*/
898 (setattrfunc)0, /*tp_setattr*/
899 (cmpfunc)0, /*tp_compare*/
900 (reprfunc)0, /*tp_repr*/
901 0, /*tp_as_number*/
902 0, /*tp_as_sequence*/
903 0, /*tp_as_mapping*/
904 (hashfunc)0, /*tp_hash*/
905 (ternaryfunc)0, /*tp_call*/
906 (reprfunc)0, /*tp_str*/
907 0, /*tp_getattro*/
908 0, /*tp_setattro*/
909 0, /*tp_as_buffer*/
910 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
911 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
912 Reader_Type_doc, /*tp_doc*/
913 (traverseproc)Reader_traverse, /*tp_traverse*/
914 (inquiry)Reader_clear, /*tp_clear*/
915 0, /*tp_richcompare*/
916 0, /*tp_weaklistoffset*/
917 PyObject_SelfIter, /*tp_iter*/
918 (getiterfunc)Reader_iternext, /*tp_iternext*/
919 Reader_methods, /*tp_methods*/
920 Reader_memberlist, /*tp_members*/
921 0, /*tp_getset*/
922
923 };
924
925 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)926 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
927 {
928 PyObject * iterator, * dialect = NULL;
929 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
930
931 if (!self)
932 return NULL;
933
934 self->dialect = NULL;
935 self->fields = NULL;
936 self->input_iter = NULL;
937 self->field = NULL;
938 self->field_size = 0;
939 self->line_num = 0;
940
941 if (parse_reset(self) < 0) {
942 Py_DECREF(self);
943 return NULL;
944 }
945
946 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
947 Py_DECREF(self);
948 return NULL;
949 }
950 self->input_iter = PyObject_GetIter(iterator);
951 if (self->input_iter == NULL) {
952 PyErr_SetString(PyExc_TypeError,
953 "argument 1 must be an iterator");
954 Py_DECREF(self);
955 return NULL;
956 }
957 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
958 if (self->dialect == NULL) {
959 Py_DECREF(self);
960 return NULL;
961 }
962
963 PyObject_GC_Track(self);
964 return (PyObject *)self;
965 }
966
967 /*
968 * WRITER
969 */
970 /* ---------------------------------------------------------------- */
971 static void
join_reset(WriterObj * self)972 join_reset(WriterObj *self)
973 {
974 self->rec_len = 0;
975 self->num_fields = 0;
976 }
977
/* Granularity, in bytes, by which join_check_rec_size() sizes the
 * writer's record buffer. */
#define MEM_INCR 32768
979
/* Calculate new record length or append field to record.  Return new
 * record length.
 *
 * The function is called twice per field.  With copy_phase == 0 nothing
 * is written: it only counts how long the record will become and sets
 * *quoted if the field turns out to need quoting.  With copy_phase != 0
 * it writes into self->rec, which the caller must already have sized
 * from the first pass.
 *
 * `field` is read as a NUL-terminated C string.  `quote_empty` asks for
 * an empty field to be quoted.  Returns -1 with an exception set on
 * error (escaping required but no escapechar, unquotable empty field,
 * or length overflow during the counting pass).
 */
static int
join_append_data(WriterObj *self, char *field, int quote_empty,
                 int *quoted, int copy_phase)
{
    DialectObj *dialect = self->dialect;
    int i, rec_len;
    char *lineterm;

/* Advance rec_len by one; in the counting pass, bail out via the
 * `overflow` label instead of going past INT_MAX. */
#define INCLEN \
    do {\
        if (!copy_phase && rec_len == INT_MAX) {        \
            goto overflow; \
        } \
        rec_len++; \
    } while(0)

/* Emit character c (copy pass only) and advance rec_len. */
#define ADDCH(c)                                \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        INCLEN;\
    } while(0)

    lineterm = PyString_AsString(dialect->lineterminator);
    if (lineterm == NULL)
        return -1;

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote */
    /* (the counting pass accounts for both quotes at the end instead) */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    for (i = 0;; i++) {
        char c = field[i];
        int want_escape = 0;

        if (c == '\0')
            break;

        /* Characters with special meaning need quoting or escaping. */
        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar  ||
            strchr(lineterm, c)) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted.
     */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else {
            INCLEN; /* starting quote */
            INCLEN; /* ending quote */
        }
    }
    return rec_len;

  overflow:
    PyErr_NoMemory();
    return -1;
#undef ADDCH
#undef INCLEN
}
1086
1087 static int
join_check_rec_size(WriterObj * self,int rec_len)1088 join_check_rec_size(WriterObj *self, int rec_len)
1089 {
1090 unsigned rec_size_new;
1091 char *rec_new;
1092
1093 assert(rec_len >= 0);
1094
1095 if (rec_len > self->rec_size) {
1096 rec_size_new = (unsigned)(rec_len / MEM_INCR + 1) * MEM_INCR;
1097 if (rec_size_new > INT_MAX) {
1098 PyErr_NoMemory();
1099 return 0;
1100 }
1101 rec_new = (char *)PyMem_Realloc(self->rec, rec_size_new);
1102 if (rec_new == NULL) {
1103 PyErr_NoMemory();
1104 return 0;
1105 }
1106 self->rec = rec_new;
1107 self->rec_size = (int)rec_size_new;
1108 }
1109 return 1;
1110 }
1111
1112 static int
join_append(WriterObj * self,char * field,int * quoted,int quote_empty)1113 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1114 {
1115 int rec_len;
1116
1117 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1118 if (rec_len < 0)
1119 return 0;
1120
1121 /* grow record buffer if necessary */
1122 if (!join_check_rec_size(self, rec_len))
1123 return 0;
1124
1125 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1126 self->num_fields++;
1127
1128 return 1;
1129 }
1130
1131 static int
join_append_lineterminator(WriterObj * self)1132 join_append_lineterminator(WriterObj *self)
1133 {
1134 int terminator_len;
1135 char *terminator;
1136
1137 terminator_len = PyString_Size(self->dialect->lineterminator);
1138 if (terminator_len == -1)
1139 return 0;
1140
1141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1143 return 0;
1144
1145 terminator = PyString_AsString(self->dialect->lineterminator);
1146 if (terminator == NULL)
1147 return 0;
1148 memmove(self->rec + self->rec_len, terminator, terminator_len);
1149 self->rec_len += terminator_len;
1150
1151 return 1;
1152 }
1153
1154 PyDoc_STRVAR(csv_writerow_doc,
1155 "writerow(sequence)\n"
1156 "\n"
1157 "Construct and write a CSV record from a sequence of fields. Non-string\n"
1158 "elements will be converted to string.");
1159
1160 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1161 csv_writerow(WriterObj *self, PyObject *seq)
1162 {
1163 DialectObj *dialect = self->dialect;
1164 int len, i;
1165
1166 if (!PySequence_Check(seq))
1167 return PyErr_Format(error_obj, "sequence expected");
1168
1169 len = PySequence_Length(seq);
1170 if (len < 0)
1171 return NULL;
1172
1173 /* Join all fields in internal buffer.
1174 */
1175 join_reset(self);
1176 for (i = 0; i < len; i++) {
1177 PyObject *field;
1178 int append_ok;
1179 int quoted;
1180
1181 field = PySequence_GetItem(seq, i);
1182 if (field == NULL)
1183 return NULL;
1184
1185 switch (dialect->quoting) {
1186 case QUOTE_NONNUMERIC:
1187 quoted = !PyNumber_Check(field);
1188 break;
1189 case QUOTE_ALL:
1190 quoted = 1;
1191 break;
1192 default:
1193 quoted = 0;
1194 break;
1195 }
1196
1197 if (PyString_Check(field)) {
1198 append_ok = join_append(self,
1199 PyString_AS_STRING(field),
1200 "ed, len == 1);
1201 Py_DECREF(field);
1202 }
1203 else if (field == Py_None) {
1204 append_ok = join_append(self, "", "ed, len == 1);
1205 Py_DECREF(field);
1206 }
1207 else {
1208 PyObject *str;
1209
1210 if (PyFloat_Check(field)) {
1211 str = PyObject_Repr(field);
1212 } else {
1213 str = PyObject_Str(field);
1214 }
1215 Py_DECREF(field);
1216 if (str == NULL)
1217 return NULL;
1218
1219 append_ok = join_append(self, PyString_AS_STRING(str),
1220 "ed, len == 1);
1221 Py_DECREF(str);
1222 }
1223 if (!append_ok)
1224 return NULL;
1225 }
1226
1227 /* Add line terminator.
1228 */
1229 if (!join_append_lineterminator(self))
1230 return 0;
1231
1232 return PyObject_CallFunction(self->writeline,
1233 "(s#)", self->rec, self->rec_len);
1234 }
1235
1236 PyDoc_STRVAR(csv_writerows_doc,
1237 "writerows(sequence of sequences)\n"
1238 "\n"
1239 "Construct and write a series of sequences to a csv file. Non-string\n"
1240 "elements will be converted to string.");
1241
1242 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1243 csv_writerows(WriterObj *self, PyObject *seqseq)
1244 {
1245 PyObject *row_iter, *row_obj, *result;
1246
1247 row_iter = PyObject_GetIter(seqseq);
1248 if (row_iter == NULL) {
1249 PyErr_SetString(PyExc_TypeError,
1250 "writerows() argument must be iterable");
1251 return NULL;
1252 }
1253 while ((row_obj = PyIter_Next(row_iter))) {
1254 result = csv_writerow(self, row_obj);
1255 Py_DECREF(row_obj);
1256 if (!result) {
1257 Py_DECREF(row_iter);
1258 return NULL;
1259 }
1260 else
1261 Py_DECREF(result);
1262 }
1263 Py_DECREF(row_iter);
1264 if (PyErr_Occurred())
1265 return NULL;
1266 Py_INCREF(Py_None);
1267 return Py_None;
1268 }
1269
1270 static struct PyMethodDef Writer_methods[] = {
1271 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1272 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1273 { NULL, NULL }
1274 };
1275
1276 #define W_OFF(x) offsetof(WriterObj, x)
1277
1278 static struct PyMemberDef Writer_memberlist[] = {
1279 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1280 { NULL }
1281 };
1282
1283 static void
Writer_dealloc(WriterObj * self)1284 Writer_dealloc(WriterObj *self)
1285 {
1286 PyObject_GC_UnTrack(self);
1287 Py_XDECREF(self->dialect);
1288 Py_XDECREF(self->writeline);
1289 if (self->rec != NULL)
1290 PyMem_Free(self->rec);
1291 PyObject_GC_Del(self);
1292 }
1293
1294 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1295 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1296 {
1297 Py_VISIT(self->dialect);
1298 Py_VISIT(self->writeline);
1299 return 0;
1300 }
1301
1302 static int
Writer_clear(WriterObj * self)1303 Writer_clear(WriterObj *self)
1304 {
1305 Py_CLEAR(self->dialect);
1306 Py_CLEAR(self->writeline);
1307 return 0;
1308 }
1309
1310 PyDoc_STRVAR(Writer_Type_doc,
1311 "CSV writer\n"
1312 "\n"
1313 "Writer objects are responsible for generating tabular data\n"
1314 "in CSV format from sequence input.\n"
1315 );
1316
1317 static PyTypeObject Writer_Type = {
1318 PyVarObject_HEAD_INIT(NULL, 0)
1319 "_csv.writer", /*tp_name*/
1320 sizeof(WriterObj), /*tp_basicsize*/
1321 0, /*tp_itemsize*/
1322 /* methods */
1323 (destructor)Writer_dealloc, /*tp_dealloc*/
1324 (printfunc)0, /*tp_print*/
1325 (getattrfunc)0, /*tp_getattr*/
1326 (setattrfunc)0, /*tp_setattr*/
1327 (cmpfunc)0, /*tp_compare*/
1328 (reprfunc)0, /*tp_repr*/
1329 0, /*tp_as_number*/
1330 0, /*tp_as_sequence*/
1331 0, /*tp_as_mapping*/
1332 (hashfunc)0, /*tp_hash*/
1333 (ternaryfunc)0, /*tp_call*/
1334 (reprfunc)0, /*tp_str*/
1335 0, /*tp_getattro*/
1336 0, /*tp_setattro*/
1337 0, /*tp_as_buffer*/
1338 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1339 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1340 Writer_Type_doc,
1341 (traverseproc)Writer_traverse, /*tp_traverse*/
1342 (inquiry)Writer_clear, /*tp_clear*/
1343 0, /*tp_richcompare*/
1344 0, /*tp_weaklistoffset*/
1345 (getiterfunc)0, /*tp_iter*/
1346 (getiterfunc)0, /*tp_iternext*/
1347 Writer_methods, /*tp_methods*/
1348 Writer_memberlist, /*tp_members*/
1349 0, /*tp_getset*/
1350 };
1351
1352 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1353 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1354 {
1355 PyObject * output_file, * dialect = NULL;
1356 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1357
1358 if (!self)
1359 return NULL;
1360
1361 self->dialect = NULL;
1362 self->writeline = NULL;
1363
1364 self->rec = NULL;
1365 self->rec_size = 0;
1366 self->rec_len = 0;
1367 self->num_fields = 0;
1368
1369 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1370 Py_DECREF(self);
1371 return NULL;
1372 }
1373 self->writeline = PyObject_GetAttrString(output_file, "write");
1374 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1375 PyErr_SetString(PyExc_TypeError,
1376 "argument 1 must have a \"write\" method");
1377 Py_DECREF(self);
1378 return NULL;
1379 }
1380 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1381 if (self->dialect == NULL) {
1382 Py_DECREF(self);
1383 return NULL;
1384 }
1385 PyObject_GC_Track(self);
1386 return (PyObject *)self;
1387 }
1388
1389 /*
1390 * DIALECT REGISTRY
1391 */
1392 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1393 csv_list_dialects(PyObject *module, PyObject *args)
1394 {
1395 return PyDict_Keys(dialects);
1396 }
1397
1398 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1399 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1400 {
1401 PyObject *name_obj, *dialect_obj = NULL;
1402 PyObject *dialect;
1403
1404 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1405 return NULL;
1406 if (!IS_BASESTRING(name_obj)) {
1407 PyErr_SetString(PyExc_TypeError,
1408 "dialect name must be a string or unicode");
1409 return NULL;
1410 }
1411 dialect = _call_dialect(dialect_obj, kwargs);
1412 if (dialect == NULL)
1413 return NULL;
1414 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1415 Py_DECREF(dialect);
1416 return NULL;
1417 }
1418 Py_DECREF(dialect);
1419 Py_INCREF(Py_None);
1420 return Py_None;
1421 }
1422
1423 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1424 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1425 {
1426 if (PyDict_DelItem(dialects, name_obj) < 0)
1427 return PyErr_Format(error_obj, "unknown dialect");
1428 Py_INCREF(Py_None);
1429 return Py_None;
1430 }
1431
1432 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1433 csv_get_dialect(PyObject *module, PyObject *name_obj)
1434 {
1435 return get_dialect_from_registry(name_obj);
1436 }
1437
1438 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1439 csv_field_size_limit(PyObject *module, PyObject *args)
1440 {
1441 PyObject *new_limit = NULL;
1442 long old_limit = field_limit, limit;
1443
1444 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1445 return NULL;
1446 if (new_limit != NULL) {
1447 if (!_PyAnyInt_Check(new_limit)) {
1448 PyErr_Format(PyExc_TypeError,
1449 "limit must be an integer");
1450 return NULL;
1451 }
1452 limit = PyInt_AsLong(new_limit);
1453 if (limit == -1 && PyErr_Occurred())
1454 return NULL;
1455 field_limit = limit;
1456 }
1457 return PyInt_FromLong(old_limit);
1458 }
1459
1460 /*
1461 * MODULE
1462 */
1463
/* Module docstring; handed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1521
/* Docstring for the "reader" entry of csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1536
/* Docstring for the "writer" entry of csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1550
1551 PyDoc_STRVAR(csv_list_dialects_doc,
1552 "Return a list of all know dialect names.\n"
1553 " names = csv.list_dialects()");
1554
/* Docstring for the "get_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1558
1559 PyDoc_STRVAR(csv_register_dialect_doc,
1560 "Create a mapping from a string name to a dialect class.\n"
1561 " dialect = csv.register_dialect(name, dialect)");
1562
/* Docstring for the "unregister_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1566
/* Docstring for the "field_size_limit" entry of csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1573
1574 static struct PyMethodDef csv_methods[] = {
1575 { "reader", (PyCFunction)csv_reader,
1576 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1577 { "writer", (PyCFunction)csv_writer,
1578 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1579 { "list_dialects", (PyCFunction)csv_list_dialects,
1580 METH_NOARGS, csv_list_dialects_doc},
1581 { "register_dialect", (PyCFunction)csv_register_dialect,
1582 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1583 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1584 METH_O, csv_unregister_dialect_doc},
1585 { "get_dialect", (PyCFunction)csv_get_dialect,
1586 METH_O, csv_get_dialect_doc},
1587 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1588 METH_VARARGS, csv_field_size_limit_doc},
1589 { NULL, NULL }
1590 };
1591
1592 PyMODINIT_FUNC
init_csv(void)1593 init_csv(void)
1594 {
1595 PyObject *module;
1596 StyleDesc *style;
1597
1598 if (PyType_Ready(&Dialect_Type) < 0)
1599 return;
1600
1601 if (PyType_Ready(&Reader_Type) < 0)
1602 return;
1603
1604 if (PyType_Ready(&Writer_Type) < 0)
1605 return;
1606
1607 /* Create the module and add the functions */
1608 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1609 if (module == NULL)
1610 return;
1611
1612 /* Add version to the module. */
1613 if (PyModule_AddStringConstant(module, "__version__",
1614 MODULE_VERSION) == -1)
1615 return;
1616
1617 /* Add _dialects dictionary */
1618 dialects = PyDict_New();
1619 if (dialects == NULL)
1620 return;
1621 if (PyModule_AddObject(module, "_dialects", dialects))
1622 return;
1623
1624 /* Add quote styles into dictionary */
1625 for (style = quote_styles; style->name; style++) {
1626 if (PyModule_AddIntConstant(module, style->name,
1627 style->style) == -1)
1628 return;
1629 }
1630
1631 /* Add the Dialect type */
1632 Py_INCREF(&Dialect_Type);
1633 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1634 return;
1635
1636 /* Add the CSV exception object to the module. */
1637 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1638 if (error_obj == NULL)
1639 return;
1640 PyModule_AddObject(module, "Error", error_obj);
1641 }
1642