1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 **** For people modifying this code, please note that as of this writing
10 **** (2003-03-23), it is intended that this code should work with Python
11 **** 2.2.
12
13 */
14
15 #define MODULE_VERSION "1.0"
16
17 #include "Python.h"
18 #include "structmember.h"
19
20
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str)          str
#else
#define PyDoc_STR(str)          ""
#endif
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/* Drop the reference held in `op`, setting `op` to NULL before the
 * decref so the slot never holds a dangling pointer. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *tmp = (PyObject *)(op);                   \
            (op) = NULL;                                        \
            Py_DECREF(tmp);                                     \
        }                                                       \
    } while (0)
#endif

/* For use inside a tp_traverse function: relies on the enclosing
 * function having parameters named `visit` and `arg`. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int vret = visit((PyObject *)(op), arg);            \
            if (vret)                                           \
                return vret;                                    \
        }                                                       \
    } while (0)
#endif

/* Replace the reference held in `op` with `op2` (which may be NULL),
 * releasing the previous value only after the slot has been updated.
 * This file uses Py_XSETREF, so provide it when the headers do not. */
#ifndef Py_XSETREF
#define Py_XSETREF(op, op2)                                     \
    do {                                                        \
        PyObject *_py_tmp = (PyObject *)(op);                   \
        (op) = (op2);                                           \
        Py_XDECREF(_py_tmp);                                    \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
64 #define IS_BASESTRING(o) \
65 PyObject_TypeCheck(o, &PyBaseString_Type)
66
67 static PyObject *error_obj; /* CSV exception */
68 static PyObject *dialects; /* Dialect registry */
69 static long field_limit = 128 * 1024; /* max parsed field size */
70
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;

/* Quoting policies selectable through a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of known quoting policies; the entry with a NULL name ends it. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { (QuoteStyle)0,    NULL }
};
93
94 typedef struct {
95 PyObject_HEAD
96
97 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
103 int quoting; /* style of quoting to write */
104
105 int strict; /* raise exception on bad CSV */
106 } DialectObj;
107
108 staticforward PyTypeObject Dialect_Type;
109
110 typedef struct {
111 PyObject_HEAD
112
113 PyObject *input_iter; /* iterate over this for input lines */
114
115 DialectObj *dialect; /* parsing dialect */
116
117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
122 int numeric_field; /* treat field as numeric */
123 unsigned long line_num; /* Source-file line number */
124 } ReaderObj;
125
126 staticforward PyTypeObject Reader_Type;
127
128 #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
129
130 typedef struct {
131 PyObject_HEAD
132
133 PyObject *writeline; /* write output lines to this file */
134
135 DialectObj *dialect; /* parsing dialect */
136
137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141 } WriterObj;
142
143 staticforward PyTypeObject Writer_Type;
144
145 /*
146 * DIALECT class
147 */
148
149 static PyObject *
get_dialect_from_registry(PyObject * name_obj)150 get_dialect_from_registry(PyObject * name_obj)
151 {
152 PyObject *dialect_obj;
153
154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
162 }
163
164 static PyObject *
get_string(PyObject * str)165 get_string(PyObject *str)
166 {
167 Py_XINCREF(str);
168 return str;
169 }
170
171 static PyObject *
get_nullchar_as_None(char c)172 get_nullchar_as_None(char c)
173 {
174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
180 }
181
182 static PyObject *
Dialect_get_lineterminator(DialectObj * self)183 Dialect_get_lineterminator(DialectObj *self)
184 {
185 return get_string(self->lineterminator);
186 }
187
188 static PyObject *
Dialect_get_escapechar(DialectObj * self)189 Dialect_get_escapechar(DialectObj *self)
190 {
191 return get_nullchar_as_None(self->escapechar);
192 }
193
194 static PyObject *
Dialect_get_quotechar(DialectObj * self)195 Dialect_get_quotechar(DialectObj *self)
196 {
197 return get_nullchar_as_None(self->quotechar);
198 }
199
200 static PyObject *
Dialect_get_quoting(DialectObj * self)201 Dialect_get_quoting(DialectObj *self)
202 {
203 return PyInt_FromLong(self->quoting);
204 }
205
206 static int
_set_bool(const char * name,int * target,PyObject * src,int dflt)207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
208 {
209 if (src == NULL)
210 *target = dflt;
211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
217 return 0;
218 }
219
220 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)221 _set_int(const char *name, int *target, PyObject *src, int dflt)
222 {
223 if (src == NULL)
224 *target = dflt;
225 else {
226 if (!PyInt_Check(src)) {
227 PyErr_Format(PyExc_TypeError,
228 "\"%s\" must be an integer", name);
229 return -1;
230 }
231 *target = PyInt_AsLong(src);
232 }
233 return 0;
234 }
235
236 static int
_set_char(const char * name,char * target,PyObject * src,char dflt)237 _set_char(const char *name, char *target, PyObject *src, char dflt)
238 {
239 if (src == NULL)
240 *target = dflt;
241 else {
242 *target = '\0';
243 if (src != Py_None) {
244 Py_ssize_t len;
245 if (!PyString_Check(src)) {
246 PyErr_Format(PyExc_TypeError,
247 "\"%s\" must be string, not %.200s", name,
248 src->ob_type->tp_name);
249 return -1;
250 }
251 len = PyString_GET_SIZE(src);
252 if (len > 1) {
253 PyErr_Format(PyExc_TypeError,
254 "\"%s\" must be an 1-character string",
255 name);
256 return -1;
257 }
258 if (len > 0)
259 *target = *PyString_AS_STRING(src);
260 }
261 }
262 return 0;
263 }
264
265 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)266 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
267 {
268 if (src == NULL)
269 *target = PyString_FromString(dflt);
270 else {
271 if (src == Py_None)
272 *target = NULL;
273 else if (!IS_BASESTRING(src)) {
274 PyErr_Format(PyExc_TypeError,
275 "\"%s\" must be a string", name);
276 return -1;
277 }
278 else {
279 Py_INCREF(src);
280 Py_XSETREF(*target, src);
281 }
282 }
283 return 0;
284 }
285
286 static int
dialect_check_quoting(int quoting)287 dialect_check_quoting(int quoting)
288 {
289 StyleDesc *qs = quote_styles;
290
291 for (qs = quote_styles; qs->name; qs++) {
292 if (qs->style == quoting)
293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
297 }
298
299 #define D_OFF(x) offsetof(DialectObj, x)
300
301 static struct PyMemberDef Dialect_memberlist[] = {
302 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
303 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
304 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
305 { "strict", T_INT, D_OFF(strict), READONLY },
306 { NULL }
307 };
308
309 static PyGetSetDef Dialect_getsetlist[] = {
310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
315 };
316
317 static void
Dialect_dealloc(DialectObj * self)318 Dialect_dealloc(DialectObj *self)
319 {
320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
322 }
323
324 static char *dialect_kws[] = {
325 "dialect",
326 "delimiter",
327 "doublequote",
328 "escapechar",
329 "lineterminator",
330 "quotechar",
331 "quoting",
332 "skipinitialspace",
333 "strict",
334 NULL
335 };
336
337 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
351
352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 "echar,
360 "ing,
361 &skipinitialspace,
362 &strict))
363 return NULL;
364
365 if (dialect != NULL) {
366 if (IS_BASESTRING(dialect)) {
367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375 delimiter == 0 &&
376 doublequote == 0 &&
377 escapechar == 0 &&
378 lineterminator == 0 &&
379 quotechar == 0 &&
380 quoting == 0 &&
381 skipinitialspace == 0 &&
382 strict == 0)
383 return dialect;
384 }
385
386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
392
393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
402 #define DIALECT_GETATTR(v, n) \
403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
415
416 /* check types and convert to C values */
417 #define DIASET(meth, name, target, src, dflt) \
418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427 DIASET(_set_bool, "strict", &self->strict, strict, 0);
428
429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
433 PyErr_SetString(PyExc_TypeError,
434 "\"delimiter\" must be an 1-character string");
435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
448
449 ret = (PyObject *)self;
450 Py_INCREF(self);
451 err:
452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
463 }
464
465
466 PyDoc_STRVAR(Dialect_Type_doc,
467 "CSV dialect\n"
468 "\n"
469 "The Dialect type records CSV parsing and generation options.\n");
470
471 static PyTypeObject Dialect_Type = {
472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
478 (printfunc)0, /* tp_print */
479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
481 (cmpfunc)0, /* tp_compare */
482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
512 };
513
514 /*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520 {
521 PyObject *ctor_args;
522 PyObject *dialect;
523
524 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
525 if (ctor_args == NULL)
526 return NULL;
527 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
528 Py_DECREF(ctor_args);
529 return dialect;
530 }
531
532 /*
533 * READER
534 */
535 static int
parse_save_field(ReaderObj * self)536 parse_save_field(ReaderObj *self)
537 {
538 PyObject *field;
539
540 field = PyString_FromStringAndSize(self->field, self->field_len);
541 if (field == NULL)
542 return -1;
543 self->field_len = 0;
544 if (self->numeric_field) {
545 PyObject *tmp;
546
547 self->numeric_field = 0;
548 tmp = PyNumber_Float(field);
549 if (tmp == NULL) {
550 Py_DECREF(field);
551 return -1;
552 }
553 Py_DECREF(field);
554 field = tmp;
555 }
556 PyList_Append(self->fields, field);
557 Py_DECREF(field);
558 return 0;
559 }
560
561 static int
parse_grow_buff(ReaderObj * self)562 parse_grow_buff(ReaderObj *self)
563 {
564 if (self->field_size == 0) {
565 self->field_size = 4096;
566 if (self->field != NULL)
567 PyMem_Free(self->field);
568 self->field = PyMem_Malloc(self->field_size);
569 }
570 else {
571 if (self->field_size > INT_MAX / 2) {
572 PyErr_NoMemory();
573 return 0;
574 }
575 self->field_size *= 2;
576 self->field = PyMem_Realloc(self->field, self->field_size);
577 }
578 if (self->field == NULL) {
579 PyErr_NoMemory();
580 return 0;
581 }
582 return 1;
583 }
584
585 static int
parse_add_char(ReaderObj * self,char c)586 parse_add_char(ReaderObj *self, char c)
587 {
588 if (self->field_len >= field_limit) {
589 PyErr_Format(error_obj, "field larger than field limit (%ld)",
590 field_limit);
591 return -1;
592 }
593 if (self->field_len == self->field_size && !parse_grow_buff(self))
594 return -1;
595 self->field[self->field_len++] = c;
596 return 0;
597 }
598
599 static int
parse_process_char(ReaderObj * self,char c)600 parse_process_char(ReaderObj *self, char c)
601 {
602 DialectObj *dialect = self->dialect;
603
604 switch (self->state) {
605 case START_RECORD:
606 /* start of record */
607 if (c == '\0')
608 /* empty line - return [] */
609 break;
610 else if (c == '\n' || c == '\r') {
611 self->state = EAT_CRNL;
612 break;
613 }
614 /* normal character - handle as START_FIELD */
615 self->state = START_FIELD;
616 /* fallthru */
617 case START_FIELD:
618 /* expecting field */
619 if (c == '\n' || c == '\r' || c == '\0') {
620 /* save empty field - return [fields] */
621 if (parse_save_field(self) < 0)
622 return -1;
623 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
624 }
625 else if (c == dialect->quotechar &&
626 dialect->quoting != QUOTE_NONE) {
627 /* start quoted field */
628 self->state = IN_QUOTED_FIELD;
629 }
630 else if (c == dialect->escapechar) {
631 /* possible escaped character */
632 self->state = ESCAPED_CHAR;
633 }
634 else if (c == ' ' && dialect->skipinitialspace)
635 /* ignore space at start of field */
636 ;
637 else if (c == dialect->delimiter) {
638 /* save empty field */
639 if (parse_save_field(self) < 0)
640 return -1;
641 }
642 else {
643 /* begin new unquoted field */
644 if (dialect->quoting == QUOTE_NONNUMERIC)
645 self->numeric_field = 1;
646 if (parse_add_char(self, c) < 0)
647 return -1;
648 self->state = IN_FIELD;
649 }
650 break;
651
652 case ESCAPED_CHAR:
653 if (c == '\0')
654 c = '\n';
655 if (parse_add_char(self, c) < 0)
656 return -1;
657 self->state = IN_FIELD;
658 break;
659
660 case IN_FIELD:
661 /* in unquoted field */
662 if (c == '\n' || c == '\r' || c == '\0') {
663 /* end of line - return [fields] */
664 if (parse_save_field(self) < 0)
665 return -1;
666 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
667 }
668 else if (c == dialect->escapechar) {
669 /* possible escaped character */
670 self->state = ESCAPED_CHAR;
671 }
672 else if (c == dialect->delimiter) {
673 /* save field - wait for new field */
674 if (parse_save_field(self) < 0)
675 return -1;
676 self->state = START_FIELD;
677 }
678 else {
679 /* normal character - save in field */
680 if (parse_add_char(self, c) < 0)
681 return -1;
682 }
683 break;
684
685 case IN_QUOTED_FIELD:
686 /* in quoted field */
687 if (c == '\0')
688 ;
689 else if (c == dialect->escapechar) {
690 /* Possible escape character */
691 self->state = ESCAPE_IN_QUOTED_FIELD;
692 }
693 else if (c == dialect->quotechar &&
694 dialect->quoting != QUOTE_NONE) {
695 if (dialect->doublequote) {
696 /* doublequote; " represented by "" */
697 self->state = QUOTE_IN_QUOTED_FIELD;
698 }
699 else {
700 /* end of quote part of field */
701 self->state = IN_FIELD;
702 }
703 }
704 else {
705 /* normal character - save in field */
706 if (parse_add_char(self, c) < 0)
707 return -1;
708 }
709 break;
710
711 case ESCAPE_IN_QUOTED_FIELD:
712 if (c == '\0')
713 c = '\n';
714 if (parse_add_char(self, c) < 0)
715 return -1;
716 self->state = IN_QUOTED_FIELD;
717 break;
718
719 case QUOTE_IN_QUOTED_FIELD:
720 /* doublequote - seen a quote in a quoted field */
721 if (dialect->quoting != QUOTE_NONE &&
722 c == dialect->quotechar) {
723 /* save "" as " */
724 if (parse_add_char(self, c) < 0)
725 return -1;
726 self->state = IN_QUOTED_FIELD;
727 }
728 else if (c == dialect->delimiter) {
729 /* save field - wait for new field */
730 if (parse_save_field(self) < 0)
731 return -1;
732 self->state = START_FIELD;
733 }
734 else if (c == '\n' || c == '\r' || c == '\0') {
735 /* end of line - return [fields] */
736 if (parse_save_field(self) < 0)
737 return -1;
738 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
739 }
740 else if (!dialect->strict) {
741 if (parse_add_char(self, c) < 0)
742 return -1;
743 self->state = IN_FIELD;
744 }
745 else {
746 /* illegal */
747 PyErr_Format(error_obj, "'%c' expected after '%c'",
748 dialect->delimiter,
749 dialect->quotechar);
750 return -1;
751 }
752 break;
753
754 case EAT_CRNL:
755 if (c == '\n' || c == '\r')
756 ;
757 else if (c == '\0')
758 self->state = START_RECORD;
759 else {
760 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
761 return -1;
762 }
763 break;
764
765 }
766 return 0;
767 }
768
769 static int
parse_reset(ReaderObj * self)770 parse_reset(ReaderObj *self)
771 {
772 Py_XSETREF(self->fields, PyList_New(0));
773 if (self->fields == NULL)
774 return -1;
775 self->field_len = 0;
776 self->state = START_RECORD;
777 self->numeric_field = 0;
778 return 0;
779 }
780
781 static PyObject *
Reader_iternext(ReaderObj * self)782 Reader_iternext(ReaderObj *self)
783 {
784 PyObject *lineobj;
785 PyObject *fields = NULL;
786 char *line, c;
787 int linelen;
788
789 if (parse_reset(self) < 0)
790 return NULL;
791 do {
792 lineobj = PyIter_Next(self->input_iter);
793 if (lineobj == NULL) {
794 /* End of input OR exception */
795 if (!PyErr_Occurred() && (self->field_len != 0 ||
796 self->state == IN_QUOTED_FIELD)) {
797 if (self->dialect->strict)
798 PyErr_SetString(error_obj, "unexpected end of data");
799 else if (parse_save_field(self) >= 0 )
800 break;
801 }
802 return NULL;
803 }
804 ++self->line_num;
805
806 line = PyString_AsString(lineobj);
807 linelen = PyString_Size(lineobj);
808
809 if (line == NULL || linelen < 0) {
810 Py_DECREF(lineobj);
811 return NULL;
812 }
813 while (linelen--) {
814 c = *line++;
815 if (c == '\0') {
816 Py_DECREF(lineobj);
817 PyErr_Format(error_obj,
818 "line contains NULL byte");
819 goto err;
820 }
821 if (parse_process_char(self, c) < 0) {
822 Py_DECREF(lineobj);
823 goto err;
824 }
825 }
826 Py_DECREF(lineobj);
827 if (parse_process_char(self, 0) < 0)
828 goto err;
829 } while (self->state != START_RECORD);
830
831 fields = self->fields;
832 self->fields = NULL;
833 err:
834 return fields;
835 }
836
837 static void
Reader_dealloc(ReaderObj * self)838 Reader_dealloc(ReaderObj *self)
839 {
840 PyObject_GC_UnTrack(self);
841 Py_XDECREF(self->dialect);
842 Py_XDECREF(self->input_iter);
843 Py_XDECREF(self->fields);
844 if (self->field != NULL)
845 PyMem_Free(self->field);
846 PyObject_GC_Del(self);
847 }
848
849 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)850 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
851 {
852 Py_VISIT(self->dialect);
853 Py_VISIT(self->input_iter);
854 Py_VISIT(self->fields);
855 return 0;
856 }
857
858 static int
Reader_clear(ReaderObj * self)859 Reader_clear(ReaderObj *self)
860 {
861 Py_CLEAR(self->dialect);
862 Py_CLEAR(self->input_iter);
863 Py_CLEAR(self->fields);
864 return 0;
865 }
866
867 PyDoc_STRVAR(Reader_Type_doc,
868 "CSV reader\n"
869 "\n"
870 "Reader objects are responsible for reading and parsing tabular data\n"
871 "in CSV format.\n"
872 );
873
874 static struct PyMethodDef Reader_methods[] = {
875 { NULL, NULL }
876 };
877 #define R_OFF(x) offsetof(ReaderObj, x)
878
879 static struct PyMemberDef Reader_memberlist[] = {
880 { "dialect", T_OBJECT, R_OFF(dialect), RO },
881 { "line_num", T_ULONG, R_OFF(line_num), RO },
882 { NULL }
883 };
884
885
886 static PyTypeObject Reader_Type = {
887 PyVarObject_HEAD_INIT(NULL, 0)
888 "_csv.reader", /*tp_name*/
889 sizeof(ReaderObj), /*tp_basicsize*/
890 0, /*tp_itemsize*/
891 /* methods */
892 (destructor)Reader_dealloc, /*tp_dealloc*/
893 (printfunc)0, /*tp_print*/
894 (getattrfunc)0, /*tp_getattr*/
895 (setattrfunc)0, /*tp_setattr*/
896 (cmpfunc)0, /*tp_compare*/
897 (reprfunc)0, /*tp_repr*/
898 0, /*tp_as_number*/
899 0, /*tp_as_sequence*/
900 0, /*tp_as_mapping*/
901 (hashfunc)0, /*tp_hash*/
902 (ternaryfunc)0, /*tp_call*/
903 (reprfunc)0, /*tp_str*/
904 0, /*tp_getattro*/
905 0, /*tp_setattro*/
906 0, /*tp_as_buffer*/
907 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
908 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
909 Reader_Type_doc, /*tp_doc*/
910 (traverseproc)Reader_traverse, /*tp_traverse*/
911 (inquiry)Reader_clear, /*tp_clear*/
912 0, /*tp_richcompare*/
913 0, /*tp_weaklistoffset*/
914 PyObject_SelfIter, /*tp_iter*/
915 (getiterfunc)Reader_iternext, /*tp_iternext*/
916 Reader_methods, /*tp_methods*/
917 Reader_memberlist, /*tp_members*/
918 0, /*tp_getset*/
919
920 };
921
922 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)923 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
924 {
925 PyObject * iterator, * dialect = NULL;
926 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
927
928 if (!self)
929 return NULL;
930
931 self->dialect = NULL;
932 self->fields = NULL;
933 self->input_iter = NULL;
934 self->field = NULL;
935 self->field_size = 0;
936 self->line_num = 0;
937
938 if (parse_reset(self) < 0) {
939 Py_DECREF(self);
940 return NULL;
941 }
942
943 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
944 Py_DECREF(self);
945 return NULL;
946 }
947 self->input_iter = PyObject_GetIter(iterator);
948 if (self->input_iter == NULL) {
949 PyErr_SetString(PyExc_TypeError,
950 "argument 1 must be an iterator");
951 Py_DECREF(self);
952 return NULL;
953 }
954 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
955 if (self->dialect == NULL) {
956 Py_DECREF(self);
957 return NULL;
958 }
959
960 PyObject_GC_Track(self);
961 return (PyObject *)self;
962 }
963
964 /*
965 * WRITER
966 */
967 /* ---------------------------------------------------------------- */
968 static void
join_reset(WriterObj * self)969 join_reset(WriterObj *self)
970 {
971 self->rec_len = 0;
972 self->num_fields = 0;
973 }
974
975 #define MEM_INCR 32768
976
977 /* Calculate new record length or append field to record. Return new
978 * record length.
979 */
980 static int
join_append_data(WriterObj * self,char * field,int quote_empty,int * quoted,int copy_phase)981 join_append_data(WriterObj *self, char *field, int quote_empty,
982 int *quoted, int copy_phase)
983 {
984 DialectObj *dialect = self->dialect;
985 int i, rec_len;
986 char *lineterm;
987
988 #define INCLEN \
989 do {\
990 if (!copy_phase && rec_len == INT_MAX) { \
991 goto overflow; \
992 } \
993 rec_len++; \
994 } while(0)
995
996 #define ADDCH(c) \
997 do {\
998 if (copy_phase) \
999 self->rec[rec_len] = c;\
1000 INCLEN;\
1001 } while(0)
1002
1003 lineterm = PyString_AsString(dialect->lineterminator);
1004 if (lineterm == NULL)
1005 return -1;
1006
1007 rec_len = self->rec_len;
1008
1009 /* If this is not the first field we need a field separator */
1010 if (self->num_fields > 0)
1011 ADDCH(dialect->delimiter);
1012
1013 /* Handle preceding quote */
1014 if (copy_phase && *quoted)
1015 ADDCH(dialect->quotechar);
1016
1017 /* Copy/count field data */
1018 for (i = 0;; i++) {
1019 char c = field[i];
1020 int want_escape = 0;
1021
1022 if (c == '\0')
1023 break;
1024
1025 if (c == dialect->delimiter ||
1026 c == dialect->escapechar ||
1027 c == dialect->quotechar ||
1028 strchr(lineterm, c)) {
1029 if (dialect->quoting == QUOTE_NONE)
1030 want_escape = 1;
1031 else {
1032 if (c == dialect->quotechar) {
1033 if (dialect->doublequote)
1034 ADDCH(dialect->quotechar);
1035 else
1036 want_escape = 1;
1037 }
1038 if (!want_escape)
1039 *quoted = 1;
1040 }
1041 if (want_escape) {
1042 if (!dialect->escapechar) {
1043 PyErr_Format(error_obj,
1044 "need to escape, but no escapechar set");
1045 return -1;
1046 }
1047 ADDCH(dialect->escapechar);
1048 }
1049 }
1050 /* Copy field character into record buffer.
1051 */
1052 ADDCH(c);
1053 }
1054
1055 /* If field is empty check if it needs to be quoted.
1056 */
1057 if (i == 0 && quote_empty) {
1058 if (dialect->quoting == QUOTE_NONE) {
1059 PyErr_Format(error_obj,
1060 "single empty field record must be quoted");
1061 return -1;
1062 }
1063 else
1064 *quoted = 1;
1065 }
1066
1067 if (*quoted) {
1068 if (copy_phase)
1069 ADDCH(dialect->quotechar);
1070 else {
1071 INCLEN; /* starting quote */
1072 INCLEN; /* ending quote */
1073 }
1074 }
1075 return rec_len;
1076
1077 overflow:
1078 PyErr_NoMemory();
1079 return -1;
1080 #undef ADDCH
1081 #undef INCLEN
1082 }
1083
1084 static int
join_check_rec_size(WriterObj * self,int rec_len)1085 join_check_rec_size(WriterObj *self, int rec_len)
1086 {
1087
1088 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1089 PyErr_NoMemory();
1090 return 0;
1091 }
1092
1093 if (rec_len > self->rec_size) {
1094 if (self->rec_size == 0) {
1095 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1096 if (self->rec != NULL)
1097 PyMem_Free(self->rec);
1098 self->rec = PyMem_Malloc(self->rec_size);
1099 }
1100 else {
1101 char *old_rec = self->rec;
1102
1103 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1104 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1105 if (self->rec == NULL)
1106 PyMem_Free(old_rec);
1107 }
1108 if (self->rec == NULL) {
1109 PyErr_NoMemory();
1110 return 0;
1111 }
1112 }
1113 return 1;
1114 }
1115
1116 static int
join_append(WriterObj * self,char * field,int * quoted,int quote_empty)1117 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1118 {
1119 int rec_len;
1120
1121 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1122 if (rec_len < 0)
1123 return 0;
1124
1125 /* grow record buffer if necessary */
1126 if (!join_check_rec_size(self, rec_len))
1127 return 0;
1128
1129 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1130 self->num_fields++;
1131
1132 return 1;
1133 }
1134
1135 static int
join_append_lineterminator(WriterObj * self)1136 join_append_lineterminator(WriterObj *self)
1137 {
1138 int terminator_len;
1139 char *terminator;
1140
1141 terminator_len = PyString_Size(self->dialect->lineterminator);
1142 if (terminator_len == -1)
1143 return 0;
1144
1145 /* grow record buffer if necessary */
1146 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1147 return 0;
1148
1149 terminator = PyString_AsString(self->dialect->lineterminator);
1150 if (terminator == NULL)
1151 return 0;
1152 memmove(self->rec + self->rec_len, terminator, terminator_len);
1153 self->rec_len += terminator_len;
1154
1155 return 1;
1156 }
1157
1158 PyDoc_STRVAR(csv_writerow_doc,
1159 "writerow(sequence)\n"
1160 "\n"
1161 "Construct and write a CSV record from a sequence of fields. Non-string\n"
1162 "elements will be converted to string.");
1163
1164 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1165 csv_writerow(WriterObj *self, PyObject *seq)
1166 {
1167 DialectObj *dialect = self->dialect;
1168 int len, i;
1169
1170 if (!PySequence_Check(seq))
1171 return PyErr_Format(error_obj, "sequence expected");
1172
1173 len = PySequence_Length(seq);
1174 if (len < 0)
1175 return NULL;
1176
1177 /* Join all fields in internal buffer.
1178 */
1179 join_reset(self);
1180 for (i = 0; i < len; i++) {
1181 PyObject *field;
1182 int append_ok;
1183 int quoted;
1184
1185 field = PySequence_GetItem(seq, i);
1186 if (field == NULL)
1187 return NULL;
1188
1189 switch (dialect->quoting) {
1190 case QUOTE_NONNUMERIC:
1191 quoted = !PyNumber_Check(field);
1192 break;
1193 case QUOTE_ALL:
1194 quoted = 1;
1195 break;
1196 default:
1197 quoted = 0;
1198 break;
1199 }
1200
1201 if (PyString_Check(field)) {
1202 append_ok = join_append(self,
1203 PyString_AS_STRING(field),
1204 "ed, len == 1);
1205 Py_DECREF(field);
1206 }
1207 else if (field == Py_None) {
1208 append_ok = join_append(self, "", "ed, len == 1);
1209 Py_DECREF(field);
1210 }
1211 else {
1212 PyObject *str;
1213
1214 if (PyFloat_Check(field)) {
1215 str = PyObject_Repr(field);
1216 } else {
1217 str = PyObject_Str(field);
1218 }
1219 Py_DECREF(field);
1220 if (str == NULL)
1221 return NULL;
1222
1223 append_ok = join_append(self, PyString_AS_STRING(str),
1224 "ed, len == 1);
1225 Py_DECREF(str);
1226 }
1227 if (!append_ok)
1228 return NULL;
1229 }
1230
1231 /* Add line terminator.
1232 */
1233 if (!join_append_lineterminator(self))
1234 return 0;
1235
1236 return PyObject_CallFunction(self->writeline,
1237 "(s#)", self->rec, self->rec_len);
1238 }
1239
1240 PyDoc_STRVAR(csv_writerows_doc,
1241 "writerows(sequence of sequences)\n"
1242 "\n"
1243 "Construct and write a series of sequences to a csv file. Non-string\n"
1244 "elements will be converted to string.");
1245
1246 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1247 csv_writerows(WriterObj *self, PyObject *seqseq)
1248 {
1249 PyObject *row_iter, *row_obj, *result;
1250
1251 row_iter = PyObject_GetIter(seqseq);
1252 if (row_iter == NULL) {
1253 PyErr_SetString(PyExc_TypeError,
1254 "writerows() argument must be iterable");
1255 return NULL;
1256 }
1257 while ((row_obj = PyIter_Next(row_iter))) {
1258 result = csv_writerow(self, row_obj);
1259 Py_DECREF(row_obj);
1260 if (!result) {
1261 Py_DECREF(row_iter);
1262 return NULL;
1263 }
1264 else
1265 Py_DECREF(result);
1266 }
1267 Py_DECREF(row_iter);
1268 if (PyErr_Occurred())
1269 return NULL;
1270 Py_INCREF(Py_None);
1271 return Py_None;
1272 }
1273
1274 static struct PyMethodDef Writer_methods[] = {
1275 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1276 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1277 { NULL, NULL }
1278 };
1279
1280 #define W_OFF(x) offsetof(WriterObj, x)
1281
1282 static struct PyMemberDef Writer_memberlist[] = {
1283 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1284 { NULL }
1285 };
1286
1287 static void
Writer_dealloc(WriterObj * self)1288 Writer_dealloc(WriterObj *self)
1289 {
1290 PyObject_GC_UnTrack(self);
1291 Py_XDECREF(self->dialect);
1292 Py_XDECREF(self->writeline);
1293 if (self->rec != NULL)
1294 PyMem_Free(self->rec);
1295 PyObject_GC_Del(self);
1296 }
1297
1298 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1299 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1300 {
1301 Py_VISIT(self->dialect);
1302 Py_VISIT(self->writeline);
1303 return 0;
1304 }
1305
1306 static int
Writer_clear(WriterObj * self)1307 Writer_clear(WriterObj *self)
1308 {
1309 Py_CLEAR(self->dialect);
1310 Py_CLEAR(self->writeline);
1311 return 0;
1312 }
1313
1314 PyDoc_STRVAR(Writer_Type_doc,
1315 "CSV writer\n"
1316 "\n"
1317 "Writer objects are responsible for generating tabular data\n"
1318 "in CSV format from sequence input.\n"
1319 );
1320
1321 static PyTypeObject Writer_Type = {
1322 PyVarObject_HEAD_INIT(NULL, 0)
1323 "_csv.writer", /*tp_name*/
1324 sizeof(WriterObj), /*tp_basicsize*/
1325 0, /*tp_itemsize*/
1326 /* methods */
1327 (destructor)Writer_dealloc, /*tp_dealloc*/
1328 (printfunc)0, /*tp_print*/
1329 (getattrfunc)0, /*tp_getattr*/
1330 (setattrfunc)0, /*tp_setattr*/
1331 (cmpfunc)0, /*tp_compare*/
1332 (reprfunc)0, /*tp_repr*/
1333 0, /*tp_as_number*/
1334 0, /*tp_as_sequence*/
1335 0, /*tp_as_mapping*/
1336 (hashfunc)0, /*tp_hash*/
1337 (ternaryfunc)0, /*tp_call*/
1338 (reprfunc)0, /*tp_str*/
1339 0, /*tp_getattro*/
1340 0, /*tp_setattro*/
1341 0, /*tp_as_buffer*/
1342 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1343 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1344 Writer_Type_doc,
1345 (traverseproc)Writer_traverse, /*tp_traverse*/
1346 (inquiry)Writer_clear, /*tp_clear*/
1347 0, /*tp_richcompare*/
1348 0, /*tp_weaklistoffset*/
1349 (getiterfunc)0, /*tp_iter*/
1350 (getiterfunc)0, /*tp_iternext*/
1351 Writer_methods, /*tp_methods*/
1352 Writer_memberlist, /*tp_members*/
1353 0, /*tp_getset*/
1354 };
1355
1356 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1357 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1358 {
1359 PyObject * output_file, * dialect = NULL;
1360 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1361
1362 if (!self)
1363 return NULL;
1364
1365 self->dialect = NULL;
1366 self->writeline = NULL;
1367
1368 self->rec = NULL;
1369 self->rec_size = 0;
1370 self->rec_len = 0;
1371 self->num_fields = 0;
1372
1373 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1374 Py_DECREF(self);
1375 return NULL;
1376 }
1377 self->writeline = PyObject_GetAttrString(output_file, "write");
1378 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1379 PyErr_SetString(PyExc_TypeError,
1380 "argument 1 must have a \"write\" method");
1381 Py_DECREF(self);
1382 return NULL;
1383 }
1384 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1385 if (self->dialect == NULL) {
1386 Py_DECREF(self);
1387 return NULL;
1388 }
1389 PyObject_GC_Track(self);
1390 return (PyObject *)self;
1391 }
1392
1393 /*
1394 * DIALECT REGISTRY
1395 */
1396 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1397 csv_list_dialects(PyObject *module, PyObject *args)
1398 {
1399 return PyDict_Keys(dialects);
1400 }
1401
1402 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1403 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1404 {
1405 PyObject *name_obj, *dialect_obj = NULL;
1406 PyObject *dialect;
1407
1408 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1409 return NULL;
1410 if (!IS_BASESTRING(name_obj)) {
1411 PyErr_SetString(PyExc_TypeError,
1412 "dialect name must be a string or unicode");
1413 return NULL;
1414 }
1415 dialect = _call_dialect(dialect_obj, kwargs);
1416 if (dialect == NULL)
1417 return NULL;
1418 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1419 Py_DECREF(dialect);
1420 return NULL;
1421 }
1422 Py_DECREF(dialect);
1423 Py_INCREF(Py_None);
1424 return Py_None;
1425 }
1426
1427 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1428 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1429 {
1430 if (PyDict_DelItem(dialects, name_obj) < 0)
1431 return PyErr_Format(error_obj, "unknown dialect");
1432 Py_INCREF(Py_None);
1433 return Py_None;
1434 }
1435
1436 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1437 csv_get_dialect(PyObject *module, PyObject *name_obj)
1438 {
1439 return get_dialect_from_registry(name_obj);
1440 }
1441
1442 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1443 csv_field_size_limit(PyObject *module, PyObject *args)
1444 {
1445 PyObject *new_limit = NULL;
1446 long old_limit = field_limit;
1447
1448 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1449 return NULL;
1450 if (new_limit != NULL) {
1451 if (!PyInt_Check(new_limit)) {
1452 PyErr_Format(PyExc_TypeError,
1453 "limit must be an integer");
1454 return NULL;
1455 }
1456 field_limit = PyInt_AsLong(new_limit);
1457 }
1458 return PyInt_FromLong(old_limit);
1459 }
1460
1461 /*
1462 * MODULE
1463 */
1464
1465 PyDoc_STRVAR(csv_module_doc,
1466 "CSV parsing and writing.\n"
1467 "\n"
1468 "This module provides classes that assist in the reading and writing\n"
1469 "of Comma Separated Value (CSV) files, and implements the interface\n"
1470 "described by PEP 305. Although many CSV files are simple to parse,\n"
1471 "the format is not formally defined by a stable specification and\n"
1472 "is subtle enough that parsing lines of a CSV file with something\n"
1473 "like line.split(\",\") is bound to fail. The module supports three\n"
1474 "basic APIs: reading, writing, and registration of dialects.\n"
1475 "\n"
1476 "\n"
1477 "DIALECT REGISTRATION:\n"
1478 "\n"
1479 "Readers and writers support a dialect argument, which is a convenient\n"
1480 "handle on a group of settings. When the dialect argument is a string,\n"
1481 "it identifies one of the dialects previously registered with the module.\n"
1482 "If it is a class or instance, the attributes of the argument are used as\n"
1483 "the settings for the reader or writer:\n"
1484 "\n"
1485 " class excel:\n"
1486 " delimiter = ','\n"
1487 " quotechar = '\"'\n"
1488 " escapechar = None\n"
1489 " doublequote = True\n"
1490 " skipinitialspace = False\n"
1491 " lineterminator = '\\r\\n'\n"
1492 " quoting = QUOTE_MINIMAL\n"
1493 "\n"
1494 "SETTINGS:\n"
1495 "\n"
1496 " * quotechar - specifies a one-character string to use as the \n"
1497 " quoting character. It defaults to '\"'.\n"
1498 " * delimiter - specifies a one-character string to use as the \n"
1499 " field separator. It defaults to ','.\n"
1500 " * skipinitialspace - specifies how to interpret whitespace which\n"
1501 " immediately follows a delimiter. It defaults to False, which\n"
1502 " means that whitespace immediately following a delimiter is part\n"
1503 " of the following field.\n"
1504 " * lineterminator - specifies the character sequence which should \n"
1505 " terminate rows.\n"
1506 " * quoting - controls when quotes should be generated by the writer.\n"
1507 " It can take on any of the following module constants:\n"
1508 "\n"
1509 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1510 " field contains either the quotechar or the delimiter\n"
1511 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1512 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1513 " fields which do not parse as integers or floating point\n"
1514 " numbers.\n"
1515 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1516 " * escapechar - specifies a one-character string used to escape \n"
1517 " the delimiter when quoting is set to QUOTE_NONE.\n"
1518 " * doublequote - controls the handling of quotes inside fields. When\n"
1519 " True, two consecutive quotes are interpreted as one during read,\n"
1520 " and when writing, each quote character embedded in the data is\n"
1521 " written as two quotes\n");
1522
1523 PyDoc_STRVAR(csv_reader_doc,
1524 " csv_reader = reader(iterable [, dialect='excel']\n"
1525 " [optional keyword args])\n"
1526 " for row in csv_reader:\n"
1527 " process(row)\n"
1528 "\n"
1529 "The \"iterable\" argument can be any object that returns a line\n"
1530 "of input for each iteration, such as a file object or a list. The\n"
1531 "optional \"dialect\" parameter is discussed below. The function\n"
1532 "also accepts optional keyword arguments which override settings\n"
1533 "provided by the dialect.\n"
1534 "\n"
1535 "The returned object is an iterator. Each iteration returns a row\n"
1536 "of the CSV file (which can span multiple input lines).\n");
1537
1538 PyDoc_STRVAR(csv_writer_doc,
1539 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1540 " [optional keyword args])\n"
1541 " for row in sequence:\n"
1542 " csv_writer.writerow(row)\n"
1543 "\n"
1544 " [or]\n"
1545 "\n"
1546 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1547 " [optional keyword args])\n"
1548 " csv_writer.writerows(rows)\n"
1549 "\n"
1550 "The \"fileobj\" argument can be any object that supports the file API.\n");
1551
1552 PyDoc_STRVAR(csv_list_dialects_doc,
1553 "Return a list of all know dialect names.\n"
1554 " names = csv.list_dialects()");
1555
1556 PyDoc_STRVAR(csv_get_dialect_doc,
1557 "Return the dialect instance associated with name.\n"
1558 " dialect = csv.get_dialect(name)");
1559
1560 PyDoc_STRVAR(csv_register_dialect_doc,
1561 "Create a mapping from a string name to a dialect class.\n"
1562 " dialect = csv.register_dialect(name, dialect)");
1563
1564 PyDoc_STRVAR(csv_unregister_dialect_doc,
1565 "Delete the name/dialect mapping associated with a string name.\n"
1566 " csv.unregister_dialect(name)");
1567
1568 PyDoc_STRVAR(csv_field_size_limit_doc,
1569 "Sets an upper limit on parsed fields.\n"
1570 " csv.field_size_limit([limit])\n"
1571 "\n"
1572 "Returns old limit. If limit is not given, no new limit is set and\n"
1573 "the old limit is returned");
1574
1575 static struct PyMethodDef csv_methods[] = {
1576 { "reader", (PyCFunction)csv_reader,
1577 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1578 { "writer", (PyCFunction)csv_writer,
1579 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1580 { "list_dialects", (PyCFunction)csv_list_dialects,
1581 METH_NOARGS, csv_list_dialects_doc},
1582 { "register_dialect", (PyCFunction)csv_register_dialect,
1583 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1584 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1585 METH_O, csv_unregister_dialect_doc},
1586 { "get_dialect", (PyCFunction)csv_get_dialect,
1587 METH_O, csv_get_dialect_doc},
1588 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1589 METH_VARARGS, csv_field_size_limit_doc},
1590 { NULL, NULL }
1591 };
1592
1593 PyMODINIT_FUNC
init_csv(void)1594 init_csv(void)
1595 {
1596 PyObject *module;
1597 StyleDesc *style;
1598
1599 if (PyType_Ready(&Dialect_Type) < 0)
1600 return;
1601
1602 if (PyType_Ready(&Reader_Type) < 0)
1603 return;
1604
1605 if (PyType_Ready(&Writer_Type) < 0)
1606 return;
1607
1608 /* Create the module and add the functions */
1609 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1610 if (module == NULL)
1611 return;
1612
1613 /* Add version to the module. */
1614 if (PyModule_AddStringConstant(module, "__version__",
1615 MODULE_VERSION) == -1)
1616 return;
1617
1618 /* Add _dialects dictionary */
1619 dialects = PyDict_New();
1620 if (dialects == NULL)
1621 return;
1622 if (PyModule_AddObject(module, "_dialects", dialects))
1623 return;
1624
1625 /* Add quote styles into dictionary */
1626 for (style = quote_styles; style->name; style++) {
1627 if (PyModule_AddIntConstant(module, style->name,
1628 style->style) == -1)
1629 return;
1630 }
1631
1632 /* Add the Dialect type */
1633 Py_INCREF(&Dialect_Type);
1634 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1635 return;
1636
1637 /* Add the CSV exception object to the module. */
1638 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1639 if (error_obj == NULL)
1640 return;
1641 PyModule_AddObject(module, "Error", error_obj);
1642 }
1643