1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 #define MODULE_VERSION "1.0"
12
13 #include "Python.h"
14 #include "structmember.h"
15 #include <stdbool.h>
16
17
/* Per-module state.  It is obtained from a module object with
 * PyModule_GetState() (see the _csvstate() macro below). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;    /* Dialect registry */
    long field_limit;          /* max parsed field size */
} _csvstate;

/* Fetch the state of module object `o`, typed as _csvstate. */
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
25
26 static int
_csv_clear(PyObject * m)27 _csv_clear(PyObject *m)
28 {
29 Py_CLEAR(_csvstate(m)->error_obj);
30 Py_CLEAR(_csvstate(m)->dialects);
31 return 0;
32 }
33
34 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)35 _csv_traverse(PyObject *m, visitproc visit, void *arg)
36 {
37 Py_VISIT(_csvstate(m)->error_obj);
38 Py_VISIT(_csvstate(m)->dialects);
39 return 0;
40 }
41
42 static void
_csv_free(void * m)43 _csv_free(void *m)
44 {
45 _csv_clear((PyObject *)m);
46 }
47
/* Forward declaration; needed by the _csvstate_global macro below. */
static struct PyModuleDef _csvmodule;

/* State of the module found through PyState_FindModule(&_csvmodule).
 * Note that every use of this macro performs the lookup again. */
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
51
/* States of the reader's character-level state machine; the transitions
 * are implemented in parse_process_char().  START_RECORD is the state set
 * by parse_reset() and the one Reader_iternext() waits for before it
 * returns a record. */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
57
/* Values of a dialect's "quoting" option.  As used in this file:
 *   QUOTE_MINIMAL    - the writer starts each field unquoted and
 *                      join_append_data() turns quoting on when the field
 *                      contains a special character
 *   QUOTE_ALL        - the writer quotes every field
 *   QUOTE_NONNUMERIC - the writer quotes fields for which PyNumber_Check()
 *                      is false; the reader converts unquoted fields with
 *                      PyNumber_Float()
 *   QUOTE_NONE       - the writer never quotes and escapes special
 *                      characters instead; the reader does not treat
 *                      quotechar specially
 */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
61
/* Pairs a QuoteStyle value with its textual name (see quote_styles[]). */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;
66
/* Table of the known quoting styles.  The final `{ 0 }` entry has a NULL
 * name and acts as the end marker that dialect_check_quoting() stops at. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
74
/* Instance layout of the Dialect type: the dialect options already
 * converted to C values by dialect_new(). */
typedef struct {
    PyObject_HEAD

    char doublequote;           /* is " represented by ""? */
    char skipinitialspace;      /* ignore spaces following delimiter? */
    char strict;                /* raise exception on bad CSV */
    int quoting;                /* style of quoting to write */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character; 0 means "not set" */
    Py_UCS4 escapechar;         /* escape character; 0 means "not set" */
    PyObject *lineterminator;   /* string to write between records */

} DialectObj;

/* Forward declaration; the type object is defined after dialect_new(). */
static PyTypeObject Dialect_Type;
90
/* Instance layout of the reader type created by csv_reader(). */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer */
    Py_ssize_t field_size;  /* size of allocated buffer, in Py_UCS4 units */
    Py_ssize_t field_len;   /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number; incremented for
                               every line taken from input_iter */
} ReaderObj;

/* Forward declaration; the type object is defined further down. */
static PyTypeObject Reader_Type;

/* Exact type test: true only when v's type is Reader_Type itself. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
110
/* Instance layout of the writer type. */
typedef struct {
    PyObject_HEAD

    PyObject *write;    /* callable that csv_writerow() invokes with each
                           formatted record string */

    DialectObj *dialect;    /* formatting dialect */

    Py_UCS4 *rec;            /* buffer in which the record is assembled */
    Py_ssize_t rec_size;        /* size of allocated record, in Py_UCS4 units */
    Py_ssize_t rec_len;         /* length of record */
    int num_fields;             /* number of fields in record */
} WriterObj;

/* Forward declaration of the writer type object. */
static PyTypeObject Writer_Type;
125
126 /*
127 * DIALECT class
128 */
129
130 static PyObject *
get_dialect_from_registry(PyObject * name_obj)131 get_dialect_from_registry(PyObject * name_obj)
132 {
133 PyObject *dialect_obj;
134
135 dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
136 if (dialect_obj == NULL) {
137 if (!PyErr_Occurred())
138 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
139 }
140 else
141 Py_INCREF(dialect_obj);
142 return dialect_obj;
143 }
144
145 static PyObject *
get_string(PyObject * str)146 get_string(PyObject *str)
147 {
148 Py_XINCREF(str);
149 return str;
150 }
151
152 static PyObject *
get_nullchar_as_None(Py_UCS4 c)153 get_nullchar_as_None(Py_UCS4 c)
154 {
155 if (c == '\0') {
156 Py_RETURN_NONE;
157 }
158 else
159 return PyUnicode_FromOrdinal(c);
160 }
161
162 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))163 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
164 {
165 return get_string(self->lineterminator);
166 }
167
168 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))169 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
170 {
171 return get_nullchar_as_None(self->delimiter);
172 }
173
174 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))175 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
176 {
177 return get_nullchar_as_None(self->escapechar);
178 }
179
180 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))181 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
182 {
183 return get_nullchar_as_None(self->quotechar);
184 }
185
186 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))187 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
188 {
189 return PyLong_FromLong(self->quoting);
190 }
191
192 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)193 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
194 {
195 if (src == NULL)
196 *target = dflt;
197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = (char)b;
202 }
203 return 0;
204 }
205
206 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)207 _set_int(const char *name, int *target, PyObject *src, int dflt)
208 {
209 if (src == NULL)
210 *target = dflt;
211 else {
212 int value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = _PyLong_AsInt(src);
219 if (value == -1 && PyErr_Occurred()) {
220 return -1;
221 }
222 *target = value;
223 }
224 return 0;
225 }
226
227 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)228 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
229 {
230 if (src == NULL)
231 *target = dflt;
232 else {
233 *target = '\0';
234 if (src != Py_None) {
235 Py_ssize_t len;
236 if (!PyUnicode_Check(src)) {
237 PyErr_Format(PyExc_TypeError,
238 "\"%s\" must be string, not %.200s", name,
239 src->ob_type->tp_name);
240 return -1;
241 }
242 len = PyUnicode_GetLength(src);
243 if (len > 1) {
244 PyErr_Format(PyExc_TypeError,
245 "\"%s\" must be a 1-character string",
246 name);
247 return -1;
248 }
249 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
250 if (len > 0)
251 *target = PyUnicode_READ_CHAR(src, 0);
252 }
253 }
254 return 0;
255 }
256
257 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)258 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259 {
260 if (src == NULL)
261 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
262 else {
263 if (src == Py_None)
264 *target = NULL;
265 else if (!PyUnicode_Check(src)) {
266 PyErr_Format(PyExc_TypeError,
267 "\"%s\" must be a string", name);
268 return -1;
269 }
270 else {
271 if (PyUnicode_READY(src) == -1)
272 return -1;
273 Py_INCREF(src);
274 Py_XSETREF(*target, src);
275 }
276 }
277 return 0;
278 }
279
280 static int
dialect_check_quoting(int quoting)281 dialect_check_quoting(int quoting)
282 {
283 const StyleDesc *qs;
284
285 for (qs = quote_styles; qs->name; qs++) {
286 if ((int)qs->style == quoting)
287 return 0;
288 }
289 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
290 return -1;
291 }
292
/* Byte offset of member `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only boolean attributes served straight from the char flags of
 * DialectObj. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
    { "strict",             T_BOOL, D_OFF(strict), READONLY },
    { NULL }
};
301
/* Attributes computed by getter functions.  No setter is supplied for any
 * of them. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
310
311 static void
Dialect_dealloc(DialectObj * self)312 Dialect_dealloc(DialectObj *self)
313 {
314 Py_XDECREF(self->lineterminator);
315 Py_TYPE(self)->tp_free((PyObject *)self);
316 }
317
/* Keyword names accepted by dialect_new().  The order must match the
 * order of the output pointers passed to PyArg_ParseTupleAndKeywords()
 * there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
330
331 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)332 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
333 {
334 DialectObj *self;
335 PyObject *ret = NULL;
336 PyObject *dialect = NULL;
337 PyObject *delimiter = NULL;
338 PyObject *doublequote = NULL;
339 PyObject *escapechar = NULL;
340 PyObject *lineterminator = NULL;
341 PyObject *quotechar = NULL;
342 PyObject *quoting = NULL;
343 PyObject *skipinitialspace = NULL;
344 PyObject *strict = NULL;
345
346 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
347 "|OOOOOOOOO", dialect_kws,
348 &dialect,
349 &delimiter,
350 &doublequote,
351 &escapechar,
352 &lineterminator,
353 "echar,
354 "ing,
355 &skipinitialspace,
356 &strict))
357 return NULL;
358
359 if (dialect != NULL) {
360 if (PyUnicode_Check(dialect)) {
361 dialect = get_dialect_from_registry(dialect);
362 if (dialect == NULL)
363 return NULL;
364 }
365 else
366 Py_INCREF(dialect);
367 /* Can we reuse this instance? */
368 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
369 delimiter == NULL &&
370 doublequote == NULL &&
371 escapechar == NULL &&
372 lineterminator == NULL &&
373 quotechar == NULL &&
374 quoting == NULL &&
375 skipinitialspace == NULL &&
376 strict == NULL)
377 return dialect;
378 }
379
380 self = (DialectObj *)type->tp_alloc(type, 0);
381 if (self == NULL) {
382 Py_XDECREF(dialect);
383 return NULL;
384 }
385 self->lineterminator = NULL;
386
387 Py_XINCREF(delimiter);
388 Py_XINCREF(doublequote);
389 Py_XINCREF(escapechar);
390 Py_XINCREF(lineterminator);
391 Py_XINCREF(quotechar);
392 Py_XINCREF(quoting);
393 Py_XINCREF(skipinitialspace);
394 Py_XINCREF(strict);
395 if (dialect != NULL) {
396 #define DIALECT_GETATTR(v, n) \
397 if (v == NULL) \
398 v = PyObject_GetAttrString(dialect, n)
399 DIALECT_GETATTR(delimiter, "delimiter");
400 DIALECT_GETATTR(doublequote, "doublequote");
401 DIALECT_GETATTR(escapechar, "escapechar");
402 DIALECT_GETATTR(lineterminator, "lineterminator");
403 DIALECT_GETATTR(quotechar, "quotechar");
404 DIALECT_GETATTR(quoting, "quoting");
405 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
406 DIALECT_GETATTR(strict, "strict");
407 PyErr_Clear();
408 }
409
410 /* check types and convert to C values */
411 #define DIASET(meth, name, target, src, dflt) \
412 if (meth(name, target, src, dflt)) \
413 goto err
414 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
415 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
416 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
417 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
418 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
419 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
420 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
421 DIASET(_set_bool, "strict", &self->strict, strict, false);
422
423 /* validate options */
424 if (dialect_check_quoting(self->quoting))
425 goto err;
426 if (self->delimiter == 0) {
427 PyErr_SetString(PyExc_TypeError,
428 "\"delimiter\" must be a 1-character string");
429 goto err;
430 }
431 if (quotechar == Py_None && quoting == NULL)
432 self->quoting = QUOTE_NONE;
433 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
434 PyErr_SetString(PyExc_TypeError,
435 "quotechar must be set if quoting enabled");
436 goto err;
437 }
438 if (self->lineterminator == 0) {
439 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
440 goto err;
441 }
442
443 ret = (PyObject *)self;
444 Py_INCREF(self);
445 err:
446 Py_XDECREF(self);
447 Py_XDECREF(dialect);
448 Py_XDECREF(delimiter);
449 Py_XDECREF(doublequote);
450 Py_XDECREF(escapechar);
451 Py_XDECREF(lineterminator);
452 Py_XDECREF(quotechar);
453 Py_XDECREF(quoting);
454 Py_XDECREF(skipinitialspace);
455 Py_XDECREF(strict);
456 return ret;
457 }
458
459
460 PyDoc_STRVAR(Dialect_Type_doc,
461 "CSV dialect\n"
462 "\n"
463 "The Dialect type records CSV parsing and generation options.\n");
464
465 static PyTypeObject Dialect_Type = {
466 PyVarObject_HEAD_INIT(NULL, 0)
467 "_csv.Dialect", /* tp_name */
468 sizeof(DialectObj), /* tp_basicsize */
469 0, /* tp_itemsize */
470 /* methods */
471 (destructor)Dialect_dealloc, /* tp_dealloc */
472 0, /* tp_vectorcall_offset */
473 (getattrfunc)0, /* tp_getattr */
474 (setattrfunc)0, /* tp_setattr */
475 0, /* tp_as_async */
476 (reprfunc)0, /* tp_repr */
477 0, /* tp_as_number */
478 0, /* tp_as_sequence */
479 0, /* tp_as_mapping */
480 (hashfunc)0, /* tp_hash */
481 (ternaryfunc)0, /* tp_call */
482 (reprfunc)0, /* tp_str */
483 0, /* tp_getattro */
484 0, /* tp_setattro */
485 0, /* tp_as_buffer */
486 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
487 Dialect_Type_doc, /* tp_doc */
488 0, /* tp_traverse */
489 0, /* tp_clear */
490 0, /* tp_richcompare */
491 0, /* tp_weaklistoffset */
492 0, /* tp_iter */
493 0, /* tp_iternext */
494 0, /* tp_methods */
495 Dialect_memberlist, /* tp_members */
496 Dialect_getsetlist, /* tp_getset */
497 0, /* tp_base */
498 0, /* tp_dict */
499 0, /* tp_descr_get */
500 0, /* tp_descr_set */
501 0, /* tp_dictoffset */
502 0, /* tp_init */
503 0, /* tp_alloc */
504 dialect_new, /* tp_new */
505 0, /* tp_free */
506 };
507
508 /*
509 * Return an instance of the dialect type, given a Python instance or kwarg
510 * description of the dialect
511 */
512 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)513 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
514 {
515 PyObject *type = (PyObject *)&Dialect_Type;
516 if (dialect_inst) {
517 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
518 }
519 else {
520 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
521 }
522 }
523
524 /*
525 * READER
526 */
527 static int
parse_save_field(ReaderObj * self)528 parse_save_field(ReaderObj *self)
529 {
530 PyObject *field;
531
532 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
533 (void *) self->field, self->field_len);
534 if (field == NULL)
535 return -1;
536 self->field_len = 0;
537 if (self->numeric_field) {
538 PyObject *tmp;
539
540 self->numeric_field = 0;
541 tmp = PyNumber_Float(field);
542 Py_DECREF(field);
543 if (tmp == NULL)
544 return -1;
545 field = tmp;
546 }
547 if (PyList_Append(self->fields, field) < 0) {
548 Py_DECREF(field);
549 return -1;
550 }
551 Py_DECREF(field);
552 return 0;
553 }
554
555 static int
parse_grow_buff(ReaderObj * self)556 parse_grow_buff(ReaderObj *self)
557 {
558 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
559
560 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
561 Py_UCS4 *field_new = self->field;
562 PyMem_Resize(field_new, Py_UCS4, field_size_new);
563 if (field_new == NULL) {
564 PyErr_NoMemory();
565 return 0;
566 }
567 self->field = field_new;
568 self->field_size = field_size_new;
569 return 1;
570 }
571
572 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)573 parse_add_char(ReaderObj *self, Py_UCS4 c)
574 {
575 if (self->field_len >= _csvstate_global->field_limit) {
576 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
577 _csvstate_global->field_limit);
578 return -1;
579 }
580 if (self->field_len == self->field_size && !parse_grow_buff(self))
581 return -1;
582 self->field[self->field_len++] = c;
583 return 0;
584 }
585
/*
 * Advance the reader's state machine by one character.
 *
 * `c` is either a character of the current input line or 0.
 * Reader_iternext() passes 0 once after the last character of every line
 * it reads, so '\0' plays the role of an end-of-line marker here.
 * Depending on self->state the character is appended to the current field
 * (parse_add_char), completes the field (parse_save_field), or only
 * changes self->state.  A record is complete when the state is back at
 * START_RECORD.
 *
 * Returns 0 on success, -1 on failure (an exception has been set by this
 * function or by the helper that failed).
 */
static int
parse_process_char(ReaderObj *self, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            /* with QUOTE_NONNUMERIC an unquoted field is flagged so that
               parse_save_field() converts it with PyNumber_Float() */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* previous character was the escape character (outside quotes) */
        if (c == '\n' || c=='\r') {
            /* escaped line break is kept as field data */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escape character was the last character of the line: store a
           newline in the field */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* end-of-line marker right after an escaped line break: stay in
           this state, so the record continues with the next line */
        if (c == '\0')
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: the state is kept, so the field
               continues on the next line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* previous character was the escape character (inside quotes);
           at end of line a newline is stored instead */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: data after the closing quote is appended and
               the field continues as an unquoted one */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* a line break ended the record: only further line-break
           characters or the end-of-line marker may follow */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
766
767 static int
parse_reset(ReaderObj * self)768 parse_reset(ReaderObj *self)
769 {
770 Py_XSETREF(self->fields, PyList_New(0));
771 if (self->fields == NULL)
772 return -1;
773 self->field_len = 0;
774 self->state = START_RECORD;
775 self->numeric_field = 0;
776 return 0;
777 }
778
779 static PyObject *
Reader_iternext(ReaderObj * self)780 Reader_iternext(ReaderObj *self)
781 {
782 PyObject *fields = NULL;
783 Py_UCS4 c;
784 Py_ssize_t pos, linelen;
785 unsigned int kind;
786 void *data;
787 PyObject *lineobj;
788
789 if (parse_reset(self) < 0)
790 return NULL;
791 do {
792 lineobj = PyIter_Next(self->input_iter);
793 if (lineobj == NULL) {
794 /* End of input OR exception */
795 if (!PyErr_Occurred() && (self->field_len != 0 ||
796 self->state == IN_QUOTED_FIELD)) {
797 if (self->dialect->strict)
798 PyErr_SetString(_csvstate_global->error_obj,
799 "unexpected end of data");
800 else if (parse_save_field(self) >= 0)
801 break;
802 }
803 return NULL;
804 }
805 if (!PyUnicode_Check(lineobj)) {
806 PyErr_Format(_csvstate_global->error_obj,
807 "iterator should return strings, "
808 "not %.200s "
809 "(did you open the file in text mode?)",
810 lineobj->ob_type->tp_name
811 );
812 Py_DECREF(lineobj);
813 return NULL;
814 }
815 if (PyUnicode_READY(lineobj) == -1) {
816 Py_DECREF(lineobj);
817 return NULL;
818 }
819 ++self->line_num;
820 kind = PyUnicode_KIND(lineobj);
821 data = PyUnicode_DATA(lineobj);
822 pos = 0;
823 linelen = PyUnicode_GET_LENGTH(lineobj);
824 while (linelen--) {
825 c = PyUnicode_READ(kind, data, pos);
826 if (c == '\0') {
827 Py_DECREF(lineobj);
828 PyErr_Format(_csvstate_global->error_obj,
829 "line contains NUL");
830 goto err;
831 }
832 if (parse_process_char(self, c) < 0) {
833 Py_DECREF(lineobj);
834 goto err;
835 }
836 pos++;
837 }
838 Py_DECREF(lineobj);
839 if (parse_process_char(self, 0) < 0)
840 goto err;
841 } while (self->state != START_RECORD);
842
843 fields = self->fields;
844 self->fields = NULL;
845 err:
846 return fields;
847 }
848
849 static void
Reader_dealloc(ReaderObj * self)850 Reader_dealloc(ReaderObj *self)
851 {
852 PyObject_GC_UnTrack(self);
853 Py_XDECREF(self->dialect);
854 Py_XDECREF(self->input_iter);
855 Py_XDECREF(self->fields);
856 if (self->field != NULL)
857 PyMem_Free(self->field);
858 PyObject_GC_Del(self);
859 }
860
/* GC traversal: report every object reference held by the reader.
 * Py_VISIT returns early with the visitor's result if it is non-zero. */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
869
/* GC clear: drop every object reference held by the reader.  The raw
 * field buffer is not touched here; Reader_dealloc() frees it. */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
878
/* Docstring installed as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
885
/* The reader defines no methods of its own; the table holds only the
 * terminating entry. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes served straight from ReaderObj members. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
896
897
898 static PyTypeObject Reader_Type = {
899 PyVarObject_HEAD_INIT(NULL, 0)
900 "_csv.reader", /*tp_name*/
901 sizeof(ReaderObj), /*tp_basicsize*/
902 0, /*tp_itemsize*/
903 /* methods */
904 (destructor)Reader_dealloc, /*tp_dealloc*/
905 0, /*tp_vectorcall_offset*/
906 (getattrfunc)0, /*tp_getattr*/
907 (setattrfunc)0, /*tp_setattr*/
908 0, /*tp_as_async*/
909 (reprfunc)0, /*tp_repr*/
910 0, /*tp_as_number*/
911 0, /*tp_as_sequence*/
912 0, /*tp_as_mapping*/
913 (hashfunc)0, /*tp_hash*/
914 (ternaryfunc)0, /*tp_call*/
915 (reprfunc)0, /*tp_str*/
916 0, /*tp_getattro*/
917 0, /*tp_setattro*/
918 0, /*tp_as_buffer*/
919 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
920 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
921 Reader_Type_doc, /*tp_doc*/
922 (traverseproc)Reader_traverse, /*tp_traverse*/
923 (inquiry)Reader_clear, /*tp_clear*/
924 0, /*tp_richcompare*/
925 0, /*tp_weaklistoffset*/
926 PyObject_SelfIter, /*tp_iter*/
927 (getiterfunc)Reader_iternext, /*tp_iternext*/
928 Reader_methods, /*tp_methods*/
929 Reader_memberlist, /*tp_members*/
930 0, /*tp_getset*/
931
932 };
933
934 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)935 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
936 {
937 PyObject * iterator, * dialect = NULL;
938 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
939
940 if (!self)
941 return NULL;
942
943 self->dialect = NULL;
944 self->fields = NULL;
945 self->input_iter = NULL;
946 self->field = NULL;
947 self->field_size = 0;
948 self->line_num = 0;
949
950 if (parse_reset(self) < 0) {
951 Py_DECREF(self);
952 return NULL;
953 }
954
955 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
956 Py_DECREF(self);
957 return NULL;
958 }
959 self->input_iter = PyObject_GetIter(iterator);
960 if (self->input_iter == NULL) {
961 PyErr_SetString(PyExc_TypeError,
962 "argument 1 must be an iterator");
963 Py_DECREF(self);
964 return NULL;
965 }
966 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
967 if (self->dialect == NULL) {
968 Py_DECREF(self);
969 return NULL;
970 }
971
972 PyObject_GC_Track(self);
973 return (PyObject *)self;
974 }
975
976 /*
977 * WRITER
978 */
979 /* ---------------------------------------------------------------- */
980 static void
join_reset(WriterObj * self)981 join_reset(WriterObj *self)
982 {
983 self->rec_len = 0;
984 self->num_fields = 0;
985 }
986
/* Allocation granularity of the writer's record buffer, in Py_UCS4
 * elements: join_check_rec_size() sizes the buffer in multiples of it. */
#define MEM_INCR 32768
988
989 /* Calculate new record length or append field to record. Return new
990 * record length.
991 */
992 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)993 join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
994 Py_ssize_t field_len, int *quoted,
995 int copy_phase)
996 {
997 DialectObj *dialect = self->dialect;
998 int i;
999 Py_ssize_t rec_len;
1000
1001 #define INCLEN \
1002 do {\
1003 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1004 goto overflow; \
1005 } \
1006 rec_len++; \
1007 } while(0)
1008
1009 #define ADDCH(c) \
1010 do {\
1011 if (copy_phase) \
1012 self->rec[rec_len] = c;\
1013 INCLEN;\
1014 } while(0)
1015
1016 rec_len = self->rec_len;
1017
1018 /* If this is not the first field we need a field separator */
1019 if (self->num_fields > 0)
1020 ADDCH(dialect->delimiter);
1021
1022 /* Handle preceding quote */
1023 if (copy_phase && *quoted)
1024 ADDCH(dialect->quotechar);
1025
1026 /* Copy/count field data */
1027 /* If field is null just pass over */
1028 for (i = 0; field_data && (i < field_len); i++) {
1029 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1030 int want_escape = 0;
1031
1032 if (c == dialect->delimiter ||
1033 c == dialect->escapechar ||
1034 c == dialect->quotechar ||
1035 PyUnicode_FindChar(
1036 dialect->lineterminator, c, 0,
1037 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1038 if (dialect->quoting == QUOTE_NONE)
1039 want_escape = 1;
1040 else {
1041 if (c == dialect->quotechar) {
1042 if (dialect->doublequote)
1043 ADDCH(dialect->quotechar);
1044 else
1045 want_escape = 1;
1046 }
1047 if (!want_escape)
1048 *quoted = 1;
1049 }
1050 if (want_escape) {
1051 if (!dialect->escapechar) {
1052 PyErr_Format(_csvstate_global->error_obj,
1053 "need to escape, but no escapechar set");
1054 return -1;
1055 }
1056 ADDCH(dialect->escapechar);
1057 }
1058 }
1059 /* Copy field character into record buffer.
1060 */
1061 ADDCH(c);
1062 }
1063
1064 if (*quoted) {
1065 if (copy_phase)
1066 ADDCH(dialect->quotechar);
1067 else {
1068 INCLEN; /* starting quote */
1069 INCLEN; /* ending quote */
1070 }
1071 }
1072 return rec_len;
1073
1074 overflow:
1075 PyErr_NoMemory();
1076 return -1;
1077 #undef ADDCH
1078 #undef INCLEN
1079 }
1080
1081 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1082 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1083 {
1084 assert(rec_len >= 0);
1085
1086 if (rec_len > self->rec_size) {
1087 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1088 Py_UCS4 *rec_new = self->rec;
1089 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1090 if (rec_new == NULL) {
1091 PyErr_NoMemory();
1092 return 0;
1093 }
1094 self->rec = rec_new;
1095 self->rec_size = (Py_ssize_t)rec_size_new;
1096 }
1097 return 1;
1098 }
1099
1100 static int
join_append(WriterObj * self,PyObject * field,int quoted)1101 join_append(WriterObj *self, PyObject *field, int quoted)
1102 {
1103 unsigned int field_kind = -1;
1104 void *field_data = NULL;
1105 Py_ssize_t field_len = 0;
1106 Py_ssize_t rec_len;
1107
1108 if (field != NULL) {
1109 if (PyUnicode_READY(field) == -1)
1110 return 0;
1111 field_kind = PyUnicode_KIND(field);
1112 field_data = PyUnicode_DATA(field);
1113 field_len = PyUnicode_GET_LENGTH(field);
1114 }
1115 rec_len = join_append_data(self, field_kind, field_data, field_len,
1116 "ed, 0);
1117 if (rec_len < 0)
1118 return 0;
1119
1120 /* grow record buffer if necessary */
1121 if (!join_check_rec_size(self, rec_len))
1122 return 0;
1123
1124 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1125 "ed, 1);
1126 self->num_fields++;
1127
1128 return 1;
1129 }
1130
1131 static int
join_append_lineterminator(WriterObj * self)1132 join_append_lineterminator(WriterObj *self)
1133 {
1134 Py_ssize_t terminator_len, i;
1135 unsigned int term_kind;
1136 void *term_data;
1137
1138 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1139 if (terminator_len == -1)
1140 return 0;
1141
1142 /* grow record buffer if necessary */
1143 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1144 return 0;
1145
1146 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1147 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1148 for (i = 0; i < terminator_len; i++)
1149 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1150 self->rec_len += terminator_len;
1151
1152 return 1;
1153 }
1154
1155 PyDoc_STRVAR(csv_writerow_doc,
1156 "writerow(iterable)\n"
1157 "\n"
1158 "Construct and write a CSV record from an iterable of fields. Non-string\n"
1159 "elements will be converted to string.");
1160
1161 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1162 csv_writerow(WriterObj *self, PyObject *seq)
1163 {
1164 DialectObj *dialect = self->dialect;
1165 PyObject *iter, *field, *line, *result;
1166
1167 iter = PyObject_GetIter(seq);
1168 if (iter == NULL)
1169 return PyErr_Format(_csvstate_global->error_obj,
1170 "iterable expected, not %.200s",
1171 seq->ob_type->tp_name);
1172
1173 /* Join all fields in internal buffer.
1174 */
1175 join_reset(self);
1176 while ((field = PyIter_Next(iter))) {
1177 int append_ok;
1178 int quoted;
1179
1180 switch (dialect->quoting) {
1181 case QUOTE_NONNUMERIC:
1182 quoted = !PyNumber_Check(field);
1183 break;
1184 case QUOTE_ALL:
1185 quoted = 1;
1186 break;
1187 default:
1188 quoted = 0;
1189 break;
1190 }
1191
1192 if (PyUnicode_Check(field)) {
1193 append_ok = join_append(self, field, quoted);
1194 Py_DECREF(field);
1195 }
1196 else if (field == Py_None) {
1197 append_ok = join_append(self, NULL, quoted);
1198 Py_DECREF(field);
1199 }
1200 else {
1201 PyObject *str;
1202
1203 str = PyObject_Str(field);
1204 Py_DECREF(field);
1205 if (str == NULL) {
1206 Py_DECREF(iter);
1207 return NULL;
1208 }
1209 append_ok = join_append(self, str, quoted);
1210 Py_DECREF(str);
1211 }
1212 if (!append_ok) {
1213 Py_DECREF(iter);
1214 return NULL;
1215 }
1216 }
1217 Py_DECREF(iter);
1218 if (PyErr_Occurred())
1219 return NULL;
1220
1221 if (self->num_fields > 0 && self->rec_len == 0) {
1222 if (dialect->quoting == QUOTE_NONE) {
1223 PyErr_Format(_csvstate_global->error_obj,
1224 "single empty field record must be quoted");
1225 return NULL;
1226 }
1227 self->num_fields--;
1228 if (!join_append(self, NULL, 1))
1229 return NULL;
1230 }
1231
1232 /* Add line terminator.
1233 */
1234 if (!join_append_lineterminator(self)) {
1235 return NULL;
1236 }
1237
1238 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1239 (void *) self->rec, self->rec_len);
1240 if (line == NULL) {
1241 return NULL;
1242 }
1243 result = PyObject_CallFunctionObjArgs(self->write, line, NULL);
1244 Py_DECREF(line);
1245 return result;
1246 }
1247
1248 PyDoc_STRVAR(csv_writerows_doc,
1249 "writerows(iterable of iterables)\n"
1250 "\n"
1251 "Construct and write a series of iterables to a csv file. Non-string\n"
1252 "elements will be converted to string.");
1253
1254 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1255 csv_writerows(WriterObj *self, PyObject *seqseq)
1256 {
1257 PyObject *row_iter, *row_obj, *result;
1258
1259 row_iter = PyObject_GetIter(seqseq);
1260 if (row_iter == NULL) {
1261 PyErr_SetString(PyExc_TypeError,
1262 "writerows() argument must be iterable");
1263 return NULL;
1264 }
1265 while ((row_obj = PyIter_Next(row_iter))) {
1266 result = csv_writerow(self, row_obj);
1267 Py_DECREF(row_obj);
1268 if (!result) {
1269 Py_DECREF(row_iter);
1270 return NULL;
1271 }
1272 else
1273 Py_DECREF(result);
1274 }
1275 Py_DECREF(row_iter);
1276 if (PyErr_Occurred())
1277 return NULL;
1278 Py_RETURN_NONE;
1279 }
1280
1281 static struct PyMethodDef Writer_methods[] = {
1282 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1283 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1284 { NULL, NULL }
1285 };
1286
1287 #define W_OFF(x) offsetof(WriterObj, x)
1288
1289 static struct PyMemberDef Writer_memberlist[] = {
1290 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1291 { NULL }
1292 };
1293
1294 static void
Writer_dealloc(WriterObj * self)1295 Writer_dealloc(WriterObj *self)
1296 {
1297 PyObject_GC_UnTrack(self);
1298 Py_XDECREF(self->dialect);
1299 Py_XDECREF(self->write);
1300 if (self->rec != NULL)
1301 PyMem_Free(self->rec);
1302 PyObject_GC_Del(self);
1303 }
1304
1305 static int
Writer_traverse(WriterObj * self,visitproc visit,void * arg)1306 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1307 {
1308 Py_VISIT(self->dialect);
1309 Py_VISIT(self->write);
1310 return 0;
1311 }
1312
1313 static int
Writer_clear(WriterObj * self)1314 Writer_clear(WriterObj *self)
1315 {
1316 Py_CLEAR(self->dialect);
1317 Py_CLEAR(self->write);
1318 return 0;
1319 }
1320
1321 PyDoc_STRVAR(Writer_Type_doc,
1322 "CSV writer\n"
1323 "\n"
1324 "Writer objects are responsible for generating tabular data\n"
1325 "in CSV format from sequence input.\n"
1326 );
1327
1328 static PyTypeObject Writer_Type = {
1329 PyVarObject_HEAD_INIT(NULL, 0)
1330 "_csv.writer", /*tp_name*/
1331 sizeof(WriterObj), /*tp_basicsize*/
1332 0, /*tp_itemsize*/
1333 /* methods */
1334 (destructor)Writer_dealloc, /*tp_dealloc*/
1335 0, /*tp_vectorcall_offset*/
1336 (getattrfunc)0, /*tp_getattr*/
1337 (setattrfunc)0, /*tp_setattr*/
1338 0, /*tp_as_async*/
1339 (reprfunc)0, /*tp_repr*/
1340 0, /*tp_as_number*/
1341 0, /*tp_as_sequence*/
1342 0, /*tp_as_mapping*/
1343 (hashfunc)0, /*tp_hash*/
1344 (ternaryfunc)0, /*tp_call*/
1345 (reprfunc)0, /*tp_str*/
1346 0, /*tp_getattro*/
1347 0, /*tp_setattro*/
1348 0, /*tp_as_buffer*/
1349 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1350 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1351 Writer_Type_doc,
1352 (traverseproc)Writer_traverse, /*tp_traverse*/
1353 (inquiry)Writer_clear, /*tp_clear*/
1354 0, /*tp_richcompare*/
1355 0, /*tp_weaklistoffset*/
1356 (getiterfunc)0, /*tp_iter*/
1357 (getiterfunc)0, /*tp_iternext*/
1358 Writer_methods, /*tp_methods*/
1359 Writer_memberlist, /*tp_members*/
1360 0, /*tp_getset*/
1361 };
1362
1363 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1364 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1365 {
1366 PyObject * output_file, * dialect = NULL;
1367 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1368 _Py_IDENTIFIER(write);
1369
1370 if (!self)
1371 return NULL;
1372
1373 self->dialect = NULL;
1374 self->write = NULL;
1375
1376 self->rec = NULL;
1377 self->rec_size = 0;
1378 self->rec_len = 0;
1379 self->num_fields = 0;
1380
1381 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1382 Py_DECREF(self);
1383 return NULL;
1384 }
1385 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1386 Py_DECREF(self);
1387 return NULL;
1388 }
1389 if (self->write == NULL || !PyCallable_Check(self->write)) {
1390 PyErr_SetString(PyExc_TypeError,
1391 "argument 1 must have a \"write\" method");
1392 Py_DECREF(self);
1393 return NULL;
1394 }
1395 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1396 if (self->dialect == NULL) {
1397 Py_DECREF(self);
1398 return NULL;
1399 }
1400 PyObject_GC_Track(self);
1401 return (PyObject *)self;
1402 }
1403
1404 /*
1405 * DIALECT REGISTRY
1406 */
1407 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1408 csv_list_dialects(PyObject *module, PyObject *args)
1409 {
1410 return PyDict_Keys(_csvstate_global->dialects);
1411 }
1412
1413 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1414 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1415 {
1416 PyObject *name_obj, *dialect_obj = NULL;
1417 PyObject *dialect;
1418
1419 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1420 return NULL;
1421 if (!PyUnicode_Check(name_obj)) {
1422 PyErr_SetString(PyExc_TypeError,
1423 "dialect name must be a string");
1424 return NULL;
1425 }
1426 if (PyUnicode_READY(name_obj) == -1)
1427 return NULL;
1428 dialect = _call_dialect(dialect_obj, kwargs);
1429 if (dialect == NULL)
1430 return NULL;
1431 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1432 Py_DECREF(dialect);
1433 return NULL;
1434 }
1435 Py_DECREF(dialect);
1436 Py_RETURN_NONE;
1437 }
1438
1439 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1440 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1441 {
1442 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1443 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1444 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1445 }
1446 return NULL;
1447 }
1448 Py_RETURN_NONE;
1449 }
1450
1451 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1452 csv_get_dialect(PyObject *module, PyObject *name_obj)
1453 {
1454 return get_dialect_from_registry(name_obj);
1455 }
1456
1457 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1458 csv_field_size_limit(PyObject *module, PyObject *args)
1459 {
1460 PyObject *new_limit = NULL;
1461 long old_limit = _csvstate_global->field_limit;
1462
1463 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1464 return NULL;
1465 if (new_limit != NULL) {
1466 if (!PyLong_CheckExact(new_limit)) {
1467 PyErr_Format(PyExc_TypeError,
1468 "limit must be an integer");
1469 return NULL;
1470 }
1471 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1472 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1473 _csvstate_global->field_limit = old_limit;
1474 return NULL;
1475 }
1476 }
1477 return PyLong_FromLong(old_limit);
1478 }
1479
1480 /*
1481 * MODULE
1482 */
1483
1484 PyDoc_STRVAR(csv_module_doc,
1485 "CSV parsing and writing.\n"
1486 "\n"
1487 "This module provides classes that assist in the reading and writing\n"
1488 "of Comma Separated Value (CSV) files, and implements the interface\n"
1489 "described by PEP 305. Although many CSV files are simple to parse,\n"
1490 "the format is not formally defined by a stable specification and\n"
1491 "is subtle enough that parsing lines of a CSV file with something\n"
1492 "like line.split(\",\") is bound to fail. The module supports three\n"
1493 "basic APIs: reading, writing, and registration of dialects.\n"
1494 "\n"
1495 "\n"
1496 "DIALECT REGISTRATION:\n"
1497 "\n"
1498 "Readers and writers support a dialect argument, which is a convenient\n"
1499 "handle on a group of settings. When the dialect argument is a string,\n"
1500 "it identifies one of the dialects previously registered with the module.\n"
1501 "If it is a class or instance, the attributes of the argument are used as\n"
1502 "the settings for the reader or writer:\n"
1503 "\n"
1504 " class excel:\n"
1505 " delimiter = ','\n"
1506 " quotechar = '\"'\n"
1507 " escapechar = None\n"
1508 " doublequote = True\n"
1509 " skipinitialspace = False\n"
1510 " lineterminator = '\\r\\n'\n"
1511 " quoting = QUOTE_MINIMAL\n"
1512 "\n"
1513 "SETTINGS:\n"
1514 "\n"
1515 " * quotechar - specifies a one-character string to use as the\n"
1516 " quoting character. It defaults to '\"'.\n"
1517 " * delimiter - specifies a one-character string to use as the\n"
1518 " field separator. It defaults to ','.\n"
1519 " * skipinitialspace - specifies how to interpret whitespace which\n"
1520 " immediately follows a delimiter. It defaults to False, which\n"
1521 " means that whitespace immediately following a delimiter is part\n"
1522 " of the following field.\n"
1523 " * lineterminator - specifies the character sequence which should\n"
1524 " terminate rows.\n"
1525 " * quoting - controls when quotes should be generated by the writer.\n"
1526 " It can take on any of the following module constants:\n"
1527 "\n"
1528 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1529 " field contains either the quotechar or the delimiter\n"
1530 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1531 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1532 " fields which do not parse as integers or floating point\n"
1533 " numbers.\n"
1534 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1535 " * escapechar - specifies a one-character string used to escape\n"
1536 " the delimiter when quoting is set to QUOTE_NONE.\n"
1537 " * doublequote - controls the handling of quotes inside fields. When\n"
1538 " True, two consecutive quotes are interpreted as one during read,\n"
1539 " and when writing, each quote character embedded in the data is\n"
1540 " written as two quotes\n");
1541
1542 PyDoc_STRVAR(csv_reader_doc,
1543 " csv_reader = reader(iterable [, dialect='excel']\n"
1544 " [optional keyword args])\n"
1545 " for row in csv_reader:\n"
1546 " process(row)\n"
1547 "\n"
1548 "The \"iterable\" argument can be any object that returns a line\n"
1549 "of input for each iteration, such as a file object or a list. The\n"
1550 "optional \"dialect\" parameter is discussed below. The function\n"
1551 "also accepts optional keyword arguments which override settings\n"
1552 "provided by the dialect.\n"
1553 "\n"
1554 "The returned object is an iterator. Each iteration returns a row\n"
1555 "of the CSV file (which can span multiple input lines).\n");
1556
1557 PyDoc_STRVAR(csv_writer_doc,
1558 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1559 " [optional keyword args])\n"
1560 " for row in sequence:\n"
1561 " csv_writer.writerow(row)\n"
1562 "\n"
1563 " [or]\n"
1564 "\n"
1565 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1566 " [optional keyword args])\n"
1567 " csv_writer.writerows(rows)\n"
1568 "\n"
1569 "The \"fileobj\" argument can be any object that supports the file API.\n");
1570
1571 PyDoc_STRVAR(csv_list_dialects_doc,
1572 "Return a list of all know dialect names.\n"
1573 " names = csv.list_dialects()");
1574
1575 PyDoc_STRVAR(csv_get_dialect_doc,
1576 "Return the dialect instance associated with name.\n"
1577 " dialect = csv.get_dialect(name)");
1578
1579 PyDoc_STRVAR(csv_register_dialect_doc,
1580 "Create a mapping from a string name to a dialect class.\n"
1581 " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1582
1583 PyDoc_STRVAR(csv_unregister_dialect_doc,
1584 "Delete the name/dialect mapping associated with a string name.\n"
1585 " csv.unregister_dialect(name)");
1586
1587 PyDoc_STRVAR(csv_field_size_limit_doc,
1588 "Sets an upper limit on parsed fields.\n"
1589 " csv.field_size_limit([limit])\n"
1590 "\n"
1591 "Returns old limit. If limit is not given, no new limit is set and\n"
1592 "the old limit is returned");
1593
1594 static struct PyMethodDef csv_methods[] = {
1595 { "reader", (PyCFunction)(void(*)(void))csv_reader,
1596 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1597 { "writer", (PyCFunction)(void(*)(void))csv_writer,
1598 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1599 { "list_dialects", (PyCFunction)csv_list_dialects,
1600 METH_NOARGS, csv_list_dialects_doc},
1601 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
1602 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1603 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1604 METH_O, csv_unregister_dialect_doc},
1605 { "get_dialect", (PyCFunction)csv_get_dialect,
1606 METH_O, csv_get_dialect_doc},
1607 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1608 METH_VARARGS, csv_field_size_limit_doc},
1609 { NULL, NULL }
1610 };
1611
1612 static struct PyModuleDef _csvmodule = {
1613 PyModuleDef_HEAD_INIT,
1614 "_csv",
1615 csv_module_doc,
1616 sizeof(_csvstate),
1617 csv_methods,
1618 NULL,
1619 _csv_traverse,
1620 _csv_clear,
1621 _csv_free
1622 };
1623
1624 PyMODINIT_FUNC
PyInit__csv(void)1625 PyInit__csv(void)
1626 {
1627 PyObject *module;
1628 const StyleDesc *style;
1629
1630 if (PyType_Ready(&Dialect_Type) < 0)
1631 return NULL;
1632
1633 if (PyType_Ready(&Reader_Type) < 0)
1634 return NULL;
1635
1636 if (PyType_Ready(&Writer_Type) < 0)
1637 return NULL;
1638
1639 /* Create the module and add the functions */
1640 module = PyModule_Create(&_csvmodule);
1641 if (module == NULL)
1642 return NULL;
1643
1644 /* Add version to the module. */
1645 if (PyModule_AddStringConstant(module, "__version__",
1646 MODULE_VERSION) == -1)
1647 return NULL;
1648
1649 /* Set the field limit */
1650 _csvstate(module)->field_limit = 128 * 1024;
1651 /* Do I still need to add this var to the Module Dict? */
1652
1653 /* Add _dialects dictionary */
1654 _csvstate(module)->dialects = PyDict_New();
1655 if (_csvstate(module)->dialects == NULL)
1656 return NULL;
1657 Py_INCREF(_csvstate(module)->dialects);
1658 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1659 return NULL;
1660
1661 /* Add quote styles into dictionary */
1662 for (style = quote_styles; style->name; style++) {
1663 if (PyModule_AddIntConstant(module, style->name,
1664 style->style) == -1)
1665 return NULL;
1666 }
1667
1668 /* Add the Dialect type */
1669 Py_INCREF(&Dialect_Type);
1670 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1671 return NULL;
1672
1673 /* Add the CSV exception object to the module. */
1674 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1675 if (_csvstate(module)->error_obj == NULL)
1676 return NULL;
1677 Py_INCREF(_csvstate(module)->error_obj);
1678 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1679 return module;
1680 }
1681