1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 #define MODULE_VERSION "1.0"
12
13 #include "Python.h"
14 #include "structmember.h" // PyMemberDef
15 #include <stdbool.h>
16
17
18 typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
21 long field_limit; /* max parsed field size */
22 } _csvstate;
23
24 static inline _csvstate*
get_csv_state(PyObject * module)25 get_csv_state(PyObject *module)
26 {
27 void *state = PyModule_GetState(module);
28 assert(state != NULL);
29 return (_csvstate *)state;
30 }
31
32 static int
_csv_clear(PyObject * m)33 _csv_clear(PyObject *m)
34 {
35 Py_CLEAR(get_csv_state(m)->error_obj);
36 Py_CLEAR(get_csv_state(m)->dialects);
37 return 0;
38 }
39
40 static int
_csv_traverse(PyObject * m,visitproc visit,void * arg)41 _csv_traverse(PyObject *m, visitproc visit, void *arg)
42 {
43 Py_VISIT(get_csv_state(m)->error_obj);
44 Py_VISIT(get_csv_state(m)->dialects);
45 return 0;
46 }
47
48 static void
_csv_free(void * m)49 _csv_free(void *m)
50 {
51 _csv_clear((PyObject *)m);
52 }
53
54 static struct PyModuleDef _csvmodule;
55
56 #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
57
/* States of the character-at-a-time CSV parser (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
63
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of the known quoting policies; the entry with a NULL name ends it. */
static const StyleDesc quote_styles[] = {
    { .style = QUOTE_MINIMAL,    .name = "QUOTE_MINIMAL" },
    { .style = QUOTE_ALL,        .name = "QUOTE_ALL" },
    { .style = QUOTE_NONNUMERIC, .name = "QUOTE_NONNUMERIC" },
    { .style = QUOTE_NONE,       .name = "QUOTE_NONE" },
    { .name = NULL }
};
80
81 typedef struct {
82 PyObject_HEAD
83
84 char doublequote; /* is " represented by ""? */
85 char skipinitialspace; /* ignore spaces following delimiter? */
86 char strict; /* raise exception on bad CSV */
87 int quoting; /* style of quoting to write */
88 Py_UCS4 delimiter; /* field separator */
89 Py_UCS4 quotechar; /* quote character */
90 Py_UCS4 escapechar; /* escape character */
91 PyObject *lineterminator; /* string to write between records */
92
93 } DialectObj;
94
95 static PyTypeObject Dialect_Type;
96
97 typedef struct {
98 PyObject_HEAD
99
100 PyObject *input_iter; /* iterate over this for input lines */
101
102 DialectObj *dialect; /* parsing dialect */
103
104 PyObject *fields; /* field list for current record */
105 ParserState state; /* current CSV parse state */
106 Py_UCS4 *field; /* temporary buffer */
107 Py_ssize_t field_size; /* size of allocated buffer */
108 Py_ssize_t field_len; /* length of current field */
109 int numeric_field; /* treat field as numeric */
110 unsigned long line_num; /* Source-file line number */
111 } ReaderObj;
112
113 static PyTypeObject Reader_Type;
114
115 #define ReaderObject_Check(v) Py_IS_TYPE(v, &Reader_Type)
116
117 typedef struct {
118 PyObject_HEAD
119
120 PyObject *write; /* write output lines to this file */
121
122 DialectObj *dialect; /* parsing dialect */
123
124 Py_UCS4 *rec; /* buffer for parser.join */
125 Py_ssize_t rec_size; /* size of allocated record */
126 Py_ssize_t rec_len; /* length of record */
127 int num_fields; /* number of fields in record */
128 } WriterObj;
129
130 static PyTypeObject Writer_Type;
131
132 /*
133 * DIALECT class
134 */
135
136 static PyObject *
get_dialect_from_registry(PyObject * name_obj)137 get_dialect_from_registry(PyObject * name_obj)
138 {
139 PyObject *dialect_obj;
140
141 dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
142 if (dialect_obj == NULL) {
143 if (!PyErr_Occurred())
144 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
145 }
146 else
147 Py_INCREF(dialect_obj);
148 return dialect_obj;
149 }
150
151 static PyObject *
get_string(PyObject * str)152 get_string(PyObject *str)
153 {
154 Py_XINCREF(str);
155 return str;
156 }
157
158 static PyObject *
get_nullchar_as_None(Py_UCS4 c)159 get_nullchar_as_None(Py_UCS4 c)
160 {
161 if (c == '\0') {
162 Py_RETURN_NONE;
163 }
164 else
165 return PyUnicode_FromOrdinal(c);
166 }
167
168 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))169 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
170 {
171 return get_string(self->lineterminator);
172 }
173
174 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))175 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
176 {
177 return get_nullchar_as_None(self->delimiter);
178 }
179
180 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))181 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
182 {
183 return get_nullchar_as_None(self->escapechar);
184 }
185
186 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))187 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
188 {
189 return get_nullchar_as_None(self->quotechar);
190 }
191
192 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))193 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
194 {
195 return PyLong_FromLong(self->quoting);
196 }
197
198 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)199 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
200 {
201 if (src == NULL)
202 *target = dflt;
203 else {
204 int b = PyObject_IsTrue(src);
205 if (b < 0)
206 return -1;
207 *target = (char)b;
208 }
209 return 0;
210 }
211
212 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)213 _set_int(const char *name, int *target, PyObject *src, int dflt)
214 {
215 if (src == NULL)
216 *target = dflt;
217 else {
218 int value;
219 if (!PyLong_CheckExact(src)) {
220 PyErr_Format(PyExc_TypeError,
221 "\"%s\" must be an integer", name);
222 return -1;
223 }
224 value = _PyLong_AsInt(src);
225 if (value == -1 && PyErr_Occurred()) {
226 return -1;
227 }
228 *target = value;
229 }
230 return 0;
231 }
232
233 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)234 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
235 {
236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
241 Py_ssize_t len;
242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 Py_TYPE(src)->tp_name);
246 return -1;
247 }
248 len = PyUnicode_GetLength(src);
249 if (len > 1) {
250 PyErr_Format(PyExc_TypeError,
251 "\"%s\" must be a 1-character string",
252 name);
253 return -1;
254 }
255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
256 if (len > 0)
257 *target = PyUnicode_READ_CHAR(src, 0);
258 }
259 }
260 return 0;
261 }
262
263 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)264 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265 {
266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
271 else if (!PyUnicode_Check(src)) {
272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
277 if (PyUnicode_READY(src) == -1)
278 return -1;
279 Py_INCREF(src);
280 Py_XSETREF(*target, src);
281 }
282 }
283 return 0;
284 }
285
286 static int
dialect_check_quoting(int quoting)287 dialect_check_quoting(int quoting)
288 {
289 const StyleDesc *qs;
290
291 for (qs = quote_styles; qs->name; qs++) {
292 if ((int)qs->style == quoting)
293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
297 }
298
299 #define D_OFF(x) offsetof(DialectObj, x)
300
301 static struct PyMemberDef Dialect_memberlist[] = {
302 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
303 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
304 { "strict", T_BOOL, D_OFF(strict), READONLY },
305 { NULL }
306 };
307
308 static PyGetSetDef Dialect_getsetlist[] = {
309 { "delimiter", (getter)Dialect_get_delimiter},
310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
315 };
316
317 static void
Dialect_dealloc(DialectObj * self)318 Dialect_dealloc(DialectObj *self)
319 {
320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
322 }
323
324 static char *dialect_kws[] = {
325 "dialect",
326 "delimiter",
327 "doublequote",
328 "escapechar",
329 "lineterminator",
330 "quotechar",
331 "quoting",
332 "skipinitialspace",
333 "strict",
334 NULL
335 };
336
337 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)338 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339 {
340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
351
352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 "echar,
360 "ing,
361 &skipinitialspace,
362 &strict))
363 return NULL;
364
365 if (dialect != NULL) {
366 if (PyUnicode_Check(dialect)) {
367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375 delimiter == NULL &&
376 doublequote == NULL &&
377 escapechar == NULL &&
378 lineterminator == NULL &&
379 quotechar == NULL &&
380 quoting == NULL &&
381 skipinitialspace == NULL &&
382 strict == NULL)
383 return dialect;
384 }
385
386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
392
393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
402 #define DIALECT_GETATTR(v, n) \
403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
415
416 /* check types and convert to C values */
417 #define DIASET(meth, name, target, src, dflt) \
418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
427 DIASET(_set_bool, "strict", &self->strict, strict, false);
428
429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
433 PyErr_SetString(PyExc_TypeError,
434 "\"delimiter\" must be a 1-character string");
435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
448
449 ret = (PyObject *)self;
450 Py_INCREF(self);
451 err:
452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
463 }
464
465
466 PyDoc_STRVAR(Dialect_Type_doc,
467 "CSV dialect\n"
468 "\n"
469 "The Dialect type records CSV parsing and generation options.\n");
470
471 static PyTypeObject Dialect_Type = {
472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
478 0, /* tp_vectorcall_offset */
479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
481 0, /* tp_as_async */
482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
512 };
513
514 /*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518 static PyObject *
_call_dialect(PyObject * dialect_inst,PyObject * kwargs)519 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520 {
521 PyObject *type = (PyObject *)&Dialect_Type;
522 if (dialect_inst) {
523 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
524 }
525 else {
526 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
527 }
528 }
529
530 /*
531 * READER
532 */
533 static int
parse_save_field(ReaderObj * self)534 parse_save_field(ReaderObj *self)
535 {
536 PyObject *field;
537
538 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
539 (void *) self->field, self->field_len);
540 if (field == NULL)
541 return -1;
542 self->field_len = 0;
543 if (self->numeric_field) {
544 PyObject *tmp;
545
546 self->numeric_field = 0;
547 tmp = PyNumber_Float(field);
548 Py_DECREF(field);
549 if (tmp == NULL)
550 return -1;
551 field = tmp;
552 }
553 if (PyList_Append(self->fields, field) < 0) {
554 Py_DECREF(field);
555 return -1;
556 }
557 Py_DECREF(field);
558 return 0;
559 }
560
561 static int
parse_grow_buff(ReaderObj * self)562 parse_grow_buff(ReaderObj *self)
563 {
564 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
565
566 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
567 Py_UCS4 *field_new = self->field;
568 PyMem_Resize(field_new, Py_UCS4, field_size_new);
569 if (field_new == NULL) {
570 PyErr_NoMemory();
571 return 0;
572 }
573 self->field = field_new;
574 self->field_size = field_size_new;
575 return 1;
576 }
577
578 static int
parse_add_char(ReaderObj * self,Py_UCS4 c)579 parse_add_char(ReaderObj *self, Py_UCS4 c)
580 {
581 if (self->field_len >= _csvstate_global->field_limit) {
582 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
583 _csvstate_global->field_limit);
584 return -1;
585 }
586 if (self->field_len == self->field_size && !parse_grow_buff(self))
587 return -1;
588 self->field[self->field_len++] = c;
589 return 0;
590 }
591
592 static int
parse_process_char(ReaderObj * self,Py_UCS4 c)593 parse_process_char(ReaderObj *self, Py_UCS4 c)
594 {
595 DialectObj *dialect = self->dialect;
596
597 switch (self->state) {
598 case START_RECORD:
599 /* start of record */
600 if (c == '\0')
601 /* empty line - return [] */
602 break;
603 else if (c == '\n' || c == '\r') {
604 self->state = EAT_CRNL;
605 break;
606 }
607 /* normal character - handle as START_FIELD */
608 self->state = START_FIELD;
609 /* fallthru */
610 case START_FIELD:
611 /* expecting field */
612 if (c == '\n' || c == '\r' || c == '\0') {
613 /* save empty field - return [fields] */
614 if (parse_save_field(self) < 0)
615 return -1;
616 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
617 }
618 else if (c == dialect->quotechar &&
619 dialect->quoting != QUOTE_NONE) {
620 /* start quoted field */
621 self->state = IN_QUOTED_FIELD;
622 }
623 else if (c == dialect->escapechar) {
624 /* possible escaped character */
625 self->state = ESCAPED_CHAR;
626 }
627 else if (c == ' ' && dialect->skipinitialspace)
628 /* ignore space at start of field */
629 ;
630 else if (c == dialect->delimiter) {
631 /* save empty field */
632 if (parse_save_field(self) < 0)
633 return -1;
634 }
635 else {
636 /* begin new unquoted field */
637 if (dialect->quoting == QUOTE_NONNUMERIC)
638 self->numeric_field = 1;
639 if (parse_add_char(self, c) < 0)
640 return -1;
641 self->state = IN_FIELD;
642 }
643 break;
644
645 case ESCAPED_CHAR:
646 if (c == '\n' || c=='\r') {
647 if (parse_add_char(self, c) < 0)
648 return -1;
649 self->state = AFTER_ESCAPED_CRNL;
650 break;
651 }
652 if (c == '\0')
653 c = '\n';
654 if (parse_add_char(self, c) < 0)
655 return -1;
656 self->state = IN_FIELD;
657 break;
658
659 case AFTER_ESCAPED_CRNL:
660 if (c == '\0')
661 break;
662 /*fallthru*/
663
664 case IN_FIELD:
665 /* in unquoted field */
666 if (c == '\n' || c == '\r' || c == '\0') {
667 /* end of line - return [fields] */
668 if (parse_save_field(self) < 0)
669 return -1;
670 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
671 }
672 else if (c == dialect->escapechar) {
673 /* possible escaped character */
674 self->state = ESCAPED_CHAR;
675 }
676 else if (c == dialect->delimiter) {
677 /* save field - wait for new field */
678 if (parse_save_field(self) < 0)
679 return -1;
680 self->state = START_FIELD;
681 }
682 else {
683 /* normal character - save in field */
684 if (parse_add_char(self, c) < 0)
685 return -1;
686 }
687 break;
688
689 case IN_QUOTED_FIELD:
690 /* in quoted field */
691 if (c == '\0')
692 ;
693 else if (c == dialect->escapechar) {
694 /* Possible escape character */
695 self->state = ESCAPE_IN_QUOTED_FIELD;
696 }
697 else if (c == dialect->quotechar &&
698 dialect->quoting != QUOTE_NONE) {
699 if (dialect->doublequote) {
700 /* doublequote; " represented by "" */
701 self->state = QUOTE_IN_QUOTED_FIELD;
702 }
703 else {
704 /* end of quote part of field */
705 self->state = IN_FIELD;
706 }
707 }
708 else {
709 /* normal character - save in field */
710 if (parse_add_char(self, c) < 0)
711 return -1;
712 }
713 break;
714
715 case ESCAPE_IN_QUOTED_FIELD:
716 if (c == '\0')
717 c = '\n';
718 if (parse_add_char(self, c) < 0)
719 return -1;
720 self->state = IN_QUOTED_FIELD;
721 break;
722
723 case QUOTE_IN_QUOTED_FIELD:
724 /* doublequote - seen a quote in a quoted field */
725 if (dialect->quoting != QUOTE_NONE &&
726 c == dialect->quotechar) {
727 /* save "" as " */
728 if (parse_add_char(self, c) < 0)
729 return -1;
730 self->state = IN_QUOTED_FIELD;
731 }
732 else if (c == dialect->delimiter) {
733 /* save field - wait for new field */
734 if (parse_save_field(self) < 0)
735 return -1;
736 self->state = START_FIELD;
737 }
738 else if (c == '\n' || c == '\r' || c == '\0') {
739 /* end of line - return [fields] */
740 if (parse_save_field(self) < 0)
741 return -1;
742 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
743 }
744 else if (!dialect->strict) {
745 if (parse_add_char(self, c) < 0)
746 return -1;
747 self->state = IN_FIELD;
748 }
749 else {
750 /* illegal */
751 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
752 dialect->delimiter,
753 dialect->quotechar);
754 return -1;
755 }
756 break;
757
758 case EAT_CRNL:
759 if (c == '\n' || c == '\r')
760 ;
761 else if (c == '\0')
762 self->state = START_RECORD;
763 else {
764 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
765 return -1;
766 }
767 break;
768
769 }
770 return 0;
771 }
772
773 static int
parse_reset(ReaderObj * self)774 parse_reset(ReaderObj *self)
775 {
776 Py_XSETREF(self->fields, PyList_New(0));
777 if (self->fields == NULL)
778 return -1;
779 self->field_len = 0;
780 self->state = START_RECORD;
781 self->numeric_field = 0;
782 return 0;
783 }
784
785 static PyObject *
Reader_iternext(ReaderObj * self)786 Reader_iternext(ReaderObj *self)
787 {
788 PyObject *fields = NULL;
789 Py_UCS4 c;
790 Py_ssize_t pos, linelen;
791 unsigned int kind;
792 const void *data;
793 PyObject *lineobj;
794
795 if (parse_reset(self) < 0)
796 return NULL;
797 do {
798 lineobj = PyIter_Next(self->input_iter);
799 if (lineobj == NULL) {
800 /* End of input OR exception */
801 if (!PyErr_Occurred() && (self->field_len != 0 ||
802 self->state == IN_QUOTED_FIELD)) {
803 if (self->dialect->strict)
804 PyErr_SetString(_csvstate_global->error_obj,
805 "unexpected end of data");
806 else if (parse_save_field(self) >= 0)
807 break;
808 }
809 return NULL;
810 }
811 if (!PyUnicode_Check(lineobj)) {
812 PyErr_Format(_csvstate_global->error_obj,
813 "iterator should return strings, "
814 "not %.200s "
815 "(did you open the file in text mode?)",
816 Py_TYPE(lineobj)->tp_name
817 );
818 Py_DECREF(lineobj);
819 return NULL;
820 }
821 if (PyUnicode_READY(lineobj) == -1) {
822 Py_DECREF(lineobj);
823 return NULL;
824 }
825 ++self->line_num;
826 kind = PyUnicode_KIND(lineobj);
827 data = PyUnicode_DATA(lineobj);
828 pos = 0;
829 linelen = PyUnicode_GET_LENGTH(lineobj);
830 while (linelen--) {
831 c = PyUnicode_READ(kind, data, pos);
832 if (c == '\0') {
833 Py_DECREF(lineobj);
834 PyErr_Format(_csvstate_global->error_obj,
835 "line contains NUL");
836 goto err;
837 }
838 if (parse_process_char(self, c) < 0) {
839 Py_DECREF(lineobj);
840 goto err;
841 }
842 pos++;
843 }
844 Py_DECREF(lineobj);
845 if (parse_process_char(self, 0) < 0)
846 goto err;
847 } while (self->state != START_RECORD);
848
849 fields = self->fields;
850 self->fields = NULL;
851 err:
852 return fields;
853 }
854
855 static void
Reader_dealloc(ReaderObj * self)856 Reader_dealloc(ReaderObj *self)
857 {
858 PyObject_GC_UnTrack(self);
859 Py_XDECREF(self->dialect);
860 Py_XDECREF(self->input_iter);
861 Py_XDECREF(self->fields);
862 if (self->field != NULL)
863 PyMem_Free(self->field);
864 PyObject_GC_Del(self);
865 }
866
867 static int
Reader_traverse(ReaderObj * self,visitproc visit,void * arg)868 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
869 {
870 Py_VISIT(self->dialect);
871 Py_VISIT(self->input_iter);
872 Py_VISIT(self->fields);
873 return 0;
874 }
875
876 static int
Reader_clear(ReaderObj * self)877 Reader_clear(ReaderObj *self)
878 {
879 Py_CLEAR(self->dialect);
880 Py_CLEAR(self->input_iter);
881 Py_CLEAR(self->fields);
882 return 0;
883 }
884
885 PyDoc_STRVAR(Reader_Type_doc,
886 "CSV reader\n"
887 "\n"
888 "Reader objects are responsible for reading and parsing tabular data\n"
889 "in CSV format.\n"
890 );
891
892 static struct PyMethodDef Reader_methods[] = {
893 { NULL, NULL }
894 };
895 #define R_OFF(x) offsetof(ReaderObj, x)
896
897 static struct PyMemberDef Reader_memberlist[] = {
898 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
899 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
900 { NULL }
901 };
902
903
904 static PyTypeObject Reader_Type = {
905 PyVarObject_HEAD_INIT(NULL, 0)
906 "_csv.reader", /*tp_name*/
907 sizeof(ReaderObj), /*tp_basicsize*/
908 0, /*tp_itemsize*/
909 /* methods */
910 (destructor)Reader_dealloc, /*tp_dealloc*/
911 0, /*tp_vectorcall_offset*/
912 (getattrfunc)0, /*tp_getattr*/
913 (setattrfunc)0, /*tp_setattr*/
914 0, /*tp_as_async*/
915 (reprfunc)0, /*tp_repr*/
916 0, /*tp_as_number*/
917 0, /*tp_as_sequence*/
918 0, /*tp_as_mapping*/
919 (hashfunc)0, /*tp_hash*/
920 (ternaryfunc)0, /*tp_call*/
921 (reprfunc)0, /*tp_str*/
922 0, /*tp_getattro*/
923 0, /*tp_setattro*/
924 0, /*tp_as_buffer*/
925 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
926 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
927 Reader_Type_doc, /*tp_doc*/
928 (traverseproc)Reader_traverse, /*tp_traverse*/
929 (inquiry)Reader_clear, /*tp_clear*/
930 0, /*tp_richcompare*/
931 0, /*tp_weaklistoffset*/
932 PyObject_SelfIter, /*tp_iter*/
933 (getiterfunc)Reader_iternext, /*tp_iternext*/
934 Reader_methods, /*tp_methods*/
935 Reader_memberlist, /*tp_members*/
936 0, /*tp_getset*/
937
938 };
939
940 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)941 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
942 {
943 PyObject * iterator, * dialect = NULL;
944 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
945
946 if (!self)
947 return NULL;
948
949 self->dialect = NULL;
950 self->fields = NULL;
951 self->input_iter = NULL;
952 self->field = NULL;
953 self->field_size = 0;
954 self->line_num = 0;
955
956 if (parse_reset(self) < 0) {
957 Py_DECREF(self);
958 return NULL;
959 }
960
961 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
962 Py_DECREF(self);
963 return NULL;
964 }
965 self->input_iter = PyObject_GetIter(iterator);
966 if (self->input_iter == NULL) {
967 Py_DECREF(self);
968 return NULL;
969 }
970 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
971 if (self->dialect == NULL) {
972 Py_DECREF(self);
973 return NULL;
974 }
975
976 PyObject_GC_Track(self);
977 return (PyObject *)self;
978 }
979
980 /*
981 * WRITER
982 */
983 /* ---------------------------------------------------------------- */
984 static void
join_reset(WriterObj * self)985 join_reset(WriterObj *self)
986 {
987 self->rec_len = 0;
988 self->num_fields = 0;
989 }
990
/* Allocation granularity (in elements) of the writer's record buffer. */
#define MEM_INCR (32 * 1024)
992
993 /* Calculate new record length or append field to record. Return new
994 * record length.
995 */
996 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)997 join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
998 Py_ssize_t field_len, int *quoted,
999 int copy_phase)
1000 {
1001 DialectObj *dialect = self->dialect;
1002 int i;
1003 Py_ssize_t rec_len;
1004
1005 #define INCLEN \
1006 do {\
1007 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1008 goto overflow; \
1009 } \
1010 rec_len++; \
1011 } while(0)
1012
1013 #define ADDCH(c) \
1014 do {\
1015 if (copy_phase) \
1016 self->rec[rec_len] = c;\
1017 INCLEN;\
1018 } while(0)
1019
1020 rec_len = self->rec_len;
1021
1022 /* If this is not the first field we need a field separator */
1023 if (self->num_fields > 0)
1024 ADDCH(dialect->delimiter);
1025
1026 /* Handle preceding quote */
1027 if (copy_phase && *quoted)
1028 ADDCH(dialect->quotechar);
1029
1030 /* Copy/count field data */
1031 /* If field is null just pass over */
1032 for (i = 0; field_data && (i < field_len); i++) {
1033 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1034 int want_escape = 0;
1035
1036 if (c == dialect->delimiter ||
1037 c == dialect->escapechar ||
1038 c == dialect->quotechar ||
1039 PyUnicode_FindChar(
1040 dialect->lineterminator, c, 0,
1041 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1042 if (dialect->quoting == QUOTE_NONE)
1043 want_escape = 1;
1044 else {
1045 if (c == dialect->quotechar) {
1046 if (dialect->doublequote)
1047 ADDCH(dialect->quotechar);
1048 else
1049 want_escape = 1;
1050 }
1051 if (!want_escape)
1052 *quoted = 1;
1053 }
1054 if (want_escape) {
1055 if (!dialect->escapechar) {
1056 PyErr_Format(_csvstate_global->error_obj,
1057 "need to escape, but no escapechar set");
1058 return -1;
1059 }
1060 ADDCH(dialect->escapechar);
1061 }
1062 }
1063 /* Copy field character into record buffer.
1064 */
1065 ADDCH(c);
1066 }
1067
1068 if (*quoted) {
1069 if (copy_phase)
1070 ADDCH(dialect->quotechar);
1071 else {
1072 INCLEN; /* starting quote */
1073 INCLEN; /* ending quote */
1074 }
1075 }
1076 return rec_len;
1077
1078 overflow:
1079 PyErr_NoMemory();
1080 return -1;
1081 #undef ADDCH
1082 #undef INCLEN
1083 }
1084
1085 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1086 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1087 {
1088 assert(rec_len >= 0);
1089
1090 if (rec_len > self->rec_size) {
1091 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1092 Py_UCS4 *rec_new = self->rec;
1093 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1094 if (rec_new == NULL) {
1095 PyErr_NoMemory();
1096 return 0;
1097 }
1098 self->rec = rec_new;
1099 self->rec_size = (Py_ssize_t)rec_size_new;
1100 }
1101 return 1;
1102 }
1103
1104 static int
join_append(WriterObj * self,PyObject * field,int quoted)1105 join_append(WriterObj *self, PyObject *field, int quoted)
1106 {
1107 unsigned int field_kind = -1;
1108 const void *field_data = NULL;
1109 Py_ssize_t field_len = 0;
1110 Py_ssize_t rec_len;
1111
1112 if (field != NULL) {
1113 if (PyUnicode_READY(field) == -1)
1114 return 0;
1115 field_kind = PyUnicode_KIND(field);
1116 field_data = PyUnicode_DATA(field);
1117 field_len = PyUnicode_GET_LENGTH(field);
1118 }
1119 rec_len = join_append_data(self, field_kind, field_data, field_len,
1120 "ed, 0);
1121 if (rec_len < 0)
1122 return 0;
1123
1124 /* grow record buffer if necessary */
1125 if (!join_check_rec_size(self, rec_len))
1126 return 0;
1127
1128 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1129 "ed, 1);
1130 self->num_fields++;
1131
1132 return 1;
1133 }
1134
1135 static int
join_append_lineterminator(WriterObj * self)1136 join_append_lineterminator(WriterObj *self)
1137 {
1138 Py_ssize_t terminator_len, i;
1139 unsigned int term_kind;
1140 const void *term_data;
1141
1142 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1143 if (terminator_len == -1)
1144 return 0;
1145
1146 /* grow record buffer if necessary */
1147 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1148 return 0;
1149
1150 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1151 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1152 for (i = 0; i < terminator_len; i++)
1153 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1154 self->rec_len += terminator_len;
1155
1156 return 1;
1157 }
1158
1159 PyDoc_STRVAR(csv_writerow_doc,
1160 "writerow(iterable)\n"
1161 "\n"
1162 "Construct and write a CSV record from an iterable of fields. Non-string\n"
1163 "elements will be converted to string.");
1164
1165 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1166 csv_writerow(WriterObj *self, PyObject *seq)
1167 {
1168 DialectObj *dialect = self->dialect;
1169 PyObject *iter, *field, *line, *result;
1170
1171 iter = PyObject_GetIter(seq);
1172 if (iter == NULL) {
1173 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1174 PyErr_Format(_csvstate_global->error_obj,
1175 "iterable expected, not %.200s",
1176 Py_TYPE(seq)->tp_name);
1177 }
1178 return NULL;
1179 }
1180
1181 /* Join all fields in internal buffer.
1182 */
1183 join_reset(self);
1184 while ((field = PyIter_Next(iter))) {
1185 int append_ok;
1186 int quoted;
1187
1188 switch (dialect->quoting) {
1189 case QUOTE_NONNUMERIC:
1190 quoted = !PyNumber_Check(field);
1191 break;
1192 case QUOTE_ALL:
1193 quoted = 1;
1194 break;
1195 default:
1196 quoted = 0;
1197 break;
1198 }
1199
1200 if (PyUnicode_Check(field)) {
1201 append_ok = join_append(self, field, quoted);
1202 Py_DECREF(field);
1203 }
1204 else if (field == Py_None) {
1205 append_ok = join_append(self, NULL, quoted);
1206 Py_DECREF(field);
1207 }
1208 else {
1209 PyObject *str;
1210
1211 str = PyObject_Str(field);
1212 Py_DECREF(field);
1213 if (str == NULL) {
1214 Py_DECREF(iter);
1215 return NULL;
1216 }
1217 append_ok = join_append(self, str, quoted);
1218 Py_DECREF(str);
1219 }
1220 if (!append_ok) {
1221 Py_DECREF(iter);
1222 return NULL;
1223 }
1224 }
1225 Py_DECREF(iter);
1226 if (PyErr_Occurred())
1227 return NULL;
1228
1229 if (self->num_fields > 0 && self->rec_len == 0) {
1230 if (dialect->quoting == QUOTE_NONE) {
1231 PyErr_Format(_csvstate_global->error_obj,
1232 "single empty field record must be quoted");
1233 return NULL;
1234 }
1235 self->num_fields--;
1236 if (!join_append(self, NULL, 1))
1237 return NULL;
1238 }
1239
1240 /* Add line terminator.
1241 */
1242 if (!join_append_lineterminator(self)) {
1243 return NULL;
1244 }
1245
1246 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1247 (void *) self->rec, self->rec_len);
1248 if (line == NULL) {
1249 return NULL;
1250 }
1251 result = PyObject_CallOneArg(self->write, line);
1252 Py_DECREF(line);
1253 return result;
1254 }
1255
/* Docstring for the "writerows" entry of Writer_methods. */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file. Non-string\n"
"elements will be converted to string.");
1261
1262 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1263 csv_writerows(WriterObj *self, PyObject *seqseq)
1264 {
1265 PyObject *row_iter, *row_obj, *result;
1266
1267 row_iter = PyObject_GetIter(seqseq);
1268 if (row_iter == NULL) {
1269 return NULL;
1270 }
1271 while ((row_obj = PyIter_Next(row_iter))) {
1272 result = csv_writerow(self, row_obj);
1273 Py_DECREF(row_obj);
1274 if (!result) {
1275 Py_DECREF(row_iter);
1276 return NULL;
1277 }
1278 else
1279 Py_DECREF(result);
1280 }
1281 Py_DECREF(row_iter);
1282 if (PyErr_Occurred())
1283 return NULL;
1284 Py_RETURN_NONE;
1285 }
1286
1287 static struct PyMethodDef Writer_methods[] = {
1288 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1289 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1290 { NULL, NULL }
1291 };
1292
1293 #define W_OFF(x) offsetof(WriterObj, x)
1294
1295 static struct PyMemberDef Writer_memberlist[] = {
1296 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1297 { NULL }
1298 };
1299
1300 static void
Writer_dealloc(WriterObj * self)1301 Writer_dealloc(WriterObj *self)
1302 {
1303 PyObject_GC_UnTrack(self);
1304 Py_XDECREF(self->dialect);
1305 Py_XDECREF(self->write);
1306 if (self->rec != NULL)
1307 PyMem_Free(self->rec);
1308 PyObject_GC_Del(self);
1309 }
1310
/* tp_traverse for writer objects: visit the two object references held by
 * the writer (`dialect` and `write`).  Returns 0 once both have been
 * passed to Py_VISIT.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    /* Py_VISIT relies on the parameter names `visit` and `arg`. */
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    return 0;
}
1318
/* tp_clear for writer objects: release the `dialect` and `write`
 * references via Py_CLEAR.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    return 0;
}
1326
/* Docstring for the writer type (the doc entry of Writer_Type). */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1333
1334 static PyTypeObject Writer_Type = {
1335 PyVarObject_HEAD_INIT(NULL, 0)
1336 "_csv.writer", /*tp_name*/
1337 sizeof(WriterObj), /*tp_basicsize*/
1338 0, /*tp_itemsize*/
1339 /* methods */
1340 (destructor)Writer_dealloc, /*tp_dealloc*/
1341 0, /*tp_vectorcall_offset*/
1342 (getattrfunc)0, /*tp_getattr*/
1343 (setattrfunc)0, /*tp_setattr*/
1344 0, /*tp_as_async*/
1345 (reprfunc)0, /*tp_repr*/
1346 0, /*tp_as_number*/
1347 0, /*tp_as_sequence*/
1348 0, /*tp_as_mapping*/
1349 (hashfunc)0, /*tp_hash*/
1350 (ternaryfunc)0, /*tp_call*/
1351 (reprfunc)0, /*tp_str*/
1352 0, /*tp_getattro*/
1353 0, /*tp_setattro*/
1354 0, /*tp_as_buffer*/
1355 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1356 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1357 Writer_Type_doc,
1358 (traverseproc)Writer_traverse, /*tp_traverse*/
1359 (inquiry)Writer_clear, /*tp_clear*/
1360 0, /*tp_richcompare*/
1361 0, /*tp_weaklistoffset*/
1362 (getiterfunc)0, /*tp_iter*/
1363 (getiterfunc)0, /*tp_iternext*/
1364 Writer_methods, /*tp_methods*/
1365 Writer_memberlist, /*tp_members*/
1366 0, /*tp_getset*/
1367 };
1368
1369 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1370 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1371 {
1372 PyObject * output_file, * dialect = NULL;
1373 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1374 _Py_IDENTIFIER(write);
1375
1376 if (!self)
1377 return NULL;
1378
1379 self->dialect = NULL;
1380 self->write = NULL;
1381
1382 self->rec = NULL;
1383 self->rec_size = 0;
1384 self->rec_len = 0;
1385 self->num_fields = 0;
1386
1387 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1388 Py_DECREF(self);
1389 return NULL;
1390 }
1391 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1392 Py_DECREF(self);
1393 return NULL;
1394 }
1395 if (self->write == NULL || !PyCallable_Check(self->write)) {
1396 PyErr_SetString(PyExc_TypeError,
1397 "argument 1 must have a \"write\" method");
1398 Py_DECREF(self);
1399 return NULL;
1400 }
1401 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1402 if (self->dialect == NULL) {
1403 Py_DECREF(self);
1404 return NULL;
1405 }
1406 PyObject_GC_Track(self);
1407 return (PyObject *)self;
1408 }
1409
1410 /*
1411 * DIALECT REGISTRY
1412 */
1413 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1414 csv_list_dialects(PyObject *module, PyObject *args)
1415 {
1416 return PyDict_Keys(_csvstate_global->dialects);
1417 }
1418
1419 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1420 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1421 {
1422 PyObject *name_obj, *dialect_obj = NULL;
1423 PyObject *dialect;
1424
1425 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1426 return NULL;
1427 if (!PyUnicode_Check(name_obj)) {
1428 PyErr_SetString(PyExc_TypeError,
1429 "dialect name must be a string");
1430 return NULL;
1431 }
1432 if (PyUnicode_READY(name_obj) == -1)
1433 return NULL;
1434 dialect = _call_dialect(dialect_obj, kwargs);
1435 if (dialect == NULL)
1436 return NULL;
1437 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1438 Py_DECREF(dialect);
1439 return NULL;
1440 }
1441 Py_DECREF(dialect);
1442 Py_RETURN_NONE;
1443 }
1444
1445 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1446 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1447 {
1448 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1449 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1450 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1451 }
1452 return NULL;
1453 }
1454 Py_RETURN_NONE;
1455 }
1456
1457 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1458 csv_get_dialect(PyObject *module, PyObject *name_obj)
1459 {
1460 return get_dialect_from_registry(name_obj);
1461 }
1462
1463 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1464 csv_field_size_limit(PyObject *module, PyObject *args)
1465 {
1466 PyObject *new_limit = NULL;
1467 long old_limit = _csvstate_global->field_limit;
1468
1469 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1470 return NULL;
1471 if (new_limit != NULL) {
1472 if (!PyLong_CheckExact(new_limit)) {
1473 PyErr_Format(PyExc_TypeError,
1474 "limit must be an integer");
1475 return NULL;
1476 }
1477 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1478 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1479 _csvstate_global->field_limit = old_limit;
1480 return NULL;
1481 }
1482 }
1483 return PyLong_FromLong(old_limit);
1484 }
1485
1486 /*
1487 * MODULE
1488 */
1489
/* Module docstring; referenced from the _csvmodule definition. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the\n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the\n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should\n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1547
/* Docstring for the "reader" entry of csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1562
/* Docstring for the "writer" entry of csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1576
1577 PyDoc_STRVAR(csv_list_dialects_doc,
1578 "Return a list of all know dialect names.\n"
1579 " names = csv.list_dialects()");
1580
/* Docstring for the "get_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1584
1585 PyDoc_STRVAR(csv_register_dialect_doc,
1586 "Create a mapping from a string name to a dialect class.\n"
1587 " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1588
/* Docstring for the "unregister_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1592
/* Docstring for the "field_size_limit" entry of csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1599
1600 static struct PyMethodDef csv_methods[] = {
1601 { "reader", (PyCFunction)(void(*)(void))csv_reader,
1602 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1603 { "writer", (PyCFunction)(void(*)(void))csv_writer,
1604 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1605 { "list_dialects", (PyCFunction)csv_list_dialects,
1606 METH_NOARGS, csv_list_dialects_doc},
1607 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
1608 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1609 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1610 METH_O, csv_unregister_dialect_doc},
1611 { "get_dialect", (PyCFunction)csv_get_dialect,
1612 METH_O, csv_get_dialect_doc},
1613 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1614 METH_VARARGS, csv_field_size_limit_doc},
1615 { NULL, NULL }
1616 };
1617
1618 static struct PyModuleDef _csvmodule = {
1619 PyModuleDef_HEAD_INIT,
1620 "_csv",
1621 csv_module_doc,
1622 sizeof(_csvstate),
1623 csv_methods,
1624 NULL,
1625 _csv_traverse,
1626 _csv_clear,
1627 _csv_free
1628 };
1629
1630 PyMODINIT_FUNC
PyInit__csv(void)1631 PyInit__csv(void)
1632 {
1633 PyObject *module;
1634 const StyleDesc *style;
1635
1636 if (PyType_Ready(&Reader_Type) < 0)
1637 return NULL;
1638
1639 if (PyType_Ready(&Writer_Type) < 0)
1640 return NULL;
1641
1642 /* Create the module and add the functions */
1643 module = PyModule_Create(&_csvmodule);
1644 if (module == NULL)
1645 return NULL;
1646
1647 /* Add version to the module. */
1648 if (PyModule_AddStringConstant(module, "__version__",
1649 MODULE_VERSION) == -1)
1650 return NULL;
1651
1652 /* Set the field limit */
1653 get_csv_state(module)->field_limit = 128 * 1024;
1654 /* Do I still need to add this var to the Module Dict? */
1655
1656 /* Add _dialects dictionary */
1657 get_csv_state(module)->dialects = PyDict_New();
1658 if (get_csv_state(module)->dialects == NULL)
1659 return NULL;
1660 Py_INCREF(get_csv_state(module)->dialects);
1661 if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects))
1662 return NULL;
1663
1664 /* Add quote styles into dictionary */
1665 for (style = quote_styles; style->name; style++) {
1666 if (PyModule_AddIntConstant(module, style->name,
1667 style->style) == -1)
1668 return NULL;
1669 }
1670
1671 if (PyModule_AddType(module, &Dialect_Type)) {
1672 return NULL;
1673 }
1674
1675 /* Add the CSV exception object to the module. */
1676 get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1677 if (get_csv_state(module)->error_obj == NULL)
1678 return NULL;
1679 Py_INCREF(get_csv_state(module)->error_obj);
1680 PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj);
1681 return module;
1682 }
1683