1 /* csv module */
2
3 /*
4
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module. Users should not use this module directly, but import the csv.py
7 module instead.
8
9 */
10
11 #define MODULE_VERSION "1.0"
12
13 #include "Python.h"
14 #include "structmember.h" // PyMemberDef
15 #include <stdbool.h>
16
17
/* Per-module state.  The module's exception, registry, type objects and
 * settings are kept here and reached through PyModule_GetState(). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;    /* Dialect registry (looked up with the dict API) */
    PyTypeObject *dialect_type;  /* type called to build dialect instances */
    PyTypeObject *reader_type;   /* type used to allocate reader objects */
    PyTypeObject *writer_type;   /* presumably the writer counterpart */
    long field_limit;          /* max parsed field size */
} _csvstate;
26
/* Forward declaration: _csv_state_from_type() takes this definition's
 * address to find the module that owns a given type. */
static struct PyModuleDef _csvmodule;
28
29 static inline _csvstate*
get_csv_state(PyObject * module)30 get_csv_state(PyObject *module)
31 {
32 void *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return (_csvstate *)state;
35 }
36
37 static int
_csv_clear(PyObject * module)38 _csv_clear(PyObject *module)
39 {
40 _csvstate *module_state = PyModule_GetState(module);
41 Py_CLEAR(module_state->error_obj);
42 Py_CLEAR(module_state->dialects);
43 Py_CLEAR(module_state->dialect_type);
44 Py_CLEAR(module_state->reader_type);
45 Py_CLEAR(module_state->writer_type);
46 return 0;
47 }
48
49 static int
_csv_traverse(PyObject * module,visitproc visit,void * arg)50 _csv_traverse(PyObject *module, visitproc visit, void *arg)
51 {
52 _csvstate *module_state = PyModule_GetState(module);
53 Py_VISIT(module_state->error_obj);
54 Py_VISIT(module_state->dialects);
55 Py_VISIT(module_state->dialect_type);
56 Py_VISIT(module_state->reader_type);
57 Py_VISIT(module_state->writer_type);
58 return 0;
59 }
60
61 static void
_csv_free(void * module)62 _csv_free(void *module)
63 {
64 _csv_clear((PyObject *)module);
65 }
66
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,            /* at the beginning of a record */
    START_FIELD,             /* expecting the start of a field */
    ESCAPED_CHAR,            /* escape character seen outside quotes */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,   /* quote character seen inside a quoted field */
    EAT_CRNL,                /* skipping line-ending characters */
    AFTER_ESCAPED_CRNL       /* an escaped CR or LF was just stored */
} ParserState;
72
/* Quoting styles; the numeric values are what a dialect's "quoting"
 * option is compared against. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
76
/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;   /* numeric style value */
    const char *name;   /* symbolic name; NULL in the table's sentinel entry */
} StyleDesc;
81
/* Table of all known quoting styles.  The zero-filled last entry has a
 * NULL name, which is what dialect_check_quoting() uses to stop iterating. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
89
/* Instance layout of a Dialect object: the options after they have been
 * converted to C values by dialect_new().  A character option equal to 0
 * is reported to Python as None by the getters. */
typedef struct {
    PyObject_HEAD

    char doublequote;           /* is " represented by ""? */
    char skipinitialspace;      /* ignore spaces following delimiter? */
    char strict;                /* raise exception on bad CSV */
    int quoting;                /* style of quoting to write */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character */
    Py_UCS4 escapechar;         /* escape character */
    PyObject *lineterminator;   /* string to write between records */

} DialectObj;
103
/* Instance layout of a reader object: the input iterator, the dialect in
 * use and the parser's working state. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer */
    Py_ssize_t field_size;  /* size of allocated buffer */
    Py_ssize_t field_len;   /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number */
} ReaderObj;
119
/* Instance layout of a writer object: the output callable, the dialect in
 * use and the buffer in which one output record is assembled. */
typedef struct {
    PyObject_HEAD

    PyObject *write;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;            /* buffer for parser.join */
    Py_ssize_t rec_size;        /* size of allocated record */
    Py_ssize_t rec_len;         /* length of record */
    int num_fields;     /* number of fields in record */

    PyObject *error_obj;       /* cached error object */
} WriterObj;
134
135 /*
136 * DIALECT class
137 */
138
139 static PyObject *
get_dialect_from_registry(PyObject * name_obj,_csvstate * module_state)140 get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
141 {
142 PyObject *dialect_obj;
143
144 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
145 if (dialect_obj == NULL) {
146 if (!PyErr_Occurred())
147 PyErr_Format(module_state->error_obj, "unknown dialect");
148 }
149 else
150 Py_INCREF(dialect_obj);
151
152 return dialect_obj;
153 }
154
155 static PyObject *
get_nullchar_as_None(Py_UCS4 c)156 get_nullchar_as_None(Py_UCS4 c)
157 {
158 if (c == '\0') {
159 Py_RETURN_NONE;
160 }
161 else
162 return PyUnicode_FromOrdinal(c);
163 }
164
165 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))166 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
167 {
168 Py_XINCREF(self->lineterminator);
169 return self->lineterminator;
170 }
171
172 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))173 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
174 {
175 return get_nullchar_as_None(self->delimiter);
176 }
177
178 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))179 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
180 {
181 return get_nullchar_as_None(self->escapechar);
182 }
183
184 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))185 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
186 {
187 return get_nullchar_as_None(self->quotechar);
188 }
189
190 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))191 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
192 {
193 return PyLong_FromLong(self->quoting);
194 }
195
196 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)197 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
198 {
199 if (src == NULL)
200 *target = dflt;
201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
205 *target = (char)b;
206 }
207 return 0;
208 }
209
210 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)211 _set_int(const char *name, int *target, PyObject *src, int dflt)
212 {
213 if (src == NULL)
214 *target = dflt;
215 else {
216 int value;
217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
224 return -1;
225 }
226 *target = value;
227 }
228 return 0;
229 }
230
231 static int
_set_char_or_none(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)232 _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
233 {
234 if (src == NULL) {
235 *target = dflt;
236 }
237 else {
238 *target = '\0';
239 if (src != Py_None) {
240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be string or None, not %.200s", name,
243 Py_TYPE(src)->tp_name);
244 return -1;
245 }
246 Py_ssize_t len = PyUnicode_GetLength(src);
247 if (len < 0) {
248 return -1;
249 }
250 if (len > 1) {
251 PyErr_Format(PyExc_TypeError,
252 "\"%s\" must be a 1-character string",
253 name);
254 return -1;
255 }
256 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
257 else {
258 *target = PyUnicode_READ_CHAR(src, 0);
259 }
260 }
261 }
262 return 0;
263 }
264
265 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)266 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
267 {
268 if (src == NULL) {
269 *target = dflt;
270 }
271 else {
272 *target = '\0';
273 if (!PyUnicode_Check(src)) {
274 PyErr_Format(PyExc_TypeError,
275 "\"%s\" must be string, not %.200s", name,
276 Py_TYPE(src)->tp_name);
277 return -1;
278 }
279 Py_ssize_t len = PyUnicode_GetLength(src);
280 if (len < 0) {
281 return -1;
282 }
283 if (len > 1) {
284 PyErr_Format(PyExc_TypeError,
285 "\"%s\" must be a 1-character string",
286 name);
287 return -1;
288 }
289 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
290 else {
291 *target = PyUnicode_READ_CHAR(src, 0);
292 }
293 }
294 return 0;
295 }
296
297 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)298 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
299 {
300 if (src == NULL)
301 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
302 else {
303 if (src == Py_None)
304 *target = NULL;
305 else if (!PyUnicode_Check(src)) {
306 PyErr_Format(PyExc_TypeError,
307 "\"%s\" must be a string", name);
308 return -1;
309 }
310 else {
311 if (PyUnicode_READY(src) == -1)
312 return -1;
313 Py_INCREF(src);
314 Py_XSETREF(*target, src);
315 }
316 }
317 return 0;
318 }
319
320 static int
dialect_check_quoting(int quoting)321 dialect_check_quoting(int quoting)
322 {
323 const StyleDesc *qs;
324
325 for (qs = quote_styles; qs->name; qs++) {
326 if ((int)qs->style == quoting)
327 return 0;
328 }
329 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
330 return -1;
331 }
332
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Boolean options exposed as read-only attributes straight from the
 * corresponding DialectObj fields. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
    { "strict",             T_BOOL, D_OFF(strict), READONLY },
    { NULL }
};
341
/* Options that need conversion when read.  Each entry supplies only a
 * getter; no setter is given. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
350
351 static void
Dialect_dealloc(DialectObj * self)352 Dialect_dealloc(DialectObj *self)
353 {
354 PyTypeObject *tp = Py_TYPE(self);
355 PyObject_GC_UnTrack(self);
356 tp->tp_clear((PyObject *)self);
357 PyObject_GC_Del(self);
358 Py_DECREF(tp);
359 }
360
/* Keyword names accepted by dialect_new(), in the order of the output
 * pointers passed to PyArg_ParseTupleAndKeywords(). */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
373
374 static _csvstate *
_csv_state_from_type(PyTypeObject * type,const char * name)375 _csv_state_from_type(PyTypeObject *type, const char *name)
376 {
377 PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
378 if (module == NULL) {
379 return NULL;
380 }
381 _csvstate *module_state = PyModule_GetState(module);
382 if (module_state == NULL) {
383 PyErr_Format(PyExc_SystemError,
384 "%s: No _csv module state found", name);
385 return NULL;
386 }
387 return module_state;
388 }
389
390 static PyObject *
dialect_new(PyTypeObject * type,PyObject * args,PyObject * kwargs)391 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
392 {
393 DialectObj *self;
394 PyObject *ret = NULL;
395 PyObject *dialect = NULL;
396 PyObject *delimiter = NULL;
397 PyObject *doublequote = NULL;
398 PyObject *escapechar = NULL;
399 PyObject *lineterminator = NULL;
400 PyObject *quotechar = NULL;
401 PyObject *quoting = NULL;
402 PyObject *skipinitialspace = NULL;
403 PyObject *strict = NULL;
404
405 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
406 "|OOOOOOOOO", dialect_kws,
407 &dialect,
408 &delimiter,
409 &doublequote,
410 &escapechar,
411 &lineterminator,
412 "echar,
413 "ing,
414 &skipinitialspace,
415 &strict))
416 return NULL;
417
418 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
419 if (module_state == NULL) {
420 return NULL;
421 }
422
423 if (dialect != NULL) {
424 if (PyUnicode_Check(dialect)) {
425 dialect = get_dialect_from_registry(dialect, module_state);
426 if (dialect == NULL)
427 return NULL;
428 }
429 else
430 Py_INCREF(dialect);
431 /* Can we reuse this instance? */
432 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
433 delimiter == NULL &&
434 doublequote == NULL &&
435 escapechar == NULL &&
436 lineterminator == NULL &&
437 quotechar == NULL &&
438 quoting == NULL &&
439 skipinitialspace == NULL &&
440 strict == NULL)
441 return dialect;
442 }
443
444 self = (DialectObj *)type->tp_alloc(type, 0);
445 if (self == NULL) {
446 Py_CLEAR(dialect);
447 return NULL;
448 }
449 self->lineterminator = NULL;
450
451 Py_XINCREF(delimiter);
452 Py_XINCREF(doublequote);
453 Py_XINCREF(escapechar);
454 Py_XINCREF(lineterminator);
455 Py_XINCREF(quotechar);
456 Py_XINCREF(quoting);
457 Py_XINCREF(skipinitialspace);
458 Py_XINCREF(strict);
459 if (dialect != NULL) {
460 #define DIALECT_GETATTR(v, n) \
461 do { \
462 if (v == NULL) { \
463 v = PyObject_GetAttrString(dialect, n); \
464 if (v == NULL) \
465 PyErr_Clear(); \
466 } \
467 } while (0)
468 DIALECT_GETATTR(delimiter, "delimiter");
469 DIALECT_GETATTR(doublequote, "doublequote");
470 DIALECT_GETATTR(escapechar, "escapechar");
471 DIALECT_GETATTR(lineterminator, "lineterminator");
472 DIALECT_GETATTR(quotechar, "quotechar");
473 DIALECT_GETATTR(quoting, "quoting");
474 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
475 DIALECT_GETATTR(strict, "strict");
476 }
477
478 /* check types and convert to C values */
479 #define DIASET(meth, name, target, src, dflt) \
480 if (meth(name, target, src, dflt)) \
481 goto err
482 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
483 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
484 DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, 0);
485 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
486 DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
487 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
488 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
489 DIASET(_set_bool, "strict", &self->strict, strict, false);
490
491 /* validate options */
492 if (dialect_check_quoting(self->quoting))
493 goto err;
494 if (self->delimiter == 0) {
495 PyErr_SetString(PyExc_TypeError,
496 "\"delimiter\" must be a 1-character string");
497 goto err;
498 }
499 if (quotechar == Py_None && quoting == NULL)
500 self->quoting = QUOTE_NONE;
501 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
502 PyErr_SetString(PyExc_TypeError,
503 "quotechar must be set if quoting enabled");
504 goto err;
505 }
506 if (self->lineterminator == 0) {
507 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
508 goto err;
509 }
510
511 ret = (PyObject *)self;
512 Py_INCREF(self);
513 err:
514 Py_CLEAR(self);
515 Py_CLEAR(dialect);
516 Py_CLEAR(delimiter);
517 Py_CLEAR(doublequote);
518 Py_CLEAR(escapechar);
519 Py_CLEAR(lineterminator);
520 Py_CLEAR(quotechar);
521 Py_CLEAR(quoting);
522 Py_CLEAR(skipinitialspace);
523 Py_CLEAR(strict);
524 return ret;
525 }
526
/* Since Dialect is now a heap type, it inherits the pickling methods for
 * protocols 0 and 1 from object; they therefore need to be overridden. */
529
530 PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
531
532 static PyObject *
Dialect_reduce(PyObject * self,PyObject * args)533 Dialect_reduce(PyObject *self, PyObject *args) {
534 PyErr_Format(PyExc_TypeError,
535 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
536 return NULL;
537 }
538
/* Method table for Dialect: both pickling entry points map to the same
 * function, which always raises. */
static struct PyMethodDef dialect_methods[] = {
    {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {NULL, NULL}
};
544
/* Docstring installed as the Dialect type's tp_doc. */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");
549
/* tp_clear: release the only object reference a Dialect owns.
 * Always returns 0. */
static int
Dialect_clear(DialectObj *self)
{
    Py_CLEAR(self->lineterminator);
    return 0;
}
556
/* tp_traverse: visit the line terminator and the instance's type.
 * Returns 0, or the first non-zero result produced by Py_VISIT. */
static int
Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->lineterminator);
    /* The instance holds a reference to its type (released in
     * Dialect_dealloc), so the type is visited too. */
    Py_VISIT(Py_TYPE(self));
    return 0;
}
564
/* Slot table wiring the Dialect implementation functions and tables into
 * the type described by Dialect_Type_spec. */
static PyType_Slot Dialect_Type_slots[] = {
    {Py_tp_doc, (char*)Dialect_Type_doc},
    {Py_tp_members, Dialect_memberlist},
    {Py_tp_getset, Dialect_getsetlist},
    {Py_tp_new, dialect_new},
    {Py_tp_methods, dialect_methods},
    {Py_tp_dealloc, Dialect_dealloc},
    {Py_tp_clear, Dialect_clear},
    {Py_tp_traverse, Dialect_traverse},
    {0, NULL}
};
576
/* Type specification for "_csv.Dialect": subclassable, GC-enabled and
 * immutable.
 * NOTE(review): this object has external linkage; if nothing outside this
 * file refers to it, it could be made static - confirm before changing. */
PyType_Spec Dialect_Type_spec = {
    .name = "_csv.Dialect",
    .basicsize = sizeof(DialectObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Dialect_Type_slots,
};
584
585
586 /*
587 * Return an instance of the dialect type, given a Python instance or kwarg
588 * description of the dialect
589 */
590 static PyObject *
_call_dialect(_csvstate * module_state,PyObject * dialect_inst,PyObject * kwargs)591 _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
592 {
593 PyObject *type = (PyObject *)module_state->dialect_type;
594 if (dialect_inst) {
595 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
596 }
597 else {
598 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
599 }
600 }
601
602 /*
603 * READER
604 */
605 static int
parse_save_field(ReaderObj * self)606 parse_save_field(ReaderObj *self)
607 {
608 PyObject *field;
609
610 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
611 (void *) self->field, self->field_len);
612 if (field == NULL)
613 return -1;
614 self->field_len = 0;
615 if (self->numeric_field) {
616 PyObject *tmp;
617
618 self->numeric_field = 0;
619 tmp = PyNumber_Float(field);
620 Py_DECREF(field);
621 if (tmp == NULL)
622 return -1;
623 field = tmp;
624 }
625 if (PyList_Append(self->fields, field) < 0) {
626 Py_DECREF(field);
627 return -1;
628 }
629 Py_DECREF(field);
630 return 0;
631 }
632
633 static int
parse_grow_buff(ReaderObj * self)634 parse_grow_buff(ReaderObj *self)
635 {
636 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
637
638 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
639 Py_UCS4 *field_new = self->field;
640 PyMem_Resize(field_new, Py_UCS4, field_size_new);
641 if (field_new == NULL) {
642 PyErr_NoMemory();
643 return 0;
644 }
645 self->field = field_new;
646 self->field_size = field_size_new;
647 return 1;
648 }
649
650 static int
parse_add_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)651 parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
652 {
653 if (self->field_len >= module_state->field_limit) {
654 PyErr_Format(module_state->error_obj,
655 "field larger than field limit (%ld)",
656 module_state->field_limit);
657 return -1;
658 }
659 if (self->field_len == self->field_size && !parse_grow_buff(self))
660 return -1;
661 self->field[self->field_len++] = c;
662 return 0;
663 }
664
/* Feed one character to the reader's state machine.
 *
 * `c` is the next input character; the value '\0' is passed by
 * Reader_iternext() after the last character of each input line and acts
 * as the end-of-line marker.  Depending on self->state the character is
 * stored in the current field, ends the field (it is then appended to
 * self->fields), or only changes the state.
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* character following an escape character, outside quotes */
        if (c == '\n' || c=='\r') {
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* an escape at the very end of a line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* the end-of-line marker right after an escaped CR/LF is ignored;
           any other character is handled as in IN_FIELD */
        if (c == '\0')
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: stay in this state */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* character following an escape character, inside quotes;
           an escape at the very end of a line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* non-strict: keep the character and continue unquoted */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line ending: only more CR/LF or the end-of-line marker
           may follow */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(module_state->error_obj,
                         "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
846
847 static int
parse_reset(ReaderObj * self)848 parse_reset(ReaderObj *self)
849 {
850 Py_XSETREF(self->fields, PyList_New(0));
851 if (self->fields == NULL)
852 return -1;
853 self->field_len = 0;
854 self->state = START_RECORD;
855 self->numeric_field = 0;
856 return 0;
857 }
858
859 static PyObject *
Reader_iternext(ReaderObj * self)860 Reader_iternext(ReaderObj *self)
861 {
862 PyObject *fields = NULL;
863 Py_UCS4 c;
864 Py_ssize_t pos, linelen;
865 unsigned int kind;
866 const void *data;
867 PyObject *lineobj;
868
869 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
870 "Reader.__next__");
871 if (module_state == NULL) {
872 return NULL;
873 }
874
875 if (parse_reset(self) < 0)
876 return NULL;
877 do {
878 lineobj = PyIter_Next(self->input_iter);
879 if (lineobj == NULL) {
880 /* End of input OR exception */
881 if (!PyErr_Occurred() && (self->field_len != 0 ||
882 self->state == IN_QUOTED_FIELD)) {
883 if (self->dialect->strict)
884 PyErr_SetString(module_state->error_obj,
885 "unexpected end of data");
886 else if (parse_save_field(self) >= 0)
887 break;
888 }
889 return NULL;
890 }
891 if (!PyUnicode_Check(lineobj)) {
892 PyErr_Format(module_state->error_obj,
893 "iterator should return strings, "
894 "not %.200s "
895 "(the file should be opened in text mode)",
896 Py_TYPE(lineobj)->tp_name
897 );
898 Py_DECREF(lineobj);
899 return NULL;
900 }
901 if (PyUnicode_READY(lineobj) == -1) {
902 Py_DECREF(lineobj);
903 return NULL;
904 }
905 ++self->line_num;
906 kind = PyUnicode_KIND(lineobj);
907 data = PyUnicode_DATA(lineobj);
908 pos = 0;
909 linelen = PyUnicode_GET_LENGTH(lineobj);
910 while (linelen--) {
911 c = PyUnicode_READ(kind, data, pos);
912 if (c == '\0') {
913 Py_DECREF(lineobj);
914 PyErr_Format(module_state->error_obj,
915 "line contains NUL");
916 goto err;
917 }
918 if (parse_process_char(self, module_state, c) < 0) {
919 Py_DECREF(lineobj);
920 goto err;
921 }
922 pos++;
923 }
924 Py_DECREF(lineobj);
925 if (parse_process_char(self, module_state, 0) < 0)
926 goto err;
927 } while (self->state != START_RECORD);
928
929 fields = self->fields;
930 self->fields = NULL;
931 err:
932 return fields;
933 }
934
935 static void
Reader_dealloc(ReaderObj * self)936 Reader_dealloc(ReaderObj *self)
937 {
938 PyTypeObject *tp = Py_TYPE(self);
939 PyObject_GC_UnTrack(self);
940 tp->tp_clear((PyObject *)self);
941 if (self->field != NULL) {
942 PyMem_Free(self->field);
943 self->field = NULL;
944 }
945 PyObject_GC_Del(self);
946 Py_DECREF(tp);
947 }
948
/* tp_traverse: visit every object a reader references, including its type.
 * Returns 0, or the first non-zero result produced by Py_VISIT. */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    /* The instance holds a reference to its type (released in
     * Reader_dealloc), so the type is visited too. */
    Py_VISIT(Py_TYPE(self));
    return 0;
}
958
/* tp_clear: release the object references a reader owns.  The field
 * buffer is not touched here; Reader_dealloc() frees it.
 * Always returns 0. */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
967
/* Docstring installed as the reader type's tp_doc. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
974
/* Readers define no methods; the table holds only the terminating entry. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` inside ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes exposed straight from ReaderObj fields. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
985
986
/* Slot table wiring the reader implementation into the type described by
 * Reader_Type_spec.  A reader is its own iterator (PyObject_SelfIter). */
static PyType_Slot Reader_Type_slots[] = {
    {Py_tp_doc, (char*)Reader_Type_doc},
    {Py_tp_traverse, Reader_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, Reader_iternext},
    {Py_tp_methods, Reader_methods},
    {Py_tp_members, Reader_memberlist},
    {Py_tp_clear, Reader_clear},
    {Py_tp_dealloc, Reader_dealloc},
    {0, NULL}
};
998
/* Type specification for "_csv.reader": subclassable, GC-enabled and
 * immutable.
 * NOTE(review): this object has external linkage; if nothing outside this
 * file refers to it, it could be made static - confirm before changing. */
PyType_Spec Reader_Type_spec = {
    .name = "_csv.reader",
    .basicsize = sizeof(ReaderObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Reader_Type_slots
};
1006
1007
1008 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)1009 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1010 {
1011 PyObject * iterator, * dialect = NULL;
1012 _csvstate *module_state = get_csv_state(module);
1013 ReaderObj * self = PyObject_GC_New(
1014 ReaderObj,
1015 module_state->reader_type);
1016
1017 if (!self)
1018 return NULL;
1019
1020 self->dialect = NULL;
1021 self->fields = NULL;
1022 self->input_iter = NULL;
1023 self->field = NULL;
1024 self->field_size = 0;
1025 self->line_num = 0;
1026
1027 if (parse_reset(self) < 0) {
1028 Py_DECREF(self);
1029 return NULL;
1030 }
1031
1032 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1033 Py_DECREF(self);
1034 return NULL;
1035 }
1036 self->input_iter = PyObject_GetIter(iterator);
1037 if (self->input_iter == NULL) {
1038 Py_DECREF(self);
1039 return NULL;
1040 }
1041 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042 keyword_args);
1043 if (self->dialect == NULL) {
1044 Py_DECREF(self);
1045 return NULL;
1046 }
1047
1048 PyObject_GC_Track(self);
1049 return (PyObject *)self;
1050 }
1051
1052 /*
1053 * WRITER
1054 */
1055 /* ---------------------------------------------------------------- */
1056 static void
join_reset(WriterObj * self)1057 join_reset(WriterObj *self)
1058 {
1059 self->rec_len = 0;
1060 self->num_fields = 0;
1061 }
1062
/* Granularity, in characters, used by join_check_rec_size() when it
 * rounds up the size of the writer's record buffer. */
#define MEM_INCR 32768
1064
1065 /* Calculate new record length or append field to record. Return new
1066 * record length.
1067 */
1068 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1069 join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
1070 Py_ssize_t field_len, int *quoted,
1071 int copy_phase)
1072 {
1073 DialectObj *dialect = self->dialect;
1074 int i;
1075 Py_ssize_t rec_len;
1076
1077 #define INCLEN \
1078 do {\
1079 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1080 goto overflow; \
1081 } \
1082 rec_len++; \
1083 } while(0)
1084
1085 #define ADDCH(c) \
1086 do {\
1087 if (copy_phase) \
1088 self->rec[rec_len] = c;\
1089 INCLEN;\
1090 } while(0)
1091
1092 rec_len = self->rec_len;
1093
1094 /* If this is not the first field we need a field separator */
1095 if (self->num_fields > 0)
1096 ADDCH(dialect->delimiter);
1097
1098 /* Handle preceding quote */
1099 if (copy_phase && *quoted)
1100 ADDCH(dialect->quotechar);
1101
1102 /* Copy/count field data */
1103 /* If field is null just pass over */
1104 for (i = 0; field_data && (i < field_len); i++) {
1105 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1106 int want_escape = 0;
1107
1108 if (c == dialect->delimiter ||
1109 c == dialect->escapechar ||
1110 c == dialect->quotechar ||
1111 PyUnicode_FindChar(
1112 dialect->lineterminator, c, 0,
1113 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1114 if (dialect->quoting == QUOTE_NONE)
1115 want_escape = 1;
1116 else {
1117 if (c == dialect->quotechar) {
1118 if (dialect->doublequote)
1119 ADDCH(dialect->quotechar);
1120 else
1121 want_escape = 1;
1122 }
1123 else if (c == dialect->escapechar) {
1124 want_escape = 1;
1125 }
1126 if (!want_escape)
1127 *quoted = 1;
1128 }
1129 if (want_escape) {
1130 if (!dialect->escapechar) {
1131 PyErr_Format(self->error_obj,
1132 "need to escape, but no escapechar set");
1133 return -1;
1134 }
1135 ADDCH(dialect->escapechar);
1136 }
1137 }
1138 /* Copy field character into record buffer.
1139 */
1140 ADDCH(c);
1141 }
1142
1143 if (*quoted) {
1144 if (copy_phase)
1145 ADDCH(dialect->quotechar);
1146 else {
1147 INCLEN; /* starting quote */
1148 INCLEN; /* ending quote */
1149 }
1150 }
1151 return rec_len;
1152
1153 overflow:
1154 PyErr_NoMemory();
1155 return -1;
1156 #undef ADDCH
1157 #undef INCLEN
1158 }
1159
1160 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1161 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1162 {
1163 assert(rec_len >= 0);
1164
1165 if (rec_len > self->rec_size) {
1166 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1167 Py_UCS4 *rec_new = self->rec;
1168 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169 if (rec_new == NULL) {
1170 PyErr_NoMemory();
1171 return 0;
1172 }
1173 self->rec = rec_new;
1174 self->rec_size = (Py_ssize_t)rec_size_new;
1175 }
1176 return 1;
1177 }
1178
1179 static int
join_append(WriterObj * self,PyObject * field,int quoted)1180 join_append(WriterObj *self, PyObject *field, int quoted)
1181 {
1182 unsigned int field_kind = -1;
1183 const void *field_data = NULL;
1184 Py_ssize_t field_len = 0;
1185 Py_ssize_t rec_len;
1186
1187 if (field != NULL) {
1188 if (PyUnicode_READY(field) == -1)
1189 return 0;
1190 field_kind = PyUnicode_KIND(field);
1191 field_data = PyUnicode_DATA(field);
1192 field_len = PyUnicode_GET_LENGTH(field);
1193 }
1194 rec_len = join_append_data(self, field_kind, field_data, field_len,
1195 "ed, 0);
1196 if (rec_len < 0)
1197 return 0;
1198
1199 /* grow record buffer if necessary */
1200 if (!join_check_rec_size(self, rec_len))
1201 return 0;
1202
1203 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1204 "ed, 1);
1205 self->num_fields++;
1206
1207 return 1;
1208 }
1209
1210 static int
join_append_lineterminator(WriterObj * self)1211 join_append_lineterminator(WriterObj *self)
1212 {
1213 Py_ssize_t terminator_len, i;
1214 unsigned int term_kind;
1215 const void *term_data;
1216
1217 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1218 if (terminator_len == -1)
1219 return 0;
1220
1221 /* grow record buffer if necessary */
1222 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223 return 0;
1224
1225 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227 for (i = 0; i < terminator_len; i++)
1228 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1229 self->rec_len += terminator_len;
1230
1231 return 1;
1232 }
1233
1234 PyDoc_STRVAR(csv_writerow_doc,
1235 "writerow(iterable)\n"
1236 "\n"
1237 "Construct and write a CSV record from an iterable of fields. Non-string\n"
1238 "elements will be converted to string.");
1239
1240 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1241 csv_writerow(WriterObj *self, PyObject *seq)
1242 {
1243 DialectObj *dialect = self->dialect;
1244 PyObject *iter, *field, *line, *result;
1245
1246 iter = PyObject_GetIter(seq);
1247 if (iter == NULL) {
1248 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1249 PyErr_Format(self->error_obj,
1250 "iterable expected, not %.200s",
1251 Py_TYPE(seq)->tp_name);
1252 }
1253 return NULL;
1254 }
1255
1256 /* Join all fields in internal buffer.
1257 */
1258 join_reset(self);
1259 while ((field = PyIter_Next(iter))) {
1260 int append_ok;
1261 int quoted;
1262
1263 switch (dialect->quoting) {
1264 case QUOTE_NONNUMERIC:
1265 quoted = !PyNumber_Check(field);
1266 break;
1267 case QUOTE_ALL:
1268 quoted = 1;
1269 break;
1270 default:
1271 quoted = 0;
1272 break;
1273 }
1274
1275 if (PyUnicode_Check(field)) {
1276 append_ok = join_append(self, field, quoted);
1277 Py_DECREF(field);
1278 }
1279 else if (field == Py_None) {
1280 append_ok = join_append(self, NULL, quoted);
1281 Py_DECREF(field);
1282 }
1283 else {
1284 PyObject *str;
1285
1286 str = PyObject_Str(field);
1287 Py_DECREF(field);
1288 if (str == NULL) {
1289 Py_DECREF(iter);
1290 return NULL;
1291 }
1292 append_ok = join_append(self, str, quoted);
1293 Py_DECREF(str);
1294 }
1295 if (!append_ok) {
1296 Py_DECREF(iter);
1297 return NULL;
1298 }
1299 }
1300 Py_DECREF(iter);
1301 if (PyErr_Occurred())
1302 return NULL;
1303
1304 if (self->num_fields > 0 && self->rec_len == 0) {
1305 if (dialect->quoting == QUOTE_NONE) {
1306 PyErr_Format(self->error_obj,
1307 "single empty field record must be quoted");
1308 return NULL;
1309 }
1310 self->num_fields--;
1311 if (!join_append(self, NULL, 1))
1312 return NULL;
1313 }
1314
1315 /* Add line terminator.
1316 */
1317 if (!join_append_lineterminator(self)) {
1318 return NULL;
1319 }
1320
1321 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1322 (void *) self->rec, self->rec_len);
1323 if (line == NULL) {
1324 return NULL;
1325 }
1326 result = PyObject_CallOneArg(self->write, line);
1327 Py_DECREF(line);
1328 return result;
1329 }
1330
1331 PyDoc_STRVAR(csv_writerows_doc,
1332 "writerows(iterable of iterables)\n"
1333 "\n"
1334 "Construct and write a series of iterables to a csv file. Non-string\n"
1335 "elements will be converted to string.");
1336
1337 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1338 csv_writerows(WriterObj *self, PyObject *seqseq)
1339 {
1340 PyObject *row_iter, *row_obj, *result;
1341
1342 row_iter = PyObject_GetIter(seqseq);
1343 if (row_iter == NULL) {
1344 return NULL;
1345 }
1346 while ((row_obj = PyIter_Next(row_iter))) {
1347 result = csv_writerow(self, row_obj);
1348 Py_DECREF(row_obj);
1349 if (!result) {
1350 Py_DECREF(row_iter);
1351 return NULL;
1352 }
1353 else
1354 Py_DECREF(result);
1355 }
1356 Py_DECREF(row_iter);
1357 if (PyErr_Occurred())
1358 return NULL;
1359 Py_RETURN_NONE;
1360 }
1361
1362 static struct PyMethodDef Writer_methods[] = {
1363 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1364 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1365 { NULL, NULL }
1366 };
1367
1368 #define W_OFF(x) offsetof(WriterObj, x)
1369
1370 static struct PyMemberDef Writer_memberlist[] = {
1371 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1372 { NULL }
1373 };
1374
/* tp_traverse handler for writer objects: report every object reference
 * held by the instance to the garbage collector.
 *
 * Returns 0, or the first non-zero value returned by visit.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    /* The instance's type is visited as well; Writer_dealloc() releases
       a reference to it. */
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1384
/* tp_clear handler for writer objects: drop the references to the dialect,
 * the output object's write attribute and the error type.
 *
 * The record buffer (self->rec) is not touched here; Writer_dealloc()
 * frees it.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
1393
1394 static void
Writer_dealloc(WriterObj * self)1395 Writer_dealloc(WriterObj *self)
1396 {
1397 PyTypeObject *tp = Py_TYPE(self);
1398 PyObject_GC_UnTrack(self);
1399 tp->tp_clear((PyObject *)self);
1400 if (self->rec != NULL) {
1401 PyMem_Free(self->rec);
1402 }
1403 PyObject_GC_Del(self);
1404 Py_DECREF(tp);
1405 }
1406
1407 PyDoc_STRVAR(Writer_Type_doc,
1408 "CSV writer\n"
1409 "\n"
1410 "Writer objects are responsible for generating tabular data\n"
1411 "in CSV format from sequence input.\n"
1412 );
1413
1414 static PyType_Slot Writer_Type_slots[] = {
1415 {Py_tp_doc, (char*)Writer_Type_doc},
1416 {Py_tp_traverse, Writer_traverse},
1417 {Py_tp_clear, Writer_clear},
1418 {Py_tp_dealloc, Writer_dealloc},
1419 {Py_tp_methods, Writer_methods},
1420 {Py_tp_members, Writer_memberlist},
1421 {0, NULL}
1422 };
1423
/* Type specification for writer objects; csv_exec() turns it into a type
 * with PyType_FromModuleAndSpec().
 *
 * The flags mark the type as GC-aware (HAVE_GC), usable as a base class
 * (BASETYPE) and immutable (IMMUTABLETYPE).
 */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Writer_Type_slots,
};
1431
1432
1433 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1434 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1435 {
1436 PyObject * output_file, * dialect = NULL;
1437 _csvstate *module_state = get_csv_state(module);
1438 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1439 _Py_IDENTIFIER(write);
1440
1441 if (!self)
1442 return NULL;
1443
1444 self->dialect = NULL;
1445 self->write = NULL;
1446
1447 self->rec = NULL;
1448 self->rec_size = 0;
1449 self->rec_len = 0;
1450 self->num_fields = 0;
1451
1452 self->error_obj = Py_NewRef(module_state->error_obj);
1453
1454 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1455 Py_DECREF(self);
1456 return NULL;
1457 }
1458 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1459 Py_DECREF(self);
1460 return NULL;
1461 }
1462 if (self->write == NULL || !PyCallable_Check(self->write)) {
1463 PyErr_SetString(PyExc_TypeError,
1464 "argument 1 must have a \"write\" method");
1465 Py_DECREF(self);
1466 return NULL;
1467 }
1468 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1469 keyword_args);
1470 if (self->dialect == NULL) {
1471 Py_DECREF(self);
1472 return NULL;
1473 }
1474 PyObject_GC_Track(self);
1475 return (PyObject *)self;
1476 }
1477
1478 /*
1479 * DIALECT REGISTRY
1480 */
1481 static PyObject *
csv_list_dialects(PyObject * module,PyObject * args)1482 csv_list_dialects(PyObject *module, PyObject *args)
1483 {
1484 return PyDict_Keys(get_csv_state(module)->dialects);
1485 }
1486
1487 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1488 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1489 {
1490 PyObject *name_obj, *dialect_obj = NULL;
1491 _csvstate *module_state = get_csv_state(module);
1492 PyObject *dialect;
1493
1494 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1495 return NULL;
1496 if (!PyUnicode_Check(name_obj)) {
1497 PyErr_SetString(PyExc_TypeError,
1498 "dialect name must be a string");
1499 return NULL;
1500 }
1501 if (PyUnicode_READY(name_obj) == -1)
1502 return NULL;
1503 dialect = _call_dialect(module_state, dialect_obj, kwargs);
1504 if (dialect == NULL)
1505 return NULL;
1506 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1507 Py_DECREF(dialect);
1508 return NULL;
1509 }
1510 Py_DECREF(dialect);
1511 Py_RETURN_NONE;
1512 }
1513
1514 static PyObject *
csv_unregister_dialect(PyObject * module,PyObject * name_obj)1515 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1516 {
1517 _csvstate *module_state = get_csv_state(module);
1518 if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
1519 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1520 PyErr_Format(module_state->error_obj, "unknown dialect");
1521 }
1522 return NULL;
1523 }
1524 Py_RETURN_NONE;
1525 }
1526
1527 static PyObject *
csv_get_dialect(PyObject * module,PyObject * name_obj)1528 csv_get_dialect(PyObject *module, PyObject *name_obj)
1529 {
1530 return get_dialect_from_registry(name_obj, get_csv_state(module));
1531 }
1532
1533 static PyObject *
csv_field_size_limit(PyObject * module,PyObject * args)1534 csv_field_size_limit(PyObject *module, PyObject *args)
1535 {
1536 PyObject *new_limit = NULL;
1537 _csvstate *module_state = get_csv_state(module);
1538 long old_limit = module_state->field_limit;
1539
1540 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1541 return NULL;
1542 if (new_limit != NULL) {
1543 if (!PyLong_CheckExact(new_limit)) {
1544 PyErr_Format(PyExc_TypeError,
1545 "limit must be an integer");
1546 return NULL;
1547 }
1548 module_state->field_limit = PyLong_AsLong(new_limit);
1549 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1550 module_state->field_limit = old_limit;
1551 return NULL;
1552 }
1553 }
1554 return PyLong_FromLong(old_limit);
1555 }
1556
/* Slot table for the _csv.Error exception type: only the terminating
 * entry, so no slot is overridden.
 */
static PyType_Slot error_slots[] = {
    {0, NULL},
};

/* Type specification for _csv.Error.  csv_exec() creates the type from
 * this spec with Exception as its base.
 */
PyType_Spec error_spec = {
    .name = "_csv.Error",
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .slots = error_slots,
};
1566
1567 /*
1568 * MODULE
1569 */
1570
1571 PyDoc_STRVAR(csv_module_doc,
1572 "CSV parsing and writing.\n"
1573 "\n"
1574 "This module provides classes that assist in the reading and writing\n"
1575 "of Comma Separated Value (CSV) files, and implements the interface\n"
1576 "described by PEP 305. Although many CSV files are simple to parse,\n"
1577 "the format is not formally defined by a stable specification and\n"
1578 "is subtle enough that parsing lines of a CSV file with something\n"
1579 "like line.split(\",\") is bound to fail. The module supports three\n"
1580 "basic APIs: reading, writing, and registration of dialects.\n"
1581 "\n"
1582 "\n"
1583 "DIALECT REGISTRATION:\n"
1584 "\n"
1585 "Readers and writers support a dialect argument, which is a convenient\n"
1586 "handle on a group of settings. When the dialect argument is a string,\n"
1587 "it identifies one of the dialects previously registered with the module.\n"
1588 "If it is a class or instance, the attributes of the argument are used as\n"
1589 "the settings for the reader or writer:\n"
1590 "\n"
1591 " class excel:\n"
1592 " delimiter = ','\n"
1593 " quotechar = '\"'\n"
1594 " escapechar = None\n"
1595 " doublequote = True\n"
1596 " skipinitialspace = False\n"
1597 " lineterminator = '\\r\\n'\n"
1598 " quoting = QUOTE_MINIMAL\n"
1599 "\n"
1600 "SETTINGS:\n"
1601 "\n"
1602 " * quotechar - specifies a one-character string to use as the\n"
1603 " quoting character. It defaults to '\"'.\n"
1604 " * delimiter - specifies a one-character string to use as the\n"
1605 " field separator. It defaults to ','.\n"
1606 " * skipinitialspace - specifies how to interpret whitespace which\n"
1607 " immediately follows a delimiter. It defaults to False, which\n"
1608 " means that whitespace immediately following a delimiter is part\n"
1609 " of the following field.\n"
1610 " * lineterminator - specifies the character sequence which should\n"
1611 " terminate rows.\n"
1612 " * quoting - controls when quotes should be generated by the writer.\n"
1613 " It can take on any of the following module constants:\n"
1614 "\n"
1615 " csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1616 " field contains either the quotechar or the delimiter\n"
1617 " csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1618 " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1619 " fields which do not parse as integers or floating point\n"
1620 " numbers.\n"
1621 " csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1622 " * escapechar - specifies a one-character string used to escape\n"
1623 " the delimiter when quoting is set to QUOTE_NONE.\n"
1624 " * doublequote - controls the handling of quotes inside fields. When\n"
1625 " True, two consecutive quotes are interpreted as one during read,\n"
1626 " and when writing, each quote character embedded in the data is\n"
1627 " written as two quotes\n");
1628
1629 PyDoc_STRVAR(csv_reader_doc,
1630 " csv_reader = reader(iterable [, dialect='excel']\n"
1631 " [optional keyword args])\n"
1632 " for row in csv_reader:\n"
1633 " process(row)\n"
1634 "\n"
1635 "The \"iterable\" argument can be any object that returns a line\n"
1636 "of input for each iteration, such as a file object or a list. The\n"
1637 "optional \"dialect\" parameter is discussed below. The function\n"
1638 "also accepts optional keyword arguments which override settings\n"
1639 "provided by the dialect.\n"
1640 "\n"
1641 "The returned object is an iterator. Each iteration returns a row\n"
1642 "of the CSV file (which can span multiple input lines).\n");
1643
1644 PyDoc_STRVAR(csv_writer_doc,
1645 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1646 " [optional keyword args])\n"
1647 " for row in sequence:\n"
1648 " csv_writer.writerow(row)\n"
1649 "\n"
1650 " [or]\n"
1651 "\n"
1652 " csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1653 " [optional keyword args])\n"
1654 " csv_writer.writerows(rows)\n"
1655 "\n"
1656 "The \"fileobj\" argument can be any object that supports the file API.\n");
1657
1658 PyDoc_STRVAR(csv_list_dialects_doc,
"Return a list of all known dialect names.\n"
1660 " names = csv.list_dialects()");
1661
1662 PyDoc_STRVAR(csv_get_dialect_doc,
1663 "Return the dialect instance associated with name.\n"
1664 " dialect = csv.get_dialect(name)");
1665
1666 PyDoc_STRVAR(csv_register_dialect_doc,
1667 "Create a mapping from a string name to a dialect class.\n"
1668 " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1669
1670 PyDoc_STRVAR(csv_unregister_dialect_doc,
1671 "Delete the name/dialect mapping associated with a string name.\n"
1672 " csv.unregister_dialect(name)");
1673
1674 PyDoc_STRVAR(csv_field_size_limit_doc,
1675 "Sets an upper limit on parsed fields.\n"
1676 " csv.field_size_limit([limit])\n"
1677 "\n"
1678 "Returns old limit. If limit is not given, no new limit is set and\n"
1679 "the old limit is returned");
1680
1681 static struct PyMethodDef csv_methods[] = {
1682 { "reader", (PyCFunction)(void(*)(void))csv_reader,
1683 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1684 { "writer", (PyCFunction)(void(*)(void))csv_writer,
1685 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1686 { "list_dialects", (PyCFunction)csv_list_dialects,
1687 METH_NOARGS, csv_list_dialects_doc},
1688 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
1689 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1690 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1691 METH_O, csv_unregister_dialect_doc},
1692 { "get_dialect", (PyCFunction)csv_get_dialect,
1693 METH_O, csv_get_dialect_doc},
1694 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1695 METH_VARARGS, csv_field_size_limit_doc},
1696 { NULL, NULL }
1697 };
1698
1699 static int
csv_exec(PyObject * module)1700 csv_exec(PyObject *module) {
1701 const StyleDesc *style;
1702 PyObject *temp;
1703 _csvstate *module_state = get_csv_state(module);
1704
1705 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1706 module_state->dialect_type = (PyTypeObject *)temp;
1707 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1708 return -1;
1709 }
1710
1711 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1712 module_state->reader_type = (PyTypeObject *)temp;
1713 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1714 return -1;
1715 }
1716
1717 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1718 module_state->writer_type = (PyTypeObject *)temp;
1719 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1720 return -1;
1721 }
1722
1723 /* Add version to the module. */
1724 if (PyModule_AddStringConstant(module, "__version__",
1725 MODULE_VERSION) == -1) {
1726 return -1;
1727 }
1728
1729 /* Set the field limit */
1730 module_state->field_limit = 128 * 1024;
1731
1732 /* Add _dialects dictionary */
1733 module_state->dialects = PyDict_New();
1734 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1735 return -1;
1736 }
1737
1738 /* Add quote styles into dictionary */
1739 for (style = quote_styles; style->name; style++) {
1740 if (PyModule_AddIntConstant(module, style->name,
1741 style->style) == -1)
1742 return -1;
1743 }
1744
1745 /* Add the CSV exception object to the module. */
1746 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1747 if (bases == NULL) {
1748 return -1;
1749 }
1750 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1751 bases);
1752 Py_DECREF(bases);
1753 if (module_state->error_obj == NULL) {
1754 return -1;
1755 }
1756 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1757 return -1;
1758 }
1759
1760 return 0;
1761 }
1762
/* Module slots: csv_exec() is registered as the module's exec function. */
static PyModuleDef_Slot csv_slots[] = {
    {Py_mod_exec, csv_exec},
    {0, NULL}
};
1767
1768 static struct PyModuleDef _csvmodule = {
1769 PyModuleDef_HEAD_INIT,
1770 "_csv",
1771 csv_module_doc,
1772 sizeof(_csvstate),
1773 csv_methods,
1774 csv_slots,
1775 _csv_traverse,
1776 _csv_clear,
1777 _csv_free
1778 };
1779
/* Module initialisation entry point: hand the module definition to
 * PyModuleDef_Init().  The module contents are filled in by csv_exec(),
 * which is registered in csv_slots.
 */
PyMODINIT_FUNC
PyInit__csv(void)
{
    return PyModuleDef_Init(&_csvmodule);
}
1785