# Copyright (C) 2001,2002 Python Software Foundation
# csv package unit tests

import copy
import sys
import unittest
from io import StringIO
from tempfile import TemporaryFile
import csv
import gc
import pickle
from test import support
from test.support import warnings_helper
from itertools import permutations
from textwrap import dedent
from collections import OrderedDict


class BadIterable:
    """Object whose iteration always fails with OSError."""

    def __iter__(self):
        raise OSError


class Test_Csv(unittest.TestCase):
    """
    Test the underlying C csv parser in ways that are not appropriate
    from the high level interface. Further tests of this nature are done
    in TestDialectRegistry.
    """
    def _test_arg_valid(self, ctor, arg):
        """Check that *ctor* rejects missing and malformed arguments.

        *ctor* is csv.reader or csv.writer; *arg* is a valid first
        positional argument for it.
        """
        self.assertRaises(TypeError, ctor)
        self.assertRaises(TypeError, ctor, None)
        self.assertRaises(TypeError, ctor, arg, bad_attr=0)
        self.assertRaises(TypeError, ctor, arg, delimiter=0)
        self.assertRaises(TypeError, ctor, arg, delimiter='XX')
        self.assertRaises(csv.Error, ctor, arg, 'foo')
        self.assertRaises(TypeError, ctor, arg, delimiter=None)
        self.assertRaises(TypeError, ctor, arg, delimiter=1)
        self.assertRaises(TypeError, ctor, arg, quotechar=1)
        self.assertRaises(TypeError, ctor, arg, lineterminator=None)
        self.assertRaises(TypeError, ctor, arg, lineterminator=1)
        self.assertRaises(TypeError, ctor, arg, quoting=None)
        self.assertRaises(TypeError, ctor, arg,
                          quoting=csv.QUOTE_ALL, quotechar='')
        self.assertRaises(TypeError, ctor, arg,
                          quoting=csv.QUOTE_ALL, quotechar=None)

    def test_reader_arg_valid(self):
        self._test_arg_valid(csv.reader, [])
        self.assertRaises(OSError, csv.reader, BadIterable())

    def test_writer_arg_valid(self):
        self._test_arg_valid(csv.writer, StringIO())

        class BadWriter:
            # Looking up ``write`` itself raises.
            @property
            def write(self):
                raise OSError
        self.assertRaises(OSError, csv.writer, BadWriter())

    def _test_default_attrs(self, ctor, *args):
        """Check the dialect attributes of ``ctor(*args)`` built with
        no dialect options, and that they are read-only."""
        obj = ctor(*args)
        # Check defaults
        self.assertEqual(obj.dialect.delimiter, ',')
        self.assertIs(obj.dialect.doublequote, True)
        self.assertEqual(obj.dialect.escapechar, None)
        self.assertEqual(obj.dialect.lineterminator, "\r\n")
        self.assertEqual(obj.dialect.quotechar, '"')
        self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
        self.assertIs(obj.dialect.skipinitialspace, False)
        self.assertIs(obj.dialect.strict, False)
        # Try deleting or changing attributes (they are read-only)
        self.assertRaises(AttributeError, delattr, obj.dialect, 'delimiter')
        self.assertRaises(AttributeError, setattr, obj.dialect,
                          'delimiter', ':')
        self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting')
        self.assertRaises(AttributeError, setattr, obj.dialect,
                          'quoting', None)

    def test_reader_attrs(self):
        self._test_default_attrs(csv.reader, [])

    def test_writer_attrs(self):
        self._test_default_attrs(csv.writer, StringIO())

    def _test_kw_attrs(self, ctor, *args):
        """Check that dialect options passed as keywords to *ctor* are
        reflected on the resulting object's ``dialect``."""
        # Now try with alternate options
        kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
                      lineterminator='\r', quotechar='*',
                      quoting=csv.QUOTE_NONE, skipinitialspace=True,
                      strict=True)
        obj = ctor(*args, **kwargs)
        self.assertEqual(obj.dialect.delimiter, ':')
        self.assertIs(obj.dialect.doublequote, False)
        self.assertEqual(obj.dialect.escapechar, '\\')
        self.assertEqual(obj.dialect.lineterminator, "\r")
        self.assertEqual(obj.dialect.quotechar, '*')
        self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
        self.assertIs(obj.dialect.skipinitialspace, True)
        self.assertIs(obj.dialect.strict, True)

    def test_reader_kw_attrs(self):
        self._test_kw_attrs(csv.reader, [])

    def test_writer_kw_attrs(self):
        self._test_kw_attrs(csv.writer, StringIO())

    def _test_dialect_attrs(self, ctor, *args):
        """Check that options taken from a plain class passed as the
        dialect are reflected on the resulting object's ``dialect``."""
        # Now try with dialect-derived options
        class dialect:
            delimiter = '-'
            doublequote = False
            escapechar = '^'
            lineterminator = '$'
            quotechar = '#'
            quoting = csv.QUOTE_ALL
            skipinitialspace = True
            strict = False
        args = args + (dialect,)
        obj = ctor(*args)
        self.assertEqual(obj.dialect.delimiter, '-')
        self.assertIs(obj.dialect.doublequote, False)
        self.assertEqual(obj.dialect.escapechar, '^')
        self.assertEqual(obj.dialect.lineterminator, "$")
        self.assertEqual(obj.dialect.quotechar, '#')
        self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
        self.assertIs(obj.dialect.skipinitialspace, True)
        self.assertIs(obj.dialect.strict, False)

    def test_reader_dialect_attrs(self):
        self._test_dialect_attrs(csv.reader, [])

    def test_writer_dialect_attrs(self):
        self._test_dialect_attrs(csv.writer, StringIO())

    def _write_test(self, fields, expect, **kwargs):
        """Write *fields* as one row and check the file holds *expect*
        followed by the writer's line terminator.

        *kwargs* are passed to csv.writer.
        """
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj, **kwargs)
            writer.writerow(fields)
            fileobj.seek(0)
            self.assertEqual(fileobj.read(),
                             expect + writer.dialect.lineterminator)

    def _write_error_test(self, exc, fields, **kwargs):
        """Check that writing *fields* raises *exc* and leaves the
        file empty.

        *kwargs* are passed to csv.writer.
        """
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj, **kwargs)
            with self.assertRaises(exc):
                writer.writerow(fields)
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), '')

    def test_write_arg_valid(self):
        self._write_error_test(csv.Error, None)
        self._write_test((), '')
        self._write_test([None], '""')
        self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
        # Check that exceptions are passed up the chain
        self._write_error_test(OSError, BadIterable())

        class BadList:
            def __len__(self):
                return 10

            def __getitem__(self, i):
                if i > 2:
                    raise OSError
        self._write_error_test(OSError, BadList())

        class BadItem:
            def __str__(self):
                raise OSError
        self._write_error_test(OSError, [BadItem()])

    def test_write_bigfield(self):
        # This exercises the buffer realloc functionality
        bigstring = 'X' * 50000
        self._write_test([bigstring, bigstring],
                         '%s,%s' % (bigstring, bigstring))

    def test_write_quoting(self):
        self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"')
        self._write_error_test(csv.Error, ['a', 1, 'p,q'],
                               quoting=csv.QUOTE_NONE)
        self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"',
                         quoting=csv.QUOTE_MINIMAL)
        self._write_test(['a', 1, 'p,q'], '"a",1,"p,q"',
                         quoting=csv.QUOTE_NONNUMERIC)
        self._write_test(['a', 1, 'p,q'], '"a","1","p,q"',
                         quoting=csv.QUOTE_ALL)
        self._write_test(['a\nb', 1], '"a\nb","1"',
                         quoting=csv.QUOTE_ALL)

    def test_write_escape(self):
        self._write_test(['a', 1, 'p,q'], 'a,1,"p,q"',
                         escapechar='\\')
        self._write_error_test(csv.Error, ['a', 1, 'p,"q"'],
                               escapechar=None, doublequote=False)
        self._write_test(['a', 1, 'p,"q"'], 'a,1,"p,\\"q\\""',
                         escapechar='\\', doublequote=False)
        self._write_test(['"'], '""""',
                         escapechar='\\', quoting=csv.QUOTE_MINIMAL)
        self._write_test(['"'], '\\"',
                         escapechar='\\', quoting=csv.QUOTE_MINIMAL,
                         doublequote=False)
        self._write_test(['"'], '\\"',
                         escapechar='\\', quoting=csv.QUOTE_NONE)
        self._write_test(['a', 1, 'p,q'], 'a,1,p\\,q',
                         escapechar='\\', quoting=csv.QUOTE_NONE)
        self._write_test(['\\', 'a'], '\\\\,a',
                         escapechar='\\', quoting=csv.QUOTE_NONE)
        self._write_test(['\\', 'a'], '\\\\,a',
                         escapechar='\\', quoting=csv.QUOTE_MINIMAL)
        self._write_test(['\\', 'a'], '"\\\\","a"',
                         escapechar='\\', quoting=csv.QUOTE_ALL)
        self._write_test(['\\ ', 'a'], '\\\\ ,a',
                         escapechar='\\', quoting=csv.QUOTE_MINIMAL)
        self._write_test(['\\,', 'a'], '\\\\\\,,a',
                         escapechar='\\', quoting=csv.QUOTE_NONE)
        self._write_test([',\\', 'a'], '",\\\\",a',
                         escapechar='\\', quoting=csv.QUOTE_MINIMAL)
        self._write_test(['C\\', '6', '7', 'X"'], 'C\\\\,6,7,"X"""',
                         escapechar='\\', quoting=csv.QUOTE_MINIMAL)

    def test_write_iterable(self):
        self._write_test(iter(['a', 1, 'p,q']), 'a,1,"p,q"')
        self._write_test(iter(['a', 1, None]), 'a,1,')
        self._write_test(iter([]), '')
        self._write_test(iter([None]), '""')
        self._write_error_test(csv.Error, iter([None]),
                               quoting=csv.QUOTE_NONE)
        self._write_test(iter([None, None]), ',')

    def test_writerows(self):
        class BrokenFile:
            def write(self, buf):
                raise OSError
        writer = csv.writer(BrokenFile())
        self.assertRaises(OSError, writer.writerows, [['a']])

        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj)
            self.assertRaises(TypeError, writer.writerows, None)
            writer.writerows([['a', 'b'], ['c', 'd']])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")

    def test_writerows_with_none(self):
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj)
            writer.writerows([['a', None], [None, 'd']])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), "a,\r\n,d\r\n")

        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj)
            writer.writerows([[None], ['a']])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), '""\r\na\r\n')

        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj)
            writer.writerows([['a'], [None]])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), 'a\r\n""\r\n')

    def test_writerows_errors(self):
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj)
            self.assertRaises(TypeError, writer.writerows, None)
            self.assertRaises(OSError, writer.writerows, BadIterable())

    @support.cpython_only
    @support.requires_legacy_unicode_capi
    @warnings_helper.ignore_warnings(category=DeprecationWarning)
    def test_writerows_legacy_strings(self):
        import _testcapi
        c = _testcapi.unicode_legacy_string('a')
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj)
            writer.writerows([[c]])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), "a\r\n")

    def _read_test(self, input, expect, **kwargs):
        """Parse the iterable *input* with csv.reader and check that
        the list of rows equals *expect*.

        *kwargs* are passed to csv.reader.
        """
        reader = csv.reader(input, **kwargs)
        result = list(reader)
        self.assertEqual(result, expect)

    def test_read_oddinputs(self):
        self._read_test([], [])
        self._read_test([''], [[]])
        self.assertRaises(csv.Error, self._read_test,
                          ['"ab"c'], None, strict=1)
        # cannot handle null bytes for the moment
        self.assertRaises(csv.Error, self._read_test,
                          ['ab\0c'], None, strict=1)
        self._read_test(['"ab"c'], [['abc']], doublequote=0)

        self.assertRaises(csv.Error, self._read_test,
                          [b'ab\0c'], None)

    def test_read_eol(self):
        self._read_test(['a,b'], [['a', 'b']])
        self._read_test(['a,b\n'], [['a', 'b']])
        self._read_test(['a,b\r\n'], [['a', 'b']])
        self._read_test(['a,b\r'], [['a', 'b']])
        self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], [])
        self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], [])
        self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], [])

    def test_read_eof(self):
        self._read_test(['a,"'], [['a', '']])
        self._read_test(['"a'], [['a']])
        self._read_test(['^'], [['\n']], escapechar='^')
        self.assertRaises(csv.Error, self._read_test, ['a,"'], [],
                          strict=True)
        self.assertRaises(csv.Error, self._read_test, ['"a'], [],
                          strict=True)
        self.assertRaises(csv.Error, self._read_test,
                          ['^'], [], escapechar='^', strict=True)

    def test_read_escape(self):
        self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
        self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
        self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
        self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
        self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
        self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')

    def test_read_quoting(self):
        self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
        self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
                        quotechar=None, escapechar='\\')
        self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
                        quoting=csv.QUOTE_NONE, escapechar='\\')
        # will this fail where locale uses comma for decimals?
        self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
                        quoting=csv.QUOTE_NONNUMERIC)
        self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
        self.assertRaises(ValueError, self._read_test,
                          ['abc,3'], [[]],
                          quoting=csv.QUOTE_NONNUMERIC)

    def test_read_bigfield(self):
        # This exercises the buffer realloc functionality and field size
        # limits.
        limit = csv.field_size_limit()
        try:
            size = 50000
            bigstring = 'X' * size
            bigline = '%s,%s' % (bigstring, bigstring)
            self._read_test([bigline], [[bigstring, bigstring]])
            csv.field_size_limit(size)
            self._read_test([bigline], [[bigstring, bigstring]])
            self.assertEqual(csv.field_size_limit(), size)
            csv.field_size_limit(size-1)
            self.assertRaises(csv.Error, self._read_test, [bigline], [])
            self.assertRaises(TypeError, csv.field_size_limit, None)
            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
        finally:
            # Restore the limit that was in effect before the test.
            csv.field_size_limit(limit)

    def test_read_linenum(self):
        r = csv.reader(['line,1', 'line,2', 'line,3'])
        self.assertEqual(r.line_num, 0)
        next(r)
        self.assertEqual(r.line_num, 1)
        next(r)
        self.assertEqual(r.line_num, 2)
        next(r)
        self.assertEqual(r.line_num, 3)
        self.assertRaises(StopIteration, next, r)
        self.assertEqual(r.line_num, 3)

    def test_roundtrip_quoteed_newlines(self):
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj)
            rows = [['a\nb', 'b'], ['c', 'x\r\nd']]
            writer.writerows(rows)
            fileobj.seek(0)
            # Compare the whole result so that missing rows are
            # detected, not just differing ones.
            self.assertEqual(list(csv.reader(fileobj)), rows)

    def test_roundtrip_escaped_unquoted_newlines(self):
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj, quoting=csv.QUOTE_NONE,
                                escapechar="\\")
            rows = [['a\nb', 'b'], ['c', 'x\r\nd']]
            writer.writerows(rows)
            fileobj.seek(0)
            reader = csv.reader(fileobj, quoting=csv.QUOTE_NONE,
                                escapechar="\\")
            # Compare the whole result so that missing rows are
            # detected, not just differing ones.
            self.assertEqual(list(reader), rows)


class TestDialectRegistry(unittest.TestCase):
    """Tests for registering, looking up and applying dialects."""

    def test_registry_badargs(self):
        self.assertRaises(TypeError, csv.list_dialects, None)
        self.assertRaises(TypeError, csv.get_dialect)
        self.assertRaises(csv.Error, csv.get_dialect, None)
        self.assertRaises(csv.Error, csv.get_dialect, "nonesuch")
        self.assertRaises(TypeError, csv.unregister_dialect)
        self.assertRaises(csv.Error, csv.unregister_dialect, None)
        self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch")
        self.assertRaises(TypeError, csv.register_dialect, None)
        self.assertRaises(TypeError, csv.register_dialect, None, None)
        self.assertRaises(TypeError, csv.register_dialect, "nonesuch", 0, 0)
        self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
                          badargument=None)
        self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
                          quoting=None)
        self.assertRaises(TypeError, csv.register_dialect, [])

    def test_registry(self):
        class myexceltsv(csv.excel):
            delimiter = "\t"
        name = "myexceltsv"
        expected_dialects = csv.list_dialects() + [name]
        expected_dialects.sort()
        csv.register_dialect(name, myexceltsv)
        self.addCleanup(csv.unregister_dialect, name)
        self.assertEqual(csv.get_dialect(name).delimiter, '\t')
        got_dialects = sorted(csv.list_dialects())
        self.assertEqual(expected_dialects, got_dialects)

    def test_register_kwargs(self):
        name = 'fedcba'
        csv.register_dialect(name, delimiter=';')
        self.addCleanup(csv.unregister_dialect, name)
        self.assertEqual(csv.get_dialect(name).delimiter, ';')
        self.assertEqual([['X', 'Y', 'Z']], list(csv.reader(['X;Y;Z'], name)))

    def test_incomplete_dialect(self):
        class myexceltsv(csv.Dialect):
            delimiter = "\t"
        self.assertRaises(csv.Error, myexceltsv)

    def test_space_dialect(self):
        class space(csv.excel):
            delimiter = " "
            quoting = csv.QUOTE_NONE
            escapechar = "\\"

        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("abc def\nc1ccccc1 benzene\n")
            fileobj.seek(0)
            reader = csv.reader(fileobj, dialect=space())
            self.assertEqual(next(reader), ["abc", "def"])
            self.assertEqual(next(reader), ["c1ccccc1", "benzene"])

    def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
        """Write the row ``[1, 2, 3]`` and check the file contents.

        *writeargs* and *kwwriteargs* are passed to csv.writer after
        the file object; *expected* is the full expected file text.
        """
        with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj:
            writer = csv.writer(fileobj, *writeargs, **kwwriteargs)
            writer.writerow([1, 2, 3])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected)

    def test_dialect_apply(self):
        class testA(csv.excel):
            delimiter = "\t"

        class testB(csv.excel):
            delimiter = ":"

        class testC(csv.excel):
            delimiter = "|"

        class testUni(csv.excel):
            delimiter = "\u039B"

        class unspecified():
            # A class to pass as dialect but with no dialect attributes.
            pass

        csv.register_dialect('testC', testC)
        # Same cleanup style as test_registry / test_register_kwargs.
        self.addCleanup(csv.unregister_dialect, 'testC')

        self.compare_dialect_123("1,2,3\r\n")
        self.compare_dialect_123("1,2,3\r\n", dialect=None)
        self.compare_dialect_123("1,2,3\r\n", dialect=unspecified)
        self.compare_dialect_123("1\t2\t3\r\n", testA)
        self.compare_dialect_123("1:2:3\r\n", dialect=testB())
        self.compare_dialect_123("1|2|3\r\n", dialect='testC')
        self.compare_dialect_123("1;2;3\r\n", dialect=testA,
                                 delimiter=';')
        self.compare_dialect_123("1\u039B2\u039B3\r\n",
                                 dialect=testUni)

    def test_bad_dialect(self):
        # Unknown parameter
        self.assertRaises(TypeError, csv.reader, [], bad_attr=0)
        # Bad values
        self.assertRaises(TypeError, csv.reader, [], delimiter=None)
        self.assertRaises(TypeError, csv.reader, [], quoting=-1)
        self.assertRaises(TypeError, csv.reader, [], quoting=100)

    def test_copy(self):
        for name in csv.list_dialects():
            dialect = csv.get_dialect(name)
            self.assertRaises(TypeError, copy.copy, dialect)

    def test_pickle(self):
        for name in csv.list_dialects():
            dialect = csv.get_dialect(name)
            for proto in range(pickle.HIGHEST_PROTOCOL + 1):
                self.assertRaises(TypeError, pickle.dumps, dialect, proto)


class TestCsvBase(unittest.TestCase):
    """Shared helpers for dialect tests.

    Subclasses set the ``dialect`` class attribute, which both helpers
    pass to csv.reader / csv.writer.
    """

    def readerAssertEqual(self, input, expected_result):
        """Write the string *input* to a temporary file, parse it with
        ``self.dialect`` and compare the rows to *expected_result*."""
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            fileobj.write(input)
            fileobj.seek(0)
            reader = csv.reader(fileobj, dialect=self.dialect)
            fields = list(reader)
            self.assertEqual(fields, expected_result)

    def writerAssertEqual(self, input, expected_result):
        """Write the rows *input* with ``self.dialect`` and compare the
        file text to *expected_result*."""
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj, dialect=self.dialect)
            writer.writerows(input)
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected_result)


class TestDialectExcel(TestCsvBase):
    dialect = 'excel'

    def test_single(self):
        self.readerAssertEqual('abc', [['abc']])

    def test_simple(self):
        self.readerAssertEqual('1,2,3,4,5', [['1', '2', '3', '4', '5']])

    def test_blankline(self):
        self.readerAssertEqual('', [])

    def test_empty_fields(self):
        self.readerAssertEqual(',', [['', '']])

    def test_singlequoted(self):
        self.readerAssertEqual('""', [['']])

    def test_singlequoted_left_empty(self):
        self.readerAssertEqual('"",', [['', '']])

    def test_singlequoted_right_empty(self):
        self.readerAssertEqual(',""', [['', '']])

    def test_single_quoted_quote(self):
        self.readerAssertEqual('""""', [['"']])

    def test_quoted_quotes(self):
        self.readerAssertEqual('""""""', [['""']])

    def test_inline_quote(self):
        self.readerAssertEqual('a""b', [['a""b']])

    def test_inline_quotes(self):
        self.readerAssertEqual('a"b"c', [['a"b"c']])

    def test_quotes_and_more(self):
        # Excel would never write a field containing '"a"b', but when
        # reading one, it will return 'ab'.
        self.readerAssertEqual('"a"b', [['ab']])

    def test_lone_quote(self):
        self.readerAssertEqual('a"b', [['a"b']])

    def test_quote_and_quote(self):
        # Excel would never write a field containing '"a" "b"', but when
        # reading one, it will return 'a "b"'.
        self.readerAssertEqual('"a" "b"', [['a "b"']])

    def test_space_and_quote(self):
        self.readerAssertEqual(' "a"', [[' "a"']])

    def test_quoted(self):
        self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6',
                               [['1', '2', '3',
                                 'I think, therefore I am',
                                 '5', '6']])

    def test_quoted_quote(self):
        self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"',
                               [['1', '2', '3',
                                 '"I see," said the blind man',
                                 'as he picked up his hammer and saw']])

    def test_quoted_nl(self):
        # The literal must start in column 0: its newlines are part of
        # the quoted fields under test.
        text = '''\
1,2,3,"""I see,""
said the blind man","as he picked up his
hammer and saw"
9,8,7,6'''
        self.readerAssertEqual(text,
                               [['1', '2', '3',
                                 '"I see,"\nsaid the blind man',
                                 'as he picked up his\nhammer and saw'],
                                ['9', '8', '7', '6']])

    def test_dubious_quote(self):
        self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']])

    def test_null(self):
        self.writerAssertEqual([], '')

    def test_single_writer(self):
        self.writerAssertEqual([['abc']], 'abc\r\n')

    def test_simple_writer(self):
        self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n')

    def test_quotes(self):
        self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]],
                               '1,2,"a""bc""",3,4\r\n')

    def test_quote_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_newlines(self):
        self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]],
                               '1,2,"a\nbc",3,4\r\n')


class EscapedExcel(csv.excel):
    quoting = csv.QUOTE_NONE
    escapechar = '\\'


class TestEscapedExcel(TestCsvBase):
    dialect = EscapedExcel()

    def test_escape_fieldsep(self):
        self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n')

    def test_read_escape_fieldsep(self):
        self.readerAssertEqual('abc\\,def\r\n', [['abc,def']])


class TestDialectUnix(TestCsvBase):
    dialect = 'unix'

    def test_simple_writer(self):
        self.writerAssertEqual([[1, 'abc def', 'abc']],
                               '"1","abc def","abc"\n')

    def test_simple_reader(self):
        self.readerAssertEqual('"1","abc def","abc"\n',
                               [['1', 'abc def', 'abc']])


class QuotedEscapedExcel(csv.excel):
    quoting = csv.QUOTE_NONNUMERIC
    escapechar = '\\'


class TestQuotedEscapedExcel(TestCsvBase):
    dialect = QuotedEscapedExcel()

    def test_write_escape_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_read_escape_fieldsep(self):
        self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']])


class TestDictFields(unittest.TestCase):
    ### "long" means the row is longer than the number of fieldnames
    ### "short" means there are fewer elements in the row than fieldnames
    def test_writeheader_return_value(self):
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
            writeheader_return_value = writer.writeheader()
            self.assertEqual(writeheader_return_value, 10)

    def test_write_simple_dict(self):
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
            writer.writeheader()
            fileobj.seek(0)
            self.assertEqual(fileobj.readline(), "f1,f2,f3\r\n")
            writer.writerow({"f1": 10, "f3": "abc"})
            fileobj.seek(0)
            fileobj.readline() # header
            self.assertEqual(fileobj.read(), "10,,abc\r\n")

    def test_write_multiple_dict_rows(self):
        fileobj = StringIO()
        writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
        writer.writeheader()
        self.assertEqual(fileobj.getvalue(), "f1,f2,f3\r\n")
        writer.writerows([{"f1": 1, "f2": "abc", "f3": "f"},
                          {"f1": 2, "f2": 5, "f3": "xyz"}])
        self.assertEqual(fileobj.getvalue(),
                         "f1,f2,f3\r\n1,abc,f\r\n2,5,xyz\r\n")

    def test_write_no_fields(self):
        fileobj = StringIO()
        self.assertRaises(TypeError, csv.DictWriter, fileobj)

    def test_write_fields_not_in_fieldnames(self):
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
            # Of special note is the non-string key (issue 19449)
            with self.assertRaises(ValueError) as cx:
                writer.writerow({"f4": 10, "f2": "spam", 1: "abc"})
            exception = str(cx.exception)
            self.assertIn("fieldnames", exception)
            self.assertIn("'f4'", exception)
            self.assertNotIn("'f2'", exception)
            self.assertIn("1", exception)

    def test_typo_in_extrasaction_raises_error(self):
        fileobj = StringIO()
        self.assertRaises(ValueError, csv.DictWriter, fileobj, ['f1', 'f2'],
                          extrasaction="raised")

    def test_write_field_not_in_field_names_raise(self):
        fileobj = StringIO()
        writer = csv.DictWriter(fileobj, ['f1', 'f2'], extrasaction="raise")
        dictrow = {'f0': 0, 'f1': 1, 'f2': 2, 'f3': 3}
        self.assertRaises(ValueError, csv.DictWriter.writerow, writer, dictrow)

    def test_write_field_not_in_field_names_ignore(self):
        fileobj = StringIO()
        writer = csv.DictWriter(fileobj, ['f1', 'f2'], extrasaction="ignore")
        dictrow = {'f0': 0, 'f1': 1, 'f2': 2, 'f3': 3}
        csv.DictWriter.writerow(writer, dictrow)
        self.assertEqual(fileobj.getvalue(), "1,2\r\n")

    def test_read_dict_fields(self):
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("1,2,abc\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj,
                                    fieldnames=["f1", "f2", "f3"])
            self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'})

    def test_read_dict_no_fieldnames(self):
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj)
            self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'})
            self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])

    # Two test cases to make sure existing ways of implicitly setting
    # fieldnames continue to work.  Both arise from discussion in issue3436.
    def test_read_dict_fieldnames_from_file(self):
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj,
                                    fieldnames=next(csv.reader(fileobj)))
            self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
            self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'})

    def test_read_dict_fieldnames_chain(self):
        import itertools
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj)
            first = next(reader)
            for row in itertools.chain([first], reader):
                self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
                self.assertEqual(row, {"f1": '1', "f2": '2', "f3": 'abc'})

    def test_read_long(self):
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("1,2,abc,4,5,6\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj,
                                    fieldnames=["f1", "f2"])
            self.assertEqual(next(reader), {"f1": '1', "f2": '2',
                                            None: ["abc", "4", "5", "6"]})

    def test_read_long_with_rest(self):
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("1,2,abc,4,5,6\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj,
                                    fieldnames=["f1", "f2"], restkey="_rest")
            self.assertEqual(next(reader), {"f1": '1', "f2": '2',
                                            "_rest": ["abc", "4", "5", "6"]})

    def test_read_long_with_rest_no_fieldnames(self):
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj, restkey="_rest")
            self.assertEqual(reader.fieldnames, ["f1", "f2"])
            self.assertEqual(next(reader), {"f1": '1', "f2": '2',
                                            "_rest": ["abc", "4", "5", "6"]})

    def test_read_short(self):
        with TemporaryFile("w+", encoding="utf-8") as fileobj:
            fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
            fileobj.seek(0)
            reader = csv.DictReader(fileobj,
                                    fieldnames="1 2 3 4 5 6".split(),
                                    restval="DEFAULT")
            self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                            "4": '4', "5": '5', "6": '6'})
            self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                            "4": 'DEFAULT', "5": 'DEFAULT',
                                            "6": 'DEFAULT'})

    def test_read_multi(self):
        sample = [
            '2147483648,43.0e12,17,abc,def\r\n',
            '147483648,43.0e2,17,abc,def\r\n',
            '47483648,43.0,170,abc,def\r\n'
            ]

        reader = csv.DictReader(sample,
                                fieldnames="i1 float i2 s1 s2".split())
        self.assertEqual(next(reader), {"i1": '2147483648',
                                        "float": '43.0e12',
                                        "i2": '17',
                                        "s1": 'abc',
                                        "s2": 'def'})

    def test_read_with_blanks(self):
        reader = csv.DictReader(["1,2,abc,4,5,6\r\n", "\r\n",
                                 "1,2,abc,4,5,6\r\n"],
                                fieldnames="1 2 3 4 5 6".split())
        self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                        "4": '4', "5": '5', "6": '6'})
        self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                        "4": '4', "5": '5', "6": '6'})

    def test_read_semi_sep(self):
        reader = csv.DictReader(["1;2;abc;4;5;6\r\n"],
                                fieldnames="1 2 3 4 5 6".split(),
                                delimiter=';')
        self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
                                        "4": '4', "5": '5', "6": '6'})


class TestArrayWrites(unittest.TestCase):
    """Check that array.array objects can be written as rows."""

    def _check_array_row(self, row, expected):
        """Write *row* with the "excel" dialect and compare the file
        text to *expected*."""
        with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
            writer = csv.writer(fileobj, dialect="excel")
            writer.writerow(row)
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected)

    def test_int_write(self):
        import array
        contents = [(20-i) for i in range(20)]
        a = array.array('i', contents)
        self._check_array_row(a, ",".join(str(i) for i in a) + "\r\n")

    def test_double_write(self):
        import array
        contents = [(20-i)*0.1 for i in range(20)]
        a = array.array('d', contents)
        self._check_array_row(a, ",".join(str(i) for i in a) + "\r\n")

    def test_float_write(self):
        import array
        contents = [(20-i)*0.1 for i in range(20)]
        a = array.array('f', contents)
        # Expected text is built from the array's own items, not from
        # ``contents``.
        self._check_array_row(a, ",".join(str(i) for i in a) + "\r\n")

    def test_char_write(self):
        import array, string
        a = array.array('u', string.ascii_letters)
        self._check_array_row(a, ",".join(a) + "\r\n")


class TestDialectValidity(unittest.TestCase):
    """Check validation performed when a csv.Dialect subclass is
    instantiated.

    Each test mutates attributes of its own local ``mydialect`` class
    and instantiates it again.
    """

    def test_quoting(self):
        class mydialect(csv.Dialect):
            delimiter = ";"
            escapechar = '\\'
            doublequote = False
            skipinitialspace = True
            lineterminator = '\r\n'
            quoting = csv.QUOTE_NONE
        d = mydialect()
        self.assertEqual(d.quoting, csv.QUOTE_NONE)

        mydialect.quoting = None
        self.assertRaises(csv.Error, mydialect)

        mydialect.doublequote = True
        mydialect.quoting = csv.QUOTE_ALL
        mydialect.quotechar = '"'
        d = mydialect()
        self.assertEqual(d.quoting, csv.QUOTE_ALL)
        self.assertEqual(d.quotechar, '"')
        self.assertTrue(d.doublequote)

        mydialect.quotechar = "''"
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"quotechar" must be a 1-character string')

        mydialect.quotechar = 4
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"quotechar" must be string or None, not int')

    def test_delimiter(self):
        class mydialect(csv.Dialect):
            delimiter = ";"
            escapechar = '\\'
            doublequote = False
            skipinitialspace = True
            lineterminator = '\r\n'
            quoting = csv.QUOTE_NONE
        d = mydialect()
        self.assertEqual(d.delimiter, ";")

        mydialect.delimiter = ":::"
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"delimiter" must be a 1-character string')

        mydialect.delimiter = ""
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"delimiter" must be a 1-character string')

        mydialect.delimiter = b","
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"delimiter" must be string, not bytes')

        mydialect.delimiter = 4
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"delimiter" must be string, not int')

        mydialect.delimiter = None
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"delimiter" must be string, not NoneType')

    def test_escapechar(self):
        class mydialect(csv.Dialect):
            delimiter = ";"
            escapechar = '\\'
            doublequote = False
            skipinitialspace = True
            lineterminator = '\r\n'
            quoting = csv.QUOTE_NONE
        d = mydialect()
        self.assertEqual(d.escapechar, "\\")

        mydialect.escapechar = "**"
        with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'):
            mydialect()

        mydialect.escapechar = b"*"
        with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not bytes'):
            mydialect()

        mydialect.escapechar = 4
        with self.assertRaisesRegex(csv.Error, '"escapechar" must be string or None, not int'):
            mydialect()

    def test_lineterminator(self):
        class mydialect(csv.Dialect):
            delimiter = ";"
            escapechar = '\\'
            doublequote = False
            skipinitialspace = True
            lineterminator = '\r\n'
            quoting = csv.QUOTE_NONE
        d = mydialect()
        self.assertEqual(d.lineterminator, '\r\n')

        mydialect.lineterminator = ":::"
        d = mydialect()
        self.assertEqual(d.lineterminator, ":::")

        mydialect.lineterminator = 4
        with self.assertRaises(csv.Error) as cm:
            mydialect()
        self.assertEqual(str(cm.exception),
                         '"lineterminator" must be a string')

    def test_invalid_chars(self):
        def create_invalid(field_name, value):
            # Build a dialect class with one attribute set and
            # instantiate it; the instance itself is not needed.
            class mydialect(csv.Dialect):
                pass
            setattr(mydialect, field_name, value)
            mydialect()

        for field_name in ("delimiter", "escapechar", "quotechar"):
            with self.subTest(field_name=field_name):
                self.assertRaises(csv.Error, create_invalid, field_name, "")
                self.assertRaises(csv.Error, create_invalid, field_name, "abc")
                self.assertRaises(csv.Error, create_invalid, field_name, b'x')
                self.assertRaises(csv.Error, create_invalid, field_name, 5)


class TestSniffer(unittest.TestCase):
    sample1 = """\
Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
Shark City, Glendale Heights, IL, 12/28/02, Prezence
Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
"""
    sample2 = """\
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
"""
    header1 = '''\
"venue","city","state","date","performers"
'''
    # NOTE(review): the ``sample3`` literal and the rest of this class
    # continue past the end of the reviewed chunk and are not
    # reproduced here.
101905/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03 102005/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03 102105/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03 1022''' 1023 1024 sample4 = '''\ 10252147483648;43.0e12;17;abc;def 1026147483648;43.0e2;17;abc;def 102747483648;43.0;170;abc;def 1028''' 1029 1030 sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n" 1031 sample6 = "a|b|c\r\nd|e|f\r\n" 1032 sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n" 1033 1034# Issue 18155: Use a delimiter that is a special char to regex: 1035 1036 header2 = '''\ 1037"venue"+"city"+"state"+"date"+"performers" 1038''' 1039 sample8 = """\ 1040Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes 1041Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence 1042Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow 1043Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back 1044""" 1045 sample9 = """\ 1046'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes' 1047'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence' 1048'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow' 1049'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back' 1050""" 1051 1052 sample10 = dedent(""" 1053 abc,def 1054 ghijkl,mno 1055 ghi,jkl 1056 """) 1057 1058 sample11 = dedent(""" 1059 abc,def 1060 ghijkl,mnop 1061 ghi,jkl 1062 """) 1063 1064 sample12 = dedent(""""time","forces" 1065 1,1.5 1066 0.5,5+0j 1067 0,0 1068 1+1j,6 1069 """) 1070 1071 sample13 = dedent(""""time","forces" 1072 0,0 1073 1,2 1074 a,b 1075 """) 1076 1077 def test_issue43625(self): 1078 sniffer = csv.Sniffer() 1079 self.assertTrue(sniffer.has_header(self.sample12)) 1080 self.assertFalse(sniffer.has_header(self.sample13)) 1081 1082 def test_has_header_strings(self): 1083 "More to document existing (unexpected?) behavior than anything else." 
1084 sniffer = csv.Sniffer() 1085 self.assertFalse(sniffer.has_header(self.sample10)) 1086 self.assertFalse(sniffer.has_header(self.sample11)) 1087 1088 def test_has_header(self): 1089 sniffer = csv.Sniffer() 1090 self.assertIs(sniffer.has_header(self.sample1), False) 1091 self.assertIs(sniffer.has_header(self.header1 + self.sample1), True) 1092 1093 def test_has_header_regex_special_delimiter(self): 1094 sniffer = csv.Sniffer() 1095 self.assertIs(sniffer.has_header(self.sample8), False) 1096 self.assertIs(sniffer.has_header(self.header2 + self.sample8), True) 1097 1098 def test_guess_quote_and_delimiter(self): 1099 sniffer = csv.Sniffer() 1100 for header in (";'123;4';", "'123;4';", ";'123;4'", "'123;4'"): 1101 with self.subTest(header): 1102 dialect = sniffer.sniff(header, ",;") 1103 self.assertEqual(dialect.delimiter, ';') 1104 self.assertEqual(dialect.quotechar, "'") 1105 self.assertIs(dialect.doublequote, False) 1106 self.assertIs(dialect.skipinitialspace, False) 1107 1108 def test_sniff(self): 1109 sniffer = csv.Sniffer() 1110 dialect = sniffer.sniff(self.sample1) 1111 self.assertEqual(dialect.delimiter, ",") 1112 self.assertEqual(dialect.quotechar, '"') 1113 self.assertIs(dialect.skipinitialspace, True) 1114 1115 dialect = sniffer.sniff(self.sample2) 1116 self.assertEqual(dialect.delimiter, ":") 1117 self.assertEqual(dialect.quotechar, "'") 1118 self.assertIs(dialect.skipinitialspace, False) 1119 1120 def test_delimiters(self): 1121 sniffer = csv.Sniffer() 1122 dialect = sniffer.sniff(self.sample3) 1123 # given that all three lines in sample3 are equal, 1124 # I think that any character could have been 'guessed' as the 1125 # delimiter, depending on dictionary order 1126 self.assertIn(dialect.delimiter, self.sample3) 1127 dialect = sniffer.sniff(self.sample3, delimiters="?,") 1128 self.assertEqual(dialect.delimiter, "?") 1129 dialect = sniffer.sniff(self.sample3, delimiters="/,") 1130 self.assertEqual(dialect.delimiter, "/") 1131 dialect = 
sniffer.sniff(self.sample4) 1132 self.assertEqual(dialect.delimiter, ";") 1133 dialect = sniffer.sniff(self.sample5) 1134 self.assertEqual(dialect.delimiter, "\t") 1135 dialect = sniffer.sniff(self.sample6) 1136 self.assertEqual(dialect.delimiter, "|") 1137 dialect = sniffer.sniff(self.sample7) 1138 self.assertEqual(dialect.delimiter, "|") 1139 self.assertEqual(dialect.quotechar, "'") 1140 dialect = sniffer.sniff(self.sample8) 1141 self.assertEqual(dialect.delimiter, '+') 1142 dialect = sniffer.sniff(self.sample9) 1143 self.assertEqual(dialect.delimiter, '+') 1144 self.assertEqual(dialect.quotechar, "'") 1145 1146 def test_doublequote(self): 1147 sniffer = csv.Sniffer() 1148 dialect = sniffer.sniff(self.header1) 1149 self.assertFalse(dialect.doublequote) 1150 dialect = sniffer.sniff(self.header2) 1151 self.assertFalse(dialect.doublequote) 1152 dialect = sniffer.sniff(self.sample2) 1153 self.assertTrue(dialect.doublequote) 1154 dialect = sniffer.sniff(self.sample8) 1155 self.assertFalse(dialect.doublequote) 1156 dialect = sniffer.sniff(self.sample9) 1157 self.assertTrue(dialect.doublequote) 1158 1159class NUL: 1160 def write(s, *args): 1161 pass 1162 writelines = write 1163 1164@unittest.skipUnless(hasattr(sys, "gettotalrefcount"), 1165 'requires sys.gettotalrefcount()') 1166class TestLeaks(unittest.TestCase): 1167 def test_create_read(self): 1168 delta = 0 1169 lastrc = sys.gettotalrefcount() 1170 for i in range(20): 1171 gc.collect() 1172 self.assertEqual(gc.garbage, []) 1173 rc = sys.gettotalrefcount() 1174 csv.reader(["a,b,c\r\n"]) 1175 csv.reader(["a,b,c\r\n"]) 1176 csv.reader(["a,b,c\r\n"]) 1177 delta = rc-lastrc 1178 lastrc = rc 1179 # if csv.reader() leaks, last delta should be 3 or more 1180 self.assertLess(delta, 3) 1181 1182 def test_create_write(self): 1183 delta = 0 1184 lastrc = sys.gettotalrefcount() 1185 s = NUL() 1186 for i in range(20): 1187 gc.collect() 1188 self.assertEqual(gc.garbage, []) 1189 rc = sys.gettotalrefcount() 1190 csv.writer(s) 1191 
csv.writer(s) 1192 csv.writer(s) 1193 delta = rc-lastrc 1194 lastrc = rc 1195 # if csv.writer() leaks, last delta should be 3 or more 1196 self.assertLess(delta, 3) 1197 1198 def test_read(self): 1199 delta = 0 1200 rows = ["a,b,c\r\n"]*5 1201 lastrc = sys.gettotalrefcount() 1202 for i in range(20): 1203 gc.collect() 1204 self.assertEqual(gc.garbage, []) 1205 rc = sys.gettotalrefcount() 1206 rdr = csv.reader(rows) 1207 for row in rdr: 1208 pass 1209 delta = rc-lastrc 1210 lastrc = rc 1211 # if reader leaks during read, delta should be 5 or more 1212 self.assertLess(delta, 5) 1213 1214 def test_write(self): 1215 delta = 0 1216 rows = [[1,2,3]]*5 1217 s = NUL() 1218 lastrc = sys.gettotalrefcount() 1219 for i in range(20): 1220 gc.collect() 1221 self.assertEqual(gc.garbage, []) 1222 rc = sys.gettotalrefcount() 1223 writer = csv.writer(s) 1224 for row in rows: 1225 writer.writerow(row) 1226 delta = rc-lastrc 1227 lastrc = rc 1228 # if writer leaks during write, last delta should be 5 or more 1229 self.assertLess(delta, 5) 1230 1231class TestUnicode(unittest.TestCase): 1232 1233 names = ["Martin von Löwis", 1234 "Marc André Lemburg", 1235 "Guido van Rossum", 1236 "François Pinard"] 1237 1238 def test_unicode_read(self): 1239 with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj: 1240 fileobj.write(",".join(self.names) + "\r\n") 1241 fileobj.seek(0) 1242 reader = csv.reader(fileobj) 1243 self.assertEqual(list(reader), [self.names]) 1244 1245 1246 def test_unicode_write(self): 1247 with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj: 1248 writer = csv.writer(fileobj) 1249 writer.writerow(self.names) 1250 expected = ",".join(self.names)+"\r\n" 1251 fileobj.seek(0) 1252 self.assertEqual(fileobj.read(), expected) 1253 1254class KeyOrderingTest(unittest.TestCase): 1255 1256 def test_ordering_for_the_dict_reader_and_writer(self): 1257 resultset = set() 1258 for keys in permutations("abcde"): 1259 with TemporaryFile('w+', newline='', encoding="utf-8") 
as fileobject: 1260 dw = csv.DictWriter(fileobject, keys) 1261 dw.writeheader() 1262 fileobject.seek(0) 1263 dr = csv.DictReader(fileobject) 1264 kt = tuple(dr.fieldnames) 1265 self.assertEqual(keys, kt) 1266 resultset.add(kt) 1267 # Final sanity check: were all permutations unique? 1268 self.assertEqual(len(resultset), 120, "Key ordering: some key permutations not collected (expected 120)") 1269 1270 def test_ordered_dict_reader(self): 1271 data = dedent('''\ 1272 FirstName,LastName 1273 Eric,Idle 1274 Graham,Chapman,Over1,Over2 1275 1276 Under1 1277 John,Cleese 1278 ''').splitlines() 1279 1280 self.assertEqual(list(csv.DictReader(data)), 1281 [OrderedDict([('FirstName', 'Eric'), ('LastName', 'Idle')]), 1282 OrderedDict([('FirstName', 'Graham'), ('LastName', 'Chapman'), 1283 (None, ['Over1', 'Over2'])]), 1284 OrderedDict([('FirstName', 'Under1'), ('LastName', None)]), 1285 OrderedDict([('FirstName', 'John'), ('LastName', 'Cleese')]), 1286 ]) 1287 1288 self.assertEqual(list(csv.DictReader(data, restkey='OtherInfo')), 1289 [OrderedDict([('FirstName', 'Eric'), ('LastName', 'Idle')]), 1290 OrderedDict([('FirstName', 'Graham'), ('LastName', 'Chapman'), 1291 ('OtherInfo', ['Over1', 'Over2'])]), 1292 OrderedDict([('FirstName', 'Under1'), ('LastName', None)]), 1293 OrderedDict([('FirstName', 'John'), ('LastName', 'Cleese')]), 1294 ]) 1295 1296 del data[0] # Remove the header row 1297 self.assertEqual(list(csv.DictReader(data, fieldnames=['fname', 'lname'])), 1298 [OrderedDict([('fname', 'Eric'), ('lname', 'Idle')]), 1299 OrderedDict([('fname', 'Graham'), ('lname', 'Chapman'), 1300 (None, ['Over1', 'Over2'])]), 1301 OrderedDict([('fname', 'Under1'), ('lname', None)]), 1302 OrderedDict([('fname', 'John'), ('lname', 'Cleese')]), 1303 ]) 1304 1305 1306class MiscTestCase(unittest.TestCase): 1307 def test__all__(self): 1308 extra = {'__doc__', '__version__'} 1309 support.check__all__(self, csv, ('csv', '_csv'), extra=extra) 1310 1311 def test_subclassable(self): 1312 # issue 
44089 1313 class Foo(csv.Error): ... 1314 1315if __name__ == '__main__': 1316 unittest.main() 1317