1# Copyright (C) 2001-2010 Python Software Foundation 2# Contact: email-sig@python.org 3# email package unit tests 4 5import re 6import time 7import base64 8import unittest 9import textwrap 10 11from io import StringIO, BytesIO 12from itertools import chain 13from random import choice 14from socket import getfqdn 15from threading import Thread 16 17import email 18import email.policy 19 20from email.charset import Charset 21from email.header import Header, decode_header, make_header 22from email.parser import Parser, HeaderParser 23from email.generator import Generator, DecodedGenerator, BytesGenerator 24from email.message import Message 25from email.mime.application import MIMEApplication 26from email.mime.audio import MIMEAudio 27from email.mime.text import MIMEText 28from email.mime.image import MIMEImage 29from email.mime.base import MIMEBase 30from email.mime.message import MIMEMessage 31from email.mime.multipart import MIMEMultipart 32from email.mime.nonmultipart import MIMENonMultipart 33from email import utils 34from email import errors 35from email import encoders 36from email import iterators 37from email import base64mime 38from email import quoprimime 39 40from test.support import unlink, start_threads 41from test.test_email import openfile, TestEmailBase 42 43# These imports are documented to work, but we are testing them using a 44# different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        # get_all() returns every value of a repeated header, or the
        # supplied fallback object when the header does not occur.
        parsed = self._msgobj('msg_20.txt')
        self.assertEqual(parsed.get_all('cc'),
                         ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        self.assertEqual(parsed.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        # A brand new message carries no charset.
        message = Message()
        self.assertEqual(message.get_charset(), None)
        latin1 = Charset('iso-8859-1')
        message.set_charset(latin1)
        # Setting a charset fills in the MIME headers.
        self.assertEqual(message['mime-version'], '1.0')
        self.assertEqual(message.get_content_type(), 'text/plain')
        self.assertEqual(message['content-type'],
                         'text/plain; charset="iso-8859-1"')
        self.assertEqual(message.get_param('charset'), 'iso-8859-1')
        self.assertEqual(message['content-transfer-encoding'],
                         'quoted-printable')
        self.assertEqual(message.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        message.set_charset(None)
        self.assertEqual(message.get_charset(), None)
        self.assertEqual(message['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        preset = Message()
        preset['MIME-Version'] = '2.0'
        preset['Content-Type'] = 'text/x-weird'
        preset['Content-Transfer-Encoding'] = 'quinted-puntable'
        preset.set_charset(latin1)
        self.assertEqual(preset['mime-version'], '2.0')
        self.assertEqual(preset['content-type'],
                         'text/x-weird; charset="iso-8859-1"')
        self.assertEqual(preset['content-transfer-encoding'],
                         'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message.get_charset().input_charset, 'us-ascii')
        self.assertEqual(message['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # The charset handed to set_payload() is recorded on the message.
        message = Message()
        message.set_payload('This is a string payload',
                            Charset('iso-8859-1'))
        self.assertEqual(message.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        # Bytes payload plus a utf-8 charset ends up base64 encoded.
        raw = b'\xd0\x90\xd0\x91\xd0\x92'
        utf8 = Charset('utf-8')
        message = Message()
        message.set_payload(raw, utf8)
        self.assertEqual(message['content-transfer-encoding'], 'base64')
        self.assertEqual(message.get_payload(decode=True), raw)
        self.assertEqual(message.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        # Same data, passed as str, with the body encoding switched off.
        raw = b'\xd0\x90\xd0\x91\xd0\x92'
        utf8 = Charset('utf-8')
        utf8.body_encoding = None  # Disable base64 encoding
        message = Message()
        message.set_payload(raw.decode('utf-8'), utf8)
        self.assertEqual(message['content-transfer-encoding'], '8bit')
        self.assertEqual(message.get_payload(decode=True), raw)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        # Same data, passed as bytes, with the body encoding switched off.
        raw = b'\xd0\x90\xd0\x91\xd0\x92'
        utf8 = Charset('utf-8')
        utf8.body_encoding = None  # Disable base64 encoding
        message = Message()
        message.set_payload(raw, utf8)
        self.assertEqual(message['content-transfer-encoding'], '8bit')
        self.assertEqual(message.get_payload(decode=True), raw)

    def test_set_payload_to_list(self):
        # An empty list is stored and handed back unchanged.
        message = Message()
        message.set_payload([])
        self.assertEqual(message.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # attach() must refuse to work once the payload is a string.
        container = Message()
        container['Content-Type'] = 'multipart/mixed'
        container.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        with self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart"):
            container.attach(sub_msg)

    def test_get_charsets(self):
        # Without an argument, parts lacking a charset are reported as None.
        parsed = self._msgobj('msg_08.txt')
        self.assertEqual(
            parsed.get_charsets(),
            [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # A failobj argument replaces those None entries.
        parsed = self._msgobj('msg_09.txt')
        self.assertEqual(
            parsed.get_charsets('dingbat'),
            ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
             'koi8-r'])

        parsed = self._msgobj('msg_12.txt')
        self.assertEqual(
            parsed.get_charsets(),
            [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
             'iso-8859-3', 'us-ascii', 'koi8-r'])

def test_get_filename(self): 157 eq = self.assertEqual 158 159 msg = self._msgobj('msg_04.txt') 160 filenames = [p.get_filename() for p in msg.get_payload()] 161 eq(filenames, ['msg.txt', 'msg.txt']) 162 163 msg = self._msgobj('msg_07.txt') 164 subpart = msg.get_payload(1) 165 eq(subpart.get_filename(), 'dingusfish.gif') 166 167 def test_get_filename_with_name_parameter(self): 168 eq = self.assertEqual 169 170 msg = self._msgobj('msg_44.txt') 171 filenames = [p.get_filename() for p in msg.get_payload()] 172 eq(filenames, ['msg.txt', 'msg.txt']) 173 174 def test_get_boundary(self): 175 eq = self.assertEqual 176 msg = self._msgobj('msg_07.txt') 177 # No quotes! 178 eq(msg.get_boundary(), 'BOUNDARY') 179 180 def test_set_boundary(self): 181 eq = self.assertEqual 182 # This one has no existing boundary parameter, but the Content-Type: 183 # header appears fifth. 184 msg = self._msgobj('msg_01.txt') 185 msg.set_boundary('BOUNDARY') 186 header, value = msg.items()[4] 187 eq(header.lower(), 'content-type') 188 eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"') 189 # This one has a Content-Type: header, with a boundary, stuck in the 190 # middle of its headers. Make sure the order is preserved; it should 191 # be fifth. 192 msg = self._msgobj('msg_04.txt') 193 msg.set_boundary('BOUNDARY') 194 header, value = msg.items()[4] 195 eq(header.lower(), 'content-type') 196 eq(value, 'multipart/mixed; boundary="BOUNDARY"') 197 # And this one has no Content-Type: header at all. 198 msg = self._msgobj('msg_03.txt') 199 self.assertRaises(errors.HeaderParseError, 200 msg.set_boundary, 'BOUNDARY') 201 202 def test_make_boundary(self): 203 msg = MIMEMultipart('form-data') 204 # Note that when the boundary gets created is an implementation 205 # detail and might change. 
206 self.assertEqual(msg.items()[0][1], 'multipart/form-data') 207 # Trigger creation of boundary 208 msg.as_string() 209 self.assertEqual(msg.items()[0][1][:33], 210 'multipart/form-data; boundary="==') 211 # XXX: there ought to be tests of the uniqueness of the boundary, too. 212 213 def test_message_rfc822_only(self): 214 # Issue 7970: message/rfc822 not in multipart parsed by 215 # HeaderParser caused an exception when flattened. 216 with openfile('msg_46.txt') as fp: 217 msgdata = fp.read() 218 parser = HeaderParser() 219 msg = parser.parsestr(msgdata) 220 out = StringIO() 221 gen = Generator(out, True, 0) 222 gen.flatten(msg, False) 223 self.assertEqual(out.getvalue(), msgdata) 224 225 def test_byte_message_rfc822_only(self): 226 # Make sure new bytes header parser also passes this. 227 with openfile('msg_46.txt') as fp: 228 msgdata = fp.read().encode('ascii') 229 parser = email.parser.BytesHeaderParser() 230 msg = parser.parsebytes(msgdata) 231 out = BytesIO() 232 gen = email.generator.BytesGenerator(out) 233 gen.flatten(msg) 234 self.assertEqual(out.getvalue(), msgdata) 235 236 def test_get_decoded_payload(self): 237 eq = self.assertEqual 238 msg = self._msgobj('msg_10.txt') 239 # The outer message is a multipart 240 eq(msg.get_payload(decode=True), None) 241 # Subpart 1 is 7bit encoded 242 eq(msg.get_payload(0).get_payload(decode=True), 243 b'This is a 7bit encoded message.\n') 244 # Subpart 2 is quopri 245 eq(msg.get_payload(1).get_payload(decode=True), 246 b'\xa1This is a Quoted Printable encoded message!\n') 247 # Subpart 3 is base64 248 eq(msg.get_payload(2).get_payload(decode=True), 249 b'This is a Base64 encoded message.') 250 # Subpart 4 is base64 with a trailing newline, which 251 # used to be stripped (issue 7143). 252 eq(msg.get_payload(3).get_payload(decode=True), 253 b'This is a Base64 encoded message.\n') 254 # Subpart 5 has no Content-Transfer-Encoding: header. 
255 eq(msg.get_payload(4).get_payload(decode=True), 256 b'This has no Content-Transfer-Encoding: header.\n') 257 258 def test_get_decoded_uu_payload(self): 259 eq = self.assertEqual 260 msg = Message() 261 msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n') 262 for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 263 msg['content-transfer-encoding'] = cte 264 eq(msg.get_payload(decode=True), b'hello world') 265 # Now try some bogus data 266 msg.set_payload('foo') 267 eq(msg.get_payload(decode=True), b'foo') 268 269 def test_get_payload_n_raises_on_non_multipart(self): 270 msg = Message() 271 self.assertRaises(TypeError, msg.get_payload, 1) 272 273 def test_decoded_generator(self): 274 eq = self.assertEqual 275 msg = self._msgobj('msg_07.txt') 276 with openfile('msg_17.txt') as fp: 277 text = fp.read() 278 s = StringIO() 279 g = DecodedGenerator(s) 280 g.flatten(msg) 281 eq(s.getvalue(), text) 282 283 def test__contains__(self): 284 msg = Message() 285 msg['From'] = 'Me' 286 msg['to'] = 'You' 287 # Check for case insensitivity 288 self.assertIn('from', msg) 289 self.assertIn('From', msg) 290 self.assertIn('FROM', msg) 291 self.assertIn('to', msg) 292 self.assertIn('To', msg) 293 self.assertIn('TO', msg) 294 295 def test_as_string(self): 296 msg = self._msgobj('msg_01.txt') 297 with openfile('msg_01.txt') as fp: 298 text = fp.read() 299 self.assertEqual(text, str(msg)) 300 fullrepr = msg.as_string(unixfrom=True) 301 lines = fullrepr.split('\n') 302 self.assertTrue(lines[0].startswith('From ')) 303 self.assertEqual(text, NL.join(lines[1:])) 304 305 def test_as_string_policy(self): 306 msg = self._msgobj('msg_01.txt') 307 newpolicy = msg.policy.clone(linesep='\r\n') 308 fullrepr = msg.as_string(policy=newpolicy) 309 s = StringIO() 310 g = Generator(s, policy=newpolicy) 311 g.flatten(msg) 312 self.assertEqual(fullrepr, s.getvalue()) 313 314 def test_as_bytes(self): 315 msg = self._msgobj('msg_01.txt') 316 with openfile('msg_01.txt') as fp: 317 data = 
fp.read().encode('ascii') 318 self.assertEqual(data, bytes(msg)) 319 fullrepr = msg.as_bytes(unixfrom=True) 320 lines = fullrepr.split(b'\n') 321 self.assertTrue(lines[0].startswith(b'From ')) 322 self.assertEqual(data, b'\n'.join(lines[1:])) 323 324 def test_as_bytes_policy(self): 325 msg = self._msgobj('msg_01.txt') 326 newpolicy = msg.policy.clone(linesep='\r\n') 327 fullrepr = msg.as_bytes(policy=newpolicy) 328 s = BytesIO() 329 g = BytesGenerator(s,policy=newpolicy) 330 g.flatten(msg) 331 self.assertEqual(fullrepr, s.getvalue()) 332 333 # test_headerregistry.TestContentTypeHeader.bad_params 334 def test_bad_param(self): 335 msg = email.message_from_string("Content-Type: blarg; baz; boo\n") 336 self.assertEqual(msg.get_param('baz'), '') 337 338 def test_missing_filename(self): 339 msg = email.message_from_string("From: foo\n") 340 self.assertEqual(msg.get_filename(), None) 341 342 def test_bogus_filename(self): 343 msg = email.message_from_string( 344 "Content-Disposition: blarg; filename\n") 345 self.assertEqual(msg.get_filename(), '') 346 347 def test_missing_boundary(self): 348 msg = email.message_from_string("From: foo\n") 349 self.assertEqual(msg.get_boundary(), None) 350 351 def test_get_params(self): 352 eq = self.assertEqual 353 msg = email.message_from_string( 354 'X-Header: foo=one; bar=two; baz=three\n') 355 eq(msg.get_params(header='x-header'), 356 [('foo', 'one'), ('bar', 'two'), ('baz', 'three')]) 357 msg = email.message_from_string( 358 'X-Header: foo; bar=one; baz=two\n') 359 eq(msg.get_params(header='x-header'), 360 [('foo', ''), ('bar', 'one'), ('baz', 'two')]) 361 eq(msg.get_params(), None) 362 msg = email.message_from_string( 363 'X-Header: foo; bar="one"; baz=two\n') 364 eq(msg.get_params(header='x-header'), 365 [('foo', ''), ('bar', 'one'), ('baz', 'two')]) 366 367 # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals 368 def test_get_param_liberal(self): 369 msg = Message() 370 msg['Content-Type'] = 'Content-Type: 
Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"' 371 self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG') 372 373 def test_get_param(self): 374 eq = self.assertEqual 375 msg = email.message_from_string( 376 "X-Header: foo=one; bar=two; baz=three\n") 377 eq(msg.get_param('bar', header='x-header'), 'two') 378 eq(msg.get_param('quuz', header='x-header'), None) 379 eq(msg.get_param('quuz'), None) 380 msg = email.message_from_string( 381 'X-Header: foo; bar="one"; baz=two\n') 382 eq(msg.get_param('foo', header='x-header'), '') 383 eq(msg.get_param('bar', header='x-header'), 'one') 384 eq(msg.get_param('baz', header='x-header'), 'two') 385 # XXX: We are not RFC-2045 compliant! We cannot parse: 386 # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"' 387 # msg.get_param("weird") 388 # yet. 389 390 # test_headerregistry.TestContentTypeHeader.spaces_around_semis 391 def test_get_param_funky_continuation_lines(self): 392 msg = self._msgobj('msg_22.txt') 393 self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG') 394 395 # test_headerregistry.TestContentTypeHeader.semis_inside_quotes 396 def test_get_param_with_semis_in_quotes(self): 397 msg = email.message_from_string( 398 'Content-Type: image/pjpeg; name="Jim&&Jill"\n') 399 self.assertEqual(msg.get_param('name'), 'Jim&&Jill') 400 self.assertEqual(msg.get_param('name', unquote=False), 401 '"Jim&&Jill"') 402 403 # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value 404 def test_get_param_with_quotes(self): 405 msg = email.message_from_string( 406 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"') 407 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') 408 msg = email.message_from_string( 409 "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"") 410 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') 411 412 def test_field_containment(self): 413 msg = email.message_from_string('Header: exists') 414 self.assertIn('header', msg) 
415 self.assertIn('Header', msg) 416 self.assertIn('HEADER', msg) 417 self.assertNotIn('headerx', msg) 418 419 def test_set_param(self): 420 eq = self.assertEqual 421 msg = Message() 422 msg.set_param('charset', 'iso-2022-jp') 423 eq(msg.get_param('charset'), 'iso-2022-jp') 424 msg.set_param('importance', 'high value') 425 eq(msg.get_param('importance'), 'high value') 426 eq(msg.get_param('importance', unquote=False), '"high value"') 427 eq(msg.get_params(), [('text/plain', ''), 428 ('charset', 'iso-2022-jp'), 429 ('importance', 'high value')]) 430 eq(msg.get_params(unquote=False), [('text/plain', ''), 431 ('charset', '"iso-2022-jp"'), 432 ('importance', '"high value"')]) 433 msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy') 434 eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx') 435 436 def test_del_param(self): 437 eq = self.assertEqual 438 msg = self._msgobj('msg_05.txt') 439 eq(msg.get_params(), 440 [('multipart/report', ''), ('report-type', 'delivery-status'), 441 ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) 442 old_val = msg.get_param("report-type") 443 msg.del_param("report-type") 444 eq(msg.get_params(), 445 [('multipart/report', ''), 446 ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) 447 msg.set_param("report-type", old_val) 448 eq(msg.get_params(), 449 [('multipart/report', ''), 450 ('boundary', 'D1690A7AC1.996856090/mail.example.com'), 451 ('report-type', old_val)]) 452 453 def test_del_param_on_other_header(self): 454 msg = Message() 455 msg.add_header('Content-Disposition', 'attachment', filename='bud.gif') 456 msg.del_param('filename', 'content-disposition') 457 self.assertEqual(msg['content-disposition'], 'attachment') 458 459 def test_del_param_on_nonexistent_header(self): 460 msg = Message() 461 # Deleting param on empty msg should not raise exception. 
462 msg.del_param('filename', 'content-disposition') 463 464 def test_del_nonexistent_param(self): 465 msg = Message() 466 msg.add_header('Content-Type', 'text/plain', charset='utf-8') 467 existing_header = msg['Content-Type'] 468 msg.del_param('foobar', header='Content-Type') 469 self.assertEqual(msg['Content-Type'], existing_header) 470 471 def test_set_type(self): 472 eq = self.assertEqual 473 msg = Message() 474 self.assertRaises(ValueError, msg.set_type, 'text') 475 msg.set_type('text/plain') 476 eq(msg['content-type'], 'text/plain') 477 msg.set_param('charset', 'us-ascii') 478 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 479 msg.set_type('text/html') 480 eq(msg['content-type'], 'text/html; charset="us-ascii"') 481 482 def test_set_type_on_other_header(self): 483 msg = Message() 484 msg['X-Content-Type'] = 'text/plain' 485 msg.set_type('application/octet-stream', 'X-Content-Type') 486 self.assertEqual(msg['x-content-type'], 'application/octet-stream') 487 488 def test_get_content_type_missing(self): 489 msg = Message() 490 self.assertEqual(msg.get_content_type(), 'text/plain') 491 492 def test_get_content_type_missing_with_default_type(self): 493 msg = Message() 494 msg.set_default_type('message/rfc822') 495 self.assertEqual(msg.get_content_type(), 'message/rfc822') 496 497 def test_get_content_type_from_message_implicit(self): 498 msg = self._msgobj('msg_30.txt') 499 self.assertEqual(msg.get_payload(0).get_content_type(), 500 'message/rfc822') 501 502 def test_get_content_type_from_message_explicit(self): 503 msg = self._msgobj('msg_28.txt') 504 self.assertEqual(msg.get_payload(0).get_content_type(), 505 'message/rfc822') 506 507 def test_get_content_type_from_message_text_plain_implicit(self): 508 msg = self._msgobj('msg_03.txt') 509 self.assertEqual(msg.get_content_type(), 'text/plain') 510 511 def test_get_content_type_from_message_text_plain_explicit(self): 512 msg = self._msgobj('msg_01.txt') 513 self.assertEqual(msg.get_content_type(), 
'text/plain') 514 515 def test_get_content_maintype_missing(self): 516 msg = Message() 517 self.assertEqual(msg.get_content_maintype(), 'text') 518 519 def test_get_content_maintype_missing_with_default_type(self): 520 msg = Message() 521 msg.set_default_type('message/rfc822') 522 self.assertEqual(msg.get_content_maintype(), 'message') 523 524 def test_get_content_maintype_from_message_implicit(self): 525 msg = self._msgobj('msg_30.txt') 526 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') 527 528 def test_get_content_maintype_from_message_explicit(self): 529 msg = self._msgobj('msg_28.txt') 530 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message') 531 532 def test_get_content_maintype_from_message_text_plain_implicit(self): 533 msg = self._msgobj('msg_03.txt') 534 self.assertEqual(msg.get_content_maintype(), 'text') 535 536 def test_get_content_maintype_from_message_text_plain_explicit(self): 537 msg = self._msgobj('msg_01.txt') 538 self.assertEqual(msg.get_content_maintype(), 'text') 539 540 def test_get_content_subtype_missing(self): 541 msg = Message() 542 self.assertEqual(msg.get_content_subtype(), 'plain') 543 544 def test_get_content_subtype_missing_with_default_type(self): 545 msg = Message() 546 msg.set_default_type('message/rfc822') 547 self.assertEqual(msg.get_content_subtype(), 'rfc822') 548 549 def test_get_content_subtype_from_message_implicit(self): 550 msg = self._msgobj('msg_30.txt') 551 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') 552 553 def test_get_content_subtype_from_message_explicit(self): 554 msg = self._msgobj('msg_28.txt') 555 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822') 556 557 def test_get_content_subtype_from_message_text_plain_implicit(self): 558 msg = self._msgobj('msg_03.txt') 559 self.assertEqual(msg.get_content_subtype(), 'plain') 560 561 def test_get_content_subtype_from_message_text_plain_explicit(self): 562 msg = self._msgobj('msg_01.txt') 
563 self.assertEqual(msg.get_content_subtype(), 'plain') 564 565 def test_get_content_maintype_error(self): 566 msg = Message() 567 msg['Content-Type'] = 'no-slash-in-this-string' 568 self.assertEqual(msg.get_content_maintype(), 'text') 569 570 def test_get_content_subtype_error(self): 571 msg = Message() 572 msg['Content-Type'] = 'no-slash-in-this-string' 573 self.assertEqual(msg.get_content_subtype(), 'plain') 574 575 def test_replace_header(self): 576 eq = self.assertEqual 577 msg = Message() 578 msg.add_header('First', 'One') 579 msg.add_header('Second', 'Two') 580 msg.add_header('Third', 'Three') 581 eq(msg.keys(), ['First', 'Second', 'Third']) 582 eq(msg.values(), ['One', 'Two', 'Three']) 583 msg.replace_header('Second', 'Twenty') 584 eq(msg.keys(), ['First', 'Second', 'Third']) 585 eq(msg.values(), ['One', 'Twenty', 'Three']) 586 msg.add_header('First', 'Eleven') 587 msg.replace_header('First', 'One Hundred') 588 eq(msg.keys(), ['First', 'Second', 'Third', 'First']) 589 eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven']) 590 self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing') 591 592 def test_get_content_disposition(self): 593 msg = Message() 594 self.assertIsNone(msg.get_content_disposition()) 595 msg.add_header('Content-Disposition', 'attachment', 596 filename='random.avi') 597 self.assertEqual(msg.get_content_disposition(), 'attachment') 598 msg.replace_header('Content-Disposition', 'inline') 599 self.assertEqual(msg.get_content_disposition(), 'inline') 600 msg.replace_header('Content-Disposition', 'InlinE') 601 self.assertEqual(msg.get_content_disposition(), 'inline') 602 603 # test_defect_handling:test_invalid_chars_in_base64_payload 604 def test_broken_base64_payload(self): 605 x = 'AwDp0P7//y6LwKEAcPa/6Q=9' 606 msg = Message() 607 msg['content-type'] = 'audio/x-midi' 608 msg['content-transfer-encoding'] = 'base64' 609 msg.set_payload(x) 610 self.assertEqual(msg.get_payload(decode=True), 611 
(b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0' 612 b'\xa1\x00p\xf6\xbf\xe9\x0f')) 613 self.assertIsInstance(msg.defects[0], 614 errors.InvalidBase64CharactersDefect) 615 616 def test_broken_unicode_payload(self): 617 # This test improves coverage but is not a compliance test. 618 # The behavior in this situation is currently undefined by the API. 619 x = 'this is a br\xf6ken thing to do' 620 msg = Message() 621 msg['content-type'] = 'text/plain' 622 msg['content-transfer-encoding'] = '8bit' 623 msg.set_payload(x) 624 self.assertEqual(msg.get_payload(decode=True), 625 bytes(x, 'raw-unicode-escape')) 626 627 def test_questionable_bytes_payload(self): 628 # This test improves coverage but is not a compliance test, 629 # since it involves poking inside the black box. 630 x = 'this is a quéstionable thing to do'.encode('utf-8') 631 msg = Message() 632 msg['content-type'] = 'text/plain; charset="utf-8"' 633 msg['content-transfer-encoding'] = '8bit' 634 msg._payload = x 635 self.assertEqual(msg.get_payload(decode=True), x) 636 637 # Issue 1078919 638 def test_ascii_add_header(self): 639 msg = Message() 640 msg.add_header('Content-Disposition', 'attachment', 641 filename='bud.gif') 642 self.assertEqual('attachment; filename="bud.gif"', 643 msg['Content-Disposition']) 644 645 def test_noascii_add_header(self): 646 msg = Message() 647 msg.add_header('Content-Disposition', 'attachment', 648 filename="Fußballer.ppt") 649 self.assertEqual( 650 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt', 651 msg['Content-Disposition']) 652 653 def test_nonascii_add_header_via_triple(self): 654 msg = Message() 655 msg.add_header('Content-Disposition', 'attachment', 656 filename=('iso-8859-1', '', 'Fußballer.ppt')) 657 self.assertEqual( 658 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt', 659 msg['Content-Disposition']) 660 661 def test_ascii_add_header_with_tspecial(self): 662 msg = Message() 663 msg.add_header('Content-Disposition', 'attachment', 664 filename="windows [filename].ppt") 
665 self.assertEqual( 666 'attachment; filename="windows [filename].ppt"', 667 msg['Content-Disposition']) 668 669 def test_nonascii_add_header_with_tspecial(self): 670 msg = Message() 671 msg.add_header('Content-Disposition', 'attachment', 672 filename="Fußballer [filename].ppt") 673 self.assertEqual( 674 "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt", 675 msg['Content-Disposition']) 676 677 def test_binary_quopri_payload(self): 678 for charset in ('latin-1', 'ascii'): 679 msg = Message() 680 msg['content-type'] = 'text/plain; charset=%s' % charset 681 msg['content-transfer-encoding'] = 'quoted-printable' 682 msg.set_payload(b'foo=e6=96=87bar') 683 self.assertEqual( 684 msg.get_payload(decode=True), 685 b'foo\xe6\x96\x87bar', 686 'get_payload returns wrong result with charset %s.' % charset) 687 688 def test_binary_base64_payload(self): 689 for charset in ('latin-1', 'ascii'): 690 msg = Message() 691 msg['content-type'] = 'text/plain; charset=%s' % charset 692 msg['content-transfer-encoding'] = 'base64' 693 msg.set_payload(b'Zm9v5paHYmFy') 694 self.assertEqual( 695 msg.get_payload(decode=True), 696 b'foo\xe6\x96\x87bar', 697 'get_payload returns wrong result with charset %s.' 
% charset) 698 699 def test_binary_uuencode_payload(self): 700 for charset in ('latin-1', 'ascii'): 701 for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 702 msg = Message() 703 msg['content-type'] = 'text/plain; charset=%s' % charset 704 msg['content-transfer-encoding'] = encoding 705 msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n") 706 self.assertEqual( 707 msg.get_payload(decode=True), 708 b'foo\xe6\x96\x87bar', 709 str(('get_payload returns wrong result ', 710 'with charset {0} and encoding {1}.')).\ 711 format(charset, encoding)) 712 713 def test_add_header_with_name_only_param(self): 714 msg = Message() 715 msg.add_header('Content-Disposition', 'inline', foo_bar=None) 716 self.assertEqual("inline; foo-bar", msg['Content-Disposition']) 717 718 def test_add_header_with_no_value(self): 719 msg = Message() 720 msg.add_header('X-Status', None) 721 self.assertEqual('', msg['X-Status']) 722 723 # Issue 5871: reject an attempt to embed a header inside a header value 724 # (header injection attack). 
725 def test_embedded_header_via_Header_rejected(self): 726 msg = Message() 727 msg['Dummy'] = Header('dummy\nX-Injected-Header: test') 728 self.assertRaises(errors.HeaderParseError, msg.as_string) 729 730 def test_embedded_header_via_string_rejected(self): 731 msg = Message() 732 msg['Dummy'] = 'dummy\nX-Injected-Header: test' 733 self.assertRaises(errors.HeaderParseError, msg.as_string) 734 735 def test_unicode_header_defaults_to_utf8_encoding(self): 736 # Issue 14291 737 m = MIMEText('abc\n') 738 m['Subject'] = 'É test' 739 self.assertEqual(str(m),textwrap.dedent("""\ 740 Content-Type: text/plain; charset="us-ascii" 741 MIME-Version: 1.0 742 Content-Transfer-Encoding: 7bit 743 Subject: =?utf-8?q?=C3=89_test?= 744 745 abc 746 """)) 747 748 def test_unicode_body_defaults_to_utf8_encoding(self): 749 # Issue 14291 750 m = MIMEText('É testabc\n') 751 self.assertEqual(str(m),textwrap.dedent("""\ 752 Content-Type: text/plain; charset="utf-8" 753 MIME-Version: 1.0 754 Content-Transfer-Encoding: base64 755 756 w4kgdGVzdGFiYwo= 757 """)) 758 759 760# Test the email.encoders module 761class TestEncoders(unittest.TestCase): 762 763 def test_EncodersEncode_base64(self): 764 with openfile('PyBanner048.gif', 'rb') as fp: 765 bindata = fp.read() 766 mimed = email.mime.image.MIMEImage(bindata) 767 base64ed = mimed.get_payload() 768 # the transfer-encoded body lines should all be <=76 characters 769 lines = base64ed.split('\n') 770 self.assertLessEqual(max([ len(x) for x in lines ]), 76) 771 772 def test_encode_empty_payload(self): 773 eq = self.assertEqual 774 msg = Message() 775 msg.set_charset('us-ascii') 776 eq(msg['content-transfer-encoding'], '7bit') 777 778 def test_default_cte(self): 779 eq = self.assertEqual 780 # 7bit data and the default us-ascii _charset 781 msg = MIMEText('hello world') 782 eq(msg['content-transfer-encoding'], '7bit') 783 # Similar, but with 8bit data 784 msg = MIMEText('hello \xf8 world') 785 eq(msg['content-transfer-encoding'], 'base64') 786 # And 
now with a different charset 787 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') 788 eq(msg['content-transfer-encoding'], 'quoted-printable') 789 790 def test_encode7or8bit(self): 791 # Make sure a charset whose input character set is 8bit but 792 # whose output character set is 7bit gets a transfer-encoding 793 # of 7bit. 794 eq = self.assertEqual 795 msg = MIMEText('文\n', _charset='euc-jp') 796 eq(msg['content-transfer-encoding'], '7bit') 797 eq(msg.as_string(), textwrap.dedent("""\ 798 MIME-Version: 1.0 799 Content-Type: text/plain; charset="iso-2022-jp" 800 Content-Transfer-Encoding: 7bit 801 802 \x1b$BJ8\x1b(B 803 """)) 804 805 def test_qp_encode_latin1(self): 806 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') 807 self.assertEqual(str(msg), textwrap.dedent("""\ 808 MIME-Version: 1.0 809 Content-Type: text/text; charset="iso-8859-1" 810 Content-Transfer-Encoding: quoted-printable 811 812 =E1=F6 813 """)) 814 815 def test_qp_encode_non_latin1(self): 816 # Issue 16948 817 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') 818 self.assertEqual(str(msg), textwrap.dedent("""\ 819 MIME-Version: 1.0 820 Content-Type: text/text; charset="iso-8859-2" 821 Content-Transfer-Encoding: quoted-printable 822 823 =BF 824 """)) 825 826 827# Test long header wrapping 828class TestLongHeaders(TestEmailBase): 829 830 maxDiff = None 831 832 def test_split_long_continuation(self): 833 eq = self.ndiffAssertEqual 834 msg = email.message_from_string("""\ 835Subject: bug demonstration 836\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 837\tmore text 838 839test 840""") 841 sfp = StringIO() 842 g = Generator(sfp) 843 g.flatten(msg) 844 eq(sfp.getvalue(), """\ 845Subject: bug demonstration 846\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 847\tmore text 848 849test 850""") 851 852 def 
test_another_long_almost_unsplittable_header(self): 853 eq = self.ndiffAssertEqual 854 hstr = """\ 855bug demonstration 856\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 857\tmore text""" 858 h = Header(hstr, continuation_ws='\t') 859 eq(h.encode(), """\ 860bug demonstration 861\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 862\tmore text""") 863 h = Header(hstr.replace('\t', ' ')) 864 eq(h.encode(), """\ 865bug demonstration 866 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 867 more text""") 868 869 def test_long_nonstring(self): 870 eq = self.ndiffAssertEqual 871 g = Charset("iso-8859-1") 872 cz = Charset("iso-8859-2") 873 utf8 = Charset("utf-8") 874 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' 875 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 876 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 877 b'bef\xf6rdert. ') 878 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 879 b'd\xf9vtipu.. ') 880 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 881 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 882 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 883 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 884 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 885 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 886 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 887 '\u3044\u307e\u3059\u3002') 888 h = Header(g_head, g, header_name='Subject') 889 h.append(cz_head, cz) 890 h.append(utf8_head, utf8) 891 msg = Message() 892 msg['Subject'] = h 893 sfp = StringIO() 894 g = Generator(sfp) 895 g.flatten(msg) 896 eq(sfp.getvalue(), """\ 897Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= 898 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= 899 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= 900 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= 901 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 902 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= 903 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= 904 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= 905 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= 906 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= 907 =?utf-8?b?44CC?= 908 909""") 910 eq(h.encode(maxlinelen=76), """\ 911=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= 912 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= 913 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= 914 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= 915 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 916 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= 917 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= 918 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= 919 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= 920 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= 
921 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 922 923 def test_long_header_encode(self): 924 eq = self.ndiffAssertEqual 925 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 926 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 927 header_name='X-Foobar-Spoink-Defrobnit') 928 eq(h.encode(), '''\ 929wasnipoop; giraffes="very-long-necked-animals"; 930 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 931 932 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 933 eq = self.ndiffAssertEqual 934 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 935 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 936 header_name='X-Foobar-Spoink-Defrobnit', 937 continuation_ws='\t') 938 eq(h.encode(), '''\ 939wasnipoop; giraffes="very-long-necked-animals"; 940 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 941 942 def test_long_header_encode_with_tab_continuation(self): 943 eq = self.ndiffAssertEqual 944 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 945 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 946 header_name='X-Foobar-Spoink-Defrobnit', 947 continuation_ws='\t') 948 eq(h.encode(), '''\ 949wasnipoop; giraffes="very-long-necked-animals"; 950\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 951 952 def test_header_encode_with_different_output_charset(self): 953 h = Header('文', 'euc-jp') 954 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 955 956 def test_long_header_encode_with_different_output_charset(self): 957 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 958 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 959 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 960 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 961 res = """\ 962=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 963 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 964 
self.assertEqual(h.encode(), res) 965 966 def test_header_splitter(self): 967 eq = self.ndiffAssertEqual 968 msg = MIMEText('') 969 # It'd be great if we could use add_header() here, but that doesn't 970 # guarantee an order of the parameters. 971 msg['X-Foobar-Spoink-Defrobnit'] = ( 972 'wasnipoop; giraffes="very-long-necked-animals"; ' 973 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 974 sfp = StringIO() 975 g = Generator(sfp) 976 g.flatten(msg) 977 eq(sfp.getvalue(), '''\ 978Content-Type: text/plain; charset="us-ascii" 979MIME-Version: 1.0 980Content-Transfer-Encoding: 7bit 981X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 982 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 983 984''') 985 986 def test_no_semis_header_splitter(self): 987 eq = self.ndiffAssertEqual 988 msg = Message() 989 msg['From'] = 'test@dom.ain' 990 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10)) 991 msg.set_payload('Test') 992 sfp = StringIO() 993 g = Generator(sfp) 994 g.flatten(msg) 995 eq(sfp.getvalue(), """\ 996From: test@dom.ain 997References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> 998 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> 999 1000Test""") 1001 1002 def test_last_split_chunk_does_not_fit(self): 1003 eq = self.ndiffAssertEqual 1004 h = Header('Subject: the first part of this is short, but_the_second' 1005 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1006 '_all_by_itself') 1007 eq(h.encode(), """\ 1008Subject: the first part of this is short, 1009 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1010 1011 def test_splittable_leading_char_followed_by_overlong_unsplitable(self): 1012 eq = self.ndiffAssertEqual 1013 h = Header(', but_the_second' 1014 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1015 '_all_by_itself') 1016 eq(h.encode(), """\ 1017, 1018 
but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1019 1020 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self): 1021 eq = self.ndiffAssertEqual 1022 h = Header(', , but_the_second' 1023 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1024 '_all_by_itself') 1025 eq(h.encode(), """\ 1026, , 1027 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1028 1029 def test_trailing_splitable_on_overlong_unsplitable(self): 1030 eq = self.ndiffAssertEqual 1031 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1032 'be_on_a_line_all_by_itself;') 1033 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1034 "be_on_a_line_all_by_itself;") 1035 1036 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self): 1037 eq = self.ndiffAssertEqual 1038 h = Header('; ' 1039 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1040 'be_on_a_line_all_by_itself; ') 1041 eq(h.encode(), """\ 1042; 1043 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1044 1045 def test_long_header_with_multiple_sequential_split_chars(self): 1046 eq = self.ndiffAssertEqual 1047 h = Header('This is a long line that has two whitespaces in a row. ' 1048 'This used to cause truncation of the header when folded') 1049 eq(h.encode(), """\ 1050This is a long line that has two whitespaces in a row. 
This used to cause 1051 truncation of the header when folded""") 1052 1053 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1054 eq = self.ndiffAssertEqual 1055 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1056 'they;arenotlegal;fold,points') 1057 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1058 "arenotlegal;fold,points") 1059 1060 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1061 eq = self.ndiffAssertEqual 1062 h = Header('this is a test where we need to have more than one line ' 1063 'before; our final line that is just too big to fit;; ' 1064 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1065 'be_on_a_line_all_by_itself;') 1066 eq(h.encode(), """\ 1067this is a test where we need to have more than one line before; 1068 our final line that is just too big to fit;; 1069 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1070 1071 def test_overlong_last_part_followed_by_split_point(self): 1072 eq = self.ndiffAssertEqual 1073 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1074 'be_on_a_line_all_by_itself ') 1075 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1076 "should_be_on_a_line_all_by_itself ") 1077 1078 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1079 eq = self.ndiffAssertEqual 1080 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1081 'before_our_final_line_; ; ' 1082 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1083 'be_on_a_line_all_by_itself; ') 1084 eq(h.encode(), """\ 1085this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1086 ; 1087 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1088 1089 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1090 eq = self.ndiffAssertEqual 1091 h = Header('this is a 
test where we need to have more than one line ' 1092 'before our final line; ; ' 1093 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1094 'be_on_a_line_all_by_itself; ') 1095 eq(h.encode(), """\ 1096this is a test where we need to have more than one line before our final line; 1097 ; 1098 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1099 1100 def test_long_header_with_whitespace_runs(self): 1101 eq = self.ndiffAssertEqual 1102 msg = Message() 1103 msg['From'] = 'test@dom.ain' 1104 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10) 1105 msg.set_payload('Test') 1106 sfp = StringIO() 1107 g = Generator(sfp) 1108 g.flatten(msg) 1109 eq(sfp.getvalue(), """\ 1110From: test@dom.ain 1111References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1112 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1113 <foo@dom.ain> <foo@dom.ain>\x20\x20 1114 1115Test""") 1116 1117 def test_long_run_with_semi_header_splitter(self): 1118 eq = self.ndiffAssertEqual 1119 msg = Message() 1120 msg['From'] = 'test@dom.ain' 1121 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc' 1122 msg.set_payload('Test') 1123 sfp = StringIO() 1124 g = Generator(sfp) 1125 g.flatten(msg) 1126 eq(sfp.getvalue(), """\ 1127From: test@dom.ain 1128References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1129 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1130 <foo@dom.ain>; abc 1131 1132Test""") 1133 1134 def test_splitter_split_on_punctuation_only_if_fws(self): 1135 eq = self.ndiffAssertEqual 1136 msg = Message() 1137 msg['From'] = 'test@dom.ain' 1138 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1139 'they;arenotlegal;fold,points') 1140 msg.set_payload('Test') 1141 sfp = StringIO() 1142 g = Generator(sfp) 1143 g.flatten(msg) 1144 # XXX the space after the header should not be there. 
1145 eq(sfp.getvalue(), """\ 1146From: test@dom.ain 1147References:\x20 1148 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1149 1150Test""") 1151 1152 def test_no_split_long_header(self): 1153 eq = self.ndiffAssertEqual 1154 hstr = 'References: ' + 'x' * 80 1155 h = Header(hstr) 1156 # These come on two lines because Headers are really field value 1157 # classes and don't really know about their field names. 1158 eq(h.encode(), """\ 1159References: 1160 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1161 h = Header('x' * 80) 1162 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1163 1164 def test_splitting_multiple_long_lines(self): 1165 eq = self.ndiffAssertEqual 1166 hstr = """\ 1167from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1168\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1169\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1170""" 1171 h = Header(hstr, continuation_ws='\t') 1172 eq(h.encode(), """\ 1173from babylon.socal-raves.org (localhost [127.0.0.1]); 1174 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1175 for <mailman-admin@babylon.socal-raves.org>; 1176 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1177\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1178 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1179 for <mailman-admin@babylon.socal-raves.org>; 1180 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1181\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 
1182 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1183 for <mailman-admin@babylon.socal-raves.org>; 1184 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1185 1186 def test_splitting_first_line_only_is_long(self): 1187 eq = self.ndiffAssertEqual 1188 hstr = """\ 1189from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1190\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1191\tid 17k4h5-00034i-00 1192\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" 1193 h = Header(hstr, maxlinelen=78, header_name='Received', 1194 continuation_ws='\t') 1195 eq(h.encode(), """\ 1196from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1197 helo=cthulhu.gerg.ca) 1198\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1199\tid 17k4h5-00034i-00 1200\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") 1201 1202 def test_long_8bit_header(self): 1203 eq = self.ndiffAssertEqual 1204 msg = Message() 1205 h = Header('Britische Regierung gibt', 'iso-8859-1', 1206 header_name='Subject') 1207 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1208 eq(h.encode(maxlinelen=76), """\ 1209=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1210 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1211 msg['Subject'] = h 1212 eq(msg.as_string(maxheaderlen=76), """\ 1213Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1214 =?iso-8859-1?q?hore-Windkraftprojekte?= 1215 1216""") 1217 eq(msg.as_string(maxheaderlen=0), """\ 1218Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1219 1220""") 1221 1222 def test_long_8bit_header_no_charset(self): 1223 eq = self.ndiffAssertEqual 1224 msg = Message() 1225 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1226 'f\xfcr Offshore-Windkraftprojekte ' 1227 '<a-very-long-address@example.com>') 1228 msg['Reply-To'] = header_string 1229 eq(msg.as_string(maxheaderlen=78), """\ 
1230Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1231 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1232 1233""") 1234 msg = Message() 1235 msg['Reply-To'] = Header(header_string, 1236 header_name='Reply-To') 1237 eq(msg.as_string(maxheaderlen=78), """\ 1238Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1239 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1240 1241""") 1242 1243 def test_long_to_header(self): 1244 eq = self.ndiffAssertEqual 1245 to = ('"Someone Test #A" <someone@eecs.umich.edu>,' 1246 '<someone@eecs.umich.edu>, ' 1247 '"Someone Test #B" <someone@umich.edu>, ' 1248 '"Someone Test #C" <someone@eecs.umich.edu>, ' 1249 '"Someone Test #D" <someone@eecs.umich.edu>') 1250 msg = Message() 1251 msg['To'] = to 1252 eq(msg.as_string(maxheaderlen=78), '''\ 1253To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>, 1254 "Someone Test #B" <someone@umich.edu>, 1255 "Someone Test #C" <someone@eecs.umich.edu>, 1256 "Someone Test #D" <someone@eecs.umich.edu> 1257 1258''') 1259 1260 def test_long_line_after_append(self): 1261 eq = self.ndiffAssertEqual 1262 s = 'This is an example of string which has almost the limit of header length.' 1263 h = Header(s) 1264 h.append('Add another line.') 1265 eq(h.encode(maxlinelen=76), """\ 1266This is an example of string which has almost the limit of header length. 1267 Add another line.""") 1268 1269 def test_shorter_line_with_append(self): 1270 eq = self.ndiffAssertEqual 1271 s = 'This is a shorter line.' 1272 h = Header(s) 1273 h.append('Add another sentence. (Surprise?)') 1274 eq(h.encode(), 1275 'This is a shorter line. Add another sentence. 
(Surprise?)') 1276 1277 def test_long_field_name(self): 1278 eq = self.ndiffAssertEqual 1279 fn = 'X-Very-Very-Very-Long-Header-Name' 1280 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1281 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1282 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1283 'bef\xf6rdert. ') 1284 h = Header(gs, 'iso-8859-1', header_name=fn) 1285 # BAW: this seems broken because the first line is too long 1286 eq(h.encode(maxlinelen=76), """\ 1287=?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1288 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1289 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1290 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1291 1292 def test_long_received_header(self): 1293 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1294 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1295 'Wed, 05 Mar 2003 18:10:18 -0700') 1296 msg = Message() 1297 msg['Received-1'] = Header(h, continuation_ws='\t') 1298 msg['Received-2'] = h 1299 # This should be splitting on spaces not semicolons. 1300 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1301Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1302 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1303 Wed, 05 Mar 2003 18:10:18 -0700 1304Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1305 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1306 Wed, 05 Mar 2003 18:10:18 -0700 1307 1308""") 1309 1310 def test_string_headerinst_eq(self): 1311 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.' 1312 'tu-muenchen.de> (David Bremner\'s message of ' 1313 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1314 msg = Message() 1315 msg['Received-1'] = Header(h, header_name='Received-1', 1316 continuation_ws='\t') 1317 msg['Received-2'] = h 1318 # XXX The space after the ':' should not be there. 
1319 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1320Received-1:\x20 1321 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1322 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1323Received-2:\x20 1324 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1325 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1326 1327""") 1328 1329 def test_long_unbreakable_lines_with_continuation(self): 1330 eq = self.ndiffAssertEqual 1331 msg = Message() 1332 t = """\ 1333iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1334 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" 1335 msg['Face-1'] = t 1336 msg['Face-2'] = Header(t, header_name='Face-2') 1337 msg['Face-3'] = ' ' + t 1338 # XXX This splitting is all wrong. It the first value line should be 1339 # snug against the field name or the space after the header not there. 1340 eq(msg.as_string(maxheaderlen=78), """\ 1341Face-1:\x20 1342 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1343 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1344Face-2:\x20 1345 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1346 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1347Face-3:\x20 1348 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1349 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1350 1351""") 1352 1353 def test_another_long_multiline_header(self): 1354 eq = self.ndiffAssertEqual 1355 m = ('Received: from siimage.com ' 1356 '([172.25.1.3]) by zima.siliconimage.com with ' 1357 'Microsoft SMTPSVC(5.0.2195.4905); ' 1358 'Wed, 16 Oct 2002 07:41:11 -0700') 1359 msg = email.message_from_string(m) 1360 eq(msg.as_string(maxheaderlen=78), '''\ 1361Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with 1362 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 
2002 07:41:11 -0700 1363 1364''') 1365 1366 def test_long_lines_with_different_header(self): 1367 eq = self.ndiffAssertEqual 1368 h = ('List-Unsubscribe: ' 1369 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,' 1370 ' <mailto:spamassassin-talk-request@lists.sourceforge.net' 1371 '?subject=unsubscribe>') 1372 msg = Message() 1373 msg['List'] = h 1374 msg['List'] = Header(h, header_name='List') 1375 eq(msg.as_string(maxheaderlen=78), """\ 1376List: List-Unsubscribe: 1377 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1378 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1379List: List-Unsubscribe: 1380 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1381 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1382 1383""") 1384 1385 def test_long_rfc2047_header_with_embedded_fws(self): 1386 h = Header(textwrap.dedent("""\ 1387 We're going to pretend this header is in a non-ascii character set 1388 \tto see if line wrapping with encoded words and embedded 1389 folding white space works"""), 1390 charset='utf-8', 1391 header_name='Test') 1392 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ 1393 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= 1394 =?utf-8?q?cter_set?= 1395 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= 1396 =?utf-8?q?_folding_white_space_works?=""")+'\n') 1397 1398 1399 1400# Test mangling of "From " lines in the body of a message 1401class TestFromMangling(unittest.TestCase): 1402 def setUp(self): 1403 self.msg = Message() 1404 self.msg['From'] = 'aaa@bbb.org' 1405 self.msg.set_payload("""\ 1406From the desk of A.A.A.: 1407Blah blah blah 1408""") 1409 1410 def test_mangled_from(self): 1411 s = StringIO() 1412 g = Generator(s, mangle_from_=True) 1413 g.flatten(self.msg) 1414 self.assertEqual(s.getvalue(), """\ 1415From: aaa@bbb.org 1416 1417>From the desk of A.A.A.: 1418Blah blah blah 1419""") 1420 
1421 def test_dont_mangle_from(self): 1422 s = StringIO() 1423 g = Generator(s, mangle_from_=False) 1424 g.flatten(self.msg) 1425 self.assertEqual(s.getvalue(), """\ 1426From: aaa@bbb.org 1427 1428From the desk of A.A.A.: 1429Blah blah blah 1430""") 1431 1432 def test_mangle_from_in_preamble_and_epilog(self): 1433 s = StringIO() 1434 g = Generator(s, mangle_from_=True) 1435 msg = email.message_from_string(textwrap.dedent("""\ 1436 From: foo@bar.com 1437 Mime-Version: 1.0 1438 Content-Type: multipart/mixed; boundary=XXX 1439 1440 From somewhere unknown 1441 1442 --XXX 1443 Content-Type: text/plain 1444 1445 foo 1446 1447 --XXX-- 1448 1449 From somewhere unknowable 1450 """)) 1451 g.flatten(msg) 1452 self.assertEqual(len([1 for x in s.getvalue().split('\n') 1453 if x.startswith('>From ')]), 2) 1454 1455 def test_mangled_from_with_bad_bytes(self): 1456 source = textwrap.dedent("""\ 1457 Content-Type: text/plain; charset="utf-8" 1458 MIME-Version: 1.0 1459 Content-Transfer-Encoding: 8bit 1460 From: aaa@bbb.org 1461 1462 """).encode('utf-8') 1463 msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n') 1464 b = BytesIO() 1465 g = BytesGenerator(b, mangle_from_=True) 1466 g.flatten(msg) 1467 self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n') 1468 1469 def test_mutltipart_with_bad_bytes_in_cte(self): 1470 # bpo30835 1471 source = textwrap.dedent("""\ 1472 From: aperson@example.com 1473 Content-Type: multipart/mixed; boundary="1" 1474 Content-Transfer-Encoding: \xc8 1475 """).encode('utf-8') 1476 msg = email.message_from_bytes(source) 1477 1478 1479# Test the basic MIMEAudio class 1480class TestMIMEAudio(unittest.TestCase): 1481 def setUp(self): 1482 with openfile('audiotest.au', 'rb') as fp: 1483 self._audiodata = fp.read() 1484 self._au = MIMEAudio(self._audiodata) 1485 1486 def test_guess_minor_type(self): 1487 self.assertEqual(self._au.get_content_type(), 'audio/basic') 1488 1489 def test_encoding(self): 1490 payload = self._au.get_payload() 1491 
self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1492 self._audiodata) 1493 1494 def test_checkSetMinor(self): 1495 au = MIMEAudio(self._audiodata, 'fish') 1496 self.assertEqual(au.get_content_type(), 'audio/fish') 1497 1498 def test_add_header(self): 1499 eq = self.assertEqual 1500 self._au.add_header('Content-Disposition', 'attachment', 1501 filename='audiotest.au') 1502 eq(self._au['content-disposition'], 1503 'attachment; filename="audiotest.au"') 1504 eq(self._au.get_params(header='content-disposition'), 1505 [('attachment', ''), ('filename', 'audiotest.au')]) 1506 eq(self._au.get_param('filename', header='content-disposition'), 1507 'audiotest.au') 1508 missing = [] 1509 eq(self._au.get_param('attachment', header='content-disposition'), '') 1510 self.assertIs(self._au.get_param('foo', failobj=missing, 1511 header='content-disposition'), missing) 1512 # Try some missing stuff 1513 self.assertIs(self._au.get_param('foobar', missing), missing) 1514 self.assertIs(self._au.get_param('attachment', missing, 1515 header='foobar'), missing) 1516 1517 1518 1519# Test the basic MIMEImage class 1520class TestMIMEImage(unittest.TestCase): 1521 def setUp(self): 1522 with openfile('PyBanner048.gif', 'rb') as fp: 1523 self._imgdata = fp.read() 1524 self._im = MIMEImage(self._imgdata) 1525 1526 def test_guess_minor_type(self): 1527 self.assertEqual(self._im.get_content_type(), 'image/gif') 1528 1529 def test_encoding(self): 1530 payload = self._im.get_payload() 1531 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1532 self._imgdata) 1533 1534 def test_checkSetMinor(self): 1535 im = MIMEImage(self._imgdata, 'fish') 1536 self.assertEqual(im.get_content_type(), 'image/fish') 1537 1538 def test_add_header(self): 1539 eq = self.assertEqual 1540 self._im.add_header('Content-Disposition', 'attachment', 1541 filename='dingusfish.gif') 1542 eq(self._im['content-disposition'], 1543 'attachment; filename="dingusfish.gif"') 1544 
eq(self._im.get_params(header='content-disposition'), 1545 [('attachment', ''), ('filename', 'dingusfish.gif')]) 1546 eq(self._im.get_param('filename', header='content-disposition'), 1547 'dingusfish.gif') 1548 missing = [] 1549 eq(self._im.get_param('attachment', header='content-disposition'), '') 1550 self.assertIs(self._im.get_param('foo', failobj=missing, 1551 header='content-disposition'), missing) 1552 # Try some missing stuff 1553 self.assertIs(self._im.get_param('foobar', missing), missing) 1554 self.assertIs(self._im.get_param('attachment', missing, 1555 header='foobar'), missing) 1556 1557 1558 1559# Test the basic MIMEApplication class 1560class TestMIMEApplication(unittest.TestCase): 1561 def test_headers(self): 1562 eq = self.assertEqual 1563 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff') 1564 eq(msg.get_content_type(), 'application/octet-stream') 1565 eq(msg['content-transfer-encoding'], 'base64') 1566 1567 def test_body(self): 1568 eq = self.assertEqual 1569 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1570 msg = MIMEApplication(bytesdata) 1571 # whitespace in the cte encoded block is RFC-irrelevant. 1572 eq(msg.get_payload().strip(), '+vv8/f7/') 1573 eq(msg.get_payload(decode=True), bytesdata) 1574 1575 def test_binary_body_with_encode_7or8bit(self): 1576 # Issue 17171. 1577 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1578 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit) 1579 # Treated as a string, this will be invalid code points. 
1580 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1581 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1582 self.assertEqual(msg['Content-Transfer-Encoding'], '8bit') 1583 s = BytesIO() 1584 g = BytesGenerator(s) 1585 g.flatten(msg) 1586 wireform = s.getvalue() 1587 msg2 = email.message_from_bytes(wireform) 1588 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1589 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1590 self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit') 1591 1592 def test_binary_body_with_encode_noop(self): 1593 # Issue 16564: This does not produce an RFC valid message, since to be 1594 # valid it should have a CTE of binary. But the below works in 1595 # Python2, and is documented as working this way. 1596 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1597 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) 1598 # Treated as a string, this will be invalid code points. 1599 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1600 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1601 s = BytesIO() 1602 g = BytesGenerator(s) 1603 g.flatten(msg) 1604 wireform = s.getvalue() 1605 msg2 = email.message_from_bytes(wireform) 1606 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1607 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1608 1609 def test_binary_body_with_unicode_linend_encode_noop(self): 1610 # Issue 19003: This is a variation on #16564. 1611 bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff' 1612 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) 1613 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1614 s = BytesIO() 1615 g = BytesGenerator(s) 1616 g.flatten(msg) 1617 wireform = s.getvalue() 1618 msg2 = email.message_from_bytes(wireform) 1619 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1620 1621 def test_binary_body_with_encode_quopri(self): 1622 # Issue 14360. 
1623 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff ' 1624 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri) 1625 self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20') 1626 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1627 self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable') 1628 s = BytesIO() 1629 g = BytesGenerator(s) 1630 g.flatten(msg) 1631 wireform = s.getvalue() 1632 msg2 = email.message_from_bytes(wireform) 1633 self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20') 1634 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1635 self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable') 1636 1637 def test_binary_body_with_encode_base64(self): 1638 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1639 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64) 1640 self.assertEqual(msg.get_payload(), '+vv8/f7/\n') 1641 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1642 s = BytesIO() 1643 g = BytesGenerator(s) 1644 g.flatten(msg) 1645 wireform = s.getvalue() 1646 msg2 = email.message_from_bytes(wireform) 1647 self.assertEqual(msg.get_payload(), '+vv8/f7/\n') 1648 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1649 1650 1651# Test the basic MIMEText class 1652class TestMIMEText(unittest.TestCase): 1653 def setUp(self): 1654 self._msg = MIMEText('hello there') 1655 1656 def test_types(self): 1657 eq = self.assertEqual 1658 eq(self._msg.get_content_type(), 'text/plain') 1659 eq(self._msg.get_param('charset'), 'us-ascii') 1660 missing = [] 1661 self.assertIs(self._msg.get_param('foobar', missing), missing) 1662 self.assertIs(self._msg.get_param('charset', missing, header='foobar'), 1663 missing) 1664 1665 def test_payload(self): 1666 self.assertEqual(self._msg.get_payload(), 'hello there') 1667 self.assertFalse(self._msg.is_multipart()) 1668 1669 def test_charset(self): 1670 eq = self.assertEqual 1671 msg = MIMEText('hello there', _charset='us-ascii') 1672 
eq(msg.get_charset().input_charset, 'us-ascii') 1673 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1674 # Also accept a Charset instance 1675 charset = Charset('utf-8') 1676 charset.body_encoding = None 1677 msg = MIMEText('hello there', _charset=charset) 1678 eq(msg.get_charset().input_charset, 'utf-8') 1679 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1680 eq(msg.get_payload(), 'hello there') 1681 1682 def test_7bit_input(self): 1683 eq = self.assertEqual 1684 msg = MIMEText('hello there', _charset='us-ascii') 1685 eq(msg.get_charset().input_charset, 'us-ascii') 1686 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1687 1688 def test_7bit_input_no_charset(self): 1689 eq = self.assertEqual 1690 msg = MIMEText('hello there') 1691 eq(msg.get_charset(), 'us-ascii') 1692 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1693 self.assertIn('hello there', msg.as_string()) 1694 1695 def test_utf8_input(self): 1696 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1697 eq = self.assertEqual 1698 msg = MIMEText(teststr, _charset='utf-8') 1699 eq(msg.get_charset().output_charset, 'utf-8') 1700 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1701 eq(msg.get_payload(decode=True), teststr.encode('utf-8')) 1702 1703 @unittest.skip("can't fix because of backward compat in email5, " 1704 "will fix in email6") 1705 def test_utf8_input_no_charset(self): 1706 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1707 self.assertRaises(UnicodeEncodeError, MIMEText, teststr) 1708 1709 1710 1711# Test complicated multipart/* messages 1712class TestMultipart(TestEmailBase): 1713 def setUp(self): 1714 with openfile('PyBanner048.gif', 'rb') as fp: 1715 data = fp.read() 1716 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') 1717 image = MIMEImage(data, name='dingusfish.gif') 1718 image.add_header('content-disposition', 'attachment', 1719 filename='dingusfish.gif') 1720 intro = MIMEText('''\ 1721Hi there, 1722 
1723This is the dingus fish. 1724''') 1725 container.attach(intro) 1726 container.attach(image) 1727 container['From'] = 'Barry <barry@digicool.com>' 1728 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>' 1729 container['Subject'] = 'Here is your dingus fish' 1730 1731 now = 987809702.54848599 1732 timetuple = time.localtime(now) 1733 if timetuple[-1] == 0: 1734 tzsecs = time.timezone 1735 else: 1736 tzsecs = time.altzone 1737 if tzsecs > 0: 1738 sign = '-' 1739 else: 1740 sign = '+' 1741 tzoffset = ' %s%04d' % (sign, tzsecs / 36) 1742 container['Date'] = time.strftime( 1743 '%a, %d %b %Y %H:%M:%S', 1744 time.localtime(now)) + tzoffset 1745 self._msg = container 1746 self._im = image 1747 self._txt = intro 1748 1749 def test_hierarchy(self): 1750 # convenience 1751 eq = self.assertEqual 1752 raises = self.assertRaises 1753 # tests 1754 m = self._msg 1755 self.assertTrue(m.is_multipart()) 1756 eq(m.get_content_type(), 'multipart/mixed') 1757 eq(len(m.get_payload()), 2) 1758 raises(IndexError, m.get_payload, 2) 1759 m0 = m.get_payload(0) 1760 m1 = m.get_payload(1) 1761 self.assertIs(m0, self._txt) 1762 self.assertIs(m1, self._im) 1763 eq(m.get_payload(), [m0, m1]) 1764 self.assertFalse(m0.is_multipart()) 1765 self.assertFalse(m1.is_multipart()) 1766 1767 def test_empty_multipart_idempotent(self): 1768 text = """\ 1769Content-Type: multipart/mixed; boundary="BOUNDARY" 1770MIME-Version: 1.0 1771Subject: A subject 1772To: aperson@dom.ain 1773From: bperson@dom.ain 1774 1775 1776--BOUNDARY 1777 1778 1779--BOUNDARY-- 1780""" 1781 msg = Parser().parsestr(text) 1782 self.ndiffAssertEqual(text, msg.as_string()) 1783 1784 def test_no_parts_in_a_multipart_with_none_epilogue(self): 1785 outer = MIMEBase('multipart', 'mixed') 1786 outer['Subject'] = 'A subject' 1787 outer['To'] = 'aperson@dom.ain' 1788 outer['From'] = 'bperson@dom.ain' 1789 outer.set_boundary('BOUNDARY') 1790 self.ndiffAssertEqual(outer.as_string(), '''\ 1791Content-Type: multipart/mixed; 
boundary="BOUNDARY" 1792MIME-Version: 1.0 1793Subject: A subject 1794To: aperson@dom.ain 1795From: bperson@dom.ain 1796 1797--BOUNDARY 1798 1799--BOUNDARY-- 1800''') 1801 1802 def test_no_parts_in_a_multipart_with_empty_epilogue(self): 1803 outer = MIMEBase('multipart', 'mixed') 1804 outer['Subject'] = 'A subject' 1805 outer['To'] = 'aperson@dom.ain' 1806 outer['From'] = 'bperson@dom.ain' 1807 outer.preamble = '' 1808 outer.epilogue = '' 1809 outer.set_boundary('BOUNDARY') 1810 self.ndiffAssertEqual(outer.as_string(), '''\ 1811Content-Type: multipart/mixed; boundary="BOUNDARY" 1812MIME-Version: 1.0 1813Subject: A subject 1814To: aperson@dom.ain 1815From: bperson@dom.ain 1816 1817 1818--BOUNDARY 1819 1820--BOUNDARY-- 1821''') 1822 1823 def test_one_part_in_a_multipart(self): 1824 eq = self.ndiffAssertEqual 1825 outer = MIMEBase('multipart', 'mixed') 1826 outer['Subject'] = 'A subject' 1827 outer['To'] = 'aperson@dom.ain' 1828 outer['From'] = 'bperson@dom.ain' 1829 outer.set_boundary('BOUNDARY') 1830 msg = MIMEText('hello world') 1831 outer.attach(msg) 1832 eq(outer.as_string(), '''\ 1833Content-Type: multipart/mixed; boundary="BOUNDARY" 1834MIME-Version: 1.0 1835Subject: A subject 1836To: aperson@dom.ain 1837From: bperson@dom.ain 1838 1839--BOUNDARY 1840Content-Type: text/plain; charset="us-ascii" 1841MIME-Version: 1.0 1842Content-Transfer-Encoding: 7bit 1843 1844hello world 1845--BOUNDARY-- 1846''') 1847 1848 def test_seq_parts_in_a_multipart_with_empty_preamble(self): 1849 eq = self.ndiffAssertEqual 1850 outer = MIMEBase('multipart', 'mixed') 1851 outer['Subject'] = 'A subject' 1852 outer['To'] = 'aperson@dom.ain' 1853 outer['From'] = 'bperson@dom.ain' 1854 outer.preamble = '' 1855 msg = MIMEText('hello world') 1856 outer.attach(msg) 1857 outer.set_boundary('BOUNDARY') 1858 eq(outer.as_string(), '''\ 1859Content-Type: multipart/mixed; boundary="BOUNDARY" 1860MIME-Version: 1.0 1861Subject: A subject 1862To: aperson@dom.ain 1863From: bperson@dom.ain 1864 1865 
1866--BOUNDARY 1867Content-Type: text/plain; charset="us-ascii" 1868MIME-Version: 1.0 1869Content-Transfer-Encoding: 7bit 1870 1871hello world 1872--BOUNDARY-- 1873''') 1874 1875 1876 def test_seq_parts_in_a_multipart_with_none_preamble(self): 1877 eq = self.ndiffAssertEqual 1878 outer = MIMEBase('multipart', 'mixed') 1879 outer['Subject'] = 'A subject' 1880 outer['To'] = 'aperson@dom.ain' 1881 outer['From'] = 'bperson@dom.ain' 1882 outer.preamble = None 1883 msg = MIMEText('hello world') 1884 outer.attach(msg) 1885 outer.set_boundary('BOUNDARY') 1886 eq(outer.as_string(), '''\ 1887Content-Type: multipart/mixed; boundary="BOUNDARY" 1888MIME-Version: 1.0 1889Subject: A subject 1890To: aperson@dom.ain 1891From: bperson@dom.ain 1892 1893--BOUNDARY 1894Content-Type: text/plain; charset="us-ascii" 1895MIME-Version: 1.0 1896Content-Transfer-Encoding: 7bit 1897 1898hello world 1899--BOUNDARY-- 1900''') 1901 1902 1903 def test_seq_parts_in_a_multipart_with_none_epilogue(self): 1904 eq = self.ndiffAssertEqual 1905 outer = MIMEBase('multipart', 'mixed') 1906 outer['Subject'] = 'A subject' 1907 outer['To'] = 'aperson@dom.ain' 1908 outer['From'] = 'bperson@dom.ain' 1909 outer.epilogue = None 1910 msg = MIMEText('hello world') 1911 outer.attach(msg) 1912 outer.set_boundary('BOUNDARY') 1913 eq(outer.as_string(), '''\ 1914Content-Type: multipart/mixed; boundary="BOUNDARY" 1915MIME-Version: 1.0 1916Subject: A subject 1917To: aperson@dom.ain 1918From: bperson@dom.ain 1919 1920--BOUNDARY 1921Content-Type: text/plain; charset="us-ascii" 1922MIME-Version: 1.0 1923Content-Transfer-Encoding: 7bit 1924 1925hello world 1926--BOUNDARY-- 1927''') 1928 1929 1930 def test_seq_parts_in_a_multipart_with_empty_epilogue(self): 1931 eq = self.ndiffAssertEqual 1932 outer = MIMEBase('multipart', 'mixed') 1933 outer['Subject'] = 'A subject' 1934 outer['To'] = 'aperson@dom.ain' 1935 outer['From'] = 'bperson@dom.ain' 1936 outer.epilogue = '' 1937 msg = MIMEText('hello world') 1938 outer.attach(msg) 
1939 outer.set_boundary('BOUNDARY') 1940 eq(outer.as_string(), '''\ 1941Content-Type: multipart/mixed; boundary="BOUNDARY" 1942MIME-Version: 1.0 1943Subject: A subject 1944To: aperson@dom.ain 1945From: bperson@dom.ain 1946 1947--BOUNDARY 1948Content-Type: text/plain; charset="us-ascii" 1949MIME-Version: 1.0 1950Content-Transfer-Encoding: 7bit 1951 1952hello world 1953--BOUNDARY-- 1954''') 1955 1956 1957 def test_seq_parts_in_a_multipart_with_nl_epilogue(self): 1958 eq = self.ndiffAssertEqual 1959 outer = MIMEBase('multipart', 'mixed') 1960 outer['Subject'] = 'A subject' 1961 outer['To'] = 'aperson@dom.ain' 1962 outer['From'] = 'bperson@dom.ain' 1963 outer.epilogue = '\n' 1964 msg = MIMEText('hello world') 1965 outer.attach(msg) 1966 outer.set_boundary('BOUNDARY') 1967 eq(outer.as_string(), '''\ 1968Content-Type: multipart/mixed; boundary="BOUNDARY" 1969MIME-Version: 1.0 1970Subject: A subject 1971To: aperson@dom.ain 1972From: bperson@dom.ain 1973 1974--BOUNDARY 1975Content-Type: text/plain; charset="us-ascii" 1976MIME-Version: 1.0 1977Content-Transfer-Encoding: 7bit 1978 1979hello world 1980--BOUNDARY-- 1981 1982''') 1983 1984 def test_message_external_body(self): 1985 eq = self.assertEqual 1986 msg = self._msgobj('msg_36.txt') 1987 eq(len(msg.get_payload()), 2) 1988 msg1 = msg.get_payload(1) 1989 eq(msg1.get_content_type(), 'multipart/alternative') 1990 eq(len(msg1.get_payload()), 2) 1991 for subpart in msg1.get_payload(): 1992 eq(subpart.get_content_type(), 'message/external-body') 1993 eq(len(subpart.get_payload()), 1) 1994 subsubpart = subpart.get_payload(0) 1995 eq(subsubpart.get_content_type(), 'text/plain') 1996 1997 def test_double_boundary(self): 1998 # msg_37.txt is a multipart that contains two dash-boundary's in a 1999 # row. Our interpretation of RFC 2046 calls for ignoring the second 2000 # and subsequent boundaries. 
2001 msg = self._msgobj('msg_37.txt') 2002 self.assertEqual(len(msg.get_payload()), 3) 2003 2004 def test_nested_inner_contains_outer_boundary(self): 2005 eq = self.ndiffAssertEqual 2006 # msg_38.txt has an inner part that contains outer boundaries. My 2007 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say 2008 # these are illegal and should be interpreted as unterminated inner 2009 # parts. 2010 msg = self._msgobj('msg_38.txt') 2011 sfp = StringIO() 2012 iterators._structure(msg, sfp) 2013 eq(sfp.getvalue(), """\ 2014multipart/mixed 2015 multipart/mixed 2016 multipart/alternative 2017 text/plain 2018 text/plain 2019 text/plain 2020 text/plain 2021""") 2022 2023 def test_nested_with_same_boundary(self): 2024 eq = self.ndiffAssertEqual 2025 # msg 39.txt is similarly evil in that it's got inner parts that use 2026 # the same boundary as outer parts. Again, I believe the way this is 2027 # parsed is closest to the spirit of RFC 2046 2028 msg = self._msgobj('msg_39.txt') 2029 sfp = StringIO() 2030 iterators._structure(msg, sfp) 2031 eq(sfp.getvalue(), """\ 2032multipart/mixed 2033 multipart/mixed 2034 multipart/alternative 2035 application/octet-stream 2036 application/octet-stream 2037 text/plain 2038""") 2039 2040 def test_boundary_in_non_multipart(self): 2041 msg = self._msgobj('msg_40.txt') 2042 self.assertEqual(msg.as_string(), '''\ 2043MIME-Version: 1.0 2044Content-Type: text/html; boundary="--961284236552522269" 2045 2046----961284236552522269 2047Content-Type: text/html; 2048Content-Transfer-Encoding: 7Bit 2049 2050<html></html> 2051 2052----961284236552522269-- 2053''') 2054 2055 def test_boundary_with_leading_space(self): 2056 eq = self.assertEqual 2057 msg = email.message_from_string('''\ 2058MIME-Version: 1.0 2059Content-Type: multipart/mixed; boundary=" XXXX" 2060 2061-- XXXX 2062Content-Type: text/plain 2063 2064 2065-- XXXX 2066Content-Type: text/plain 2067 2068-- XXXX-- 2069''') 2070 self.assertTrue(msg.is_multipart()) 2071 
eq(msg.get_boundary(), ' XXXX') 2072 eq(len(msg.get_payload()), 2) 2073 2074 def test_boundary_without_trailing_newline(self): 2075 m = Parser().parsestr("""\ 2076Content-Type: multipart/mixed; boundary="===============0012394164==" 2077MIME-Version: 1.0 2078 2079--===============0012394164== 2080Content-Type: image/file1.jpg 2081MIME-Version: 1.0 2082Content-Transfer-Encoding: base64 2083 2084YXNkZg== 2085--===============0012394164==--""") 2086 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') 2087 2088 def test_mimebase_default_policy(self): 2089 m = MIMEBase('multipart', 'mixed') 2090 self.assertIs(m.policy, email.policy.compat32) 2091 2092 def test_mimebase_custom_policy(self): 2093 m = MIMEBase('multipart', 'mixed', policy=email.policy.default) 2094 self.assertIs(m.policy, email.policy.default) 2095 2096# Test some badly formatted messages 2097class TestNonConformant(TestEmailBase): 2098 2099 def test_parse_missing_minor_type(self): 2100 eq = self.assertEqual 2101 msg = self._msgobj('msg_14.txt') 2102 eq(msg.get_content_type(), 'text/plain') 2103 eq(msg.get_content_maintype(), 'text') 2104 eq(msg.get_content_subtype(), 'plain') 2105 2106 # test_defect_handling 2107 def test_same_boundary_inner_outer(self): 2108 msg = self._msgobj('msg_15.txt') 2109 # XXX We can probably eventually do better 2110 inner = msg.get_payload(0) 2111 self.assertTrue(hasattr(inner, 'defects')) 2112 self.assertEqual(len(inner.defects), 1) 2113 self.assertIsInstance(inner.defects[0], 2114 errors.StartBoundaryNotFoundDefect) 2115 2116 # test_defect_handling 2117 def test_multipart_no_boundary(self): 2118 msg = self._msgobj('msg_25.txt') 2119 self.assertIsInstance(msg.get_payload(), str) 2120 self.assertEqual(len(msg.defects), 2) 2121 self.assertIsInstance(msg.defects[0], 2122 errors.NoBoundaryInMultipartDefect) 2123 self.assertIsInstance(msg.defects[1], 2124 errors.MultipartInvariantViolationDefect) 2125 2126 multipart_msg = textwrap.dedent("""\ 2127 Date: Wed, 14 Nov 2007 
12:56:23 GMT 2128 From: foo@bar.invalid 2129 To: foo@bar.invalid 2130 Subject: Content-Transfer-Encoding: base64 and multipart 2131 MIME-Version: 1.0 2132 Content-Type: multipart/mixed; 2133 boundary="===============3344438784458119861=="{} 2134 2135 --===============3344438784458119861== 2136 Content-Type: text/plain 2137 2138 Test message 2139 2140 --===============3344438784458119861== 2141 Content-Type: application/octet-stream 2142 Content-Transfer-Encoding: base64 2143 2144 YWJj 2145 2146 --===============3344438784458119861==-- 2147 """) 2148 2149 # test_defect_handling 2150 def test_multipart_invalid_cte(self): 2151 msg = self._str_msg( 2152 self.multipart_msg.format("\nContent-Transfer-Encoding: base64")) 2153 self.assertEqual(len(msg.defects), 1) 2154 self.assertIsInstance(msg.defects[0], 2155 errors.InvalidMultipartContentTransferEncodingDefect) 2156 2157 # test_defect_handling 2158 def test_multipart_no_cte_no_defect(self): 2159 msg = self._str_msg(self.multipart_msg.format('')) 2160 self.assertEqual(len(msg.defects), 0) 2161 2162 # test_defect_handling 2163 def test_multipart_valid_cte_no_defect(self): 2164 for cte in ('7bit', '8bit', 'BINary'): 2165 msg = self._str_msg( 2166 self.multipart_msg.format( 2167 "\nContent-Transfer-Encoding: {}".format(cte))) 2168 self.assertEqual(len(msg.defects), 0) 2169 2170 # test_headerregistry.TestContentTyopeHeader invalid_1 and invalid_2. 
2171 def test_invalid_content_type(self): 2172 eq = self.assertEqual 2173 neq = self.ndiffAssertEqual 2174 msg = Message() 2175 # RFC 2045, $5.2 says invalid yields text/plain 2176 msg['Content-Type'] = 'text' 2177 eq(msg.get_content_maintype(), 'text') 2178 eq(msg.get_content_subtype(), 'plain') 2179 eq(msg.get_content_type(), 'text/plain') 2180 # Clear the old value and try something /really/ invalid 2181 del msg['content-type'] 2182 msg['Content-Type'] = 'foo' 2183 eq(msg.get_content_maintype(), 'text') 2184 eq(msg.get_content_subtype(), 'plain') 2185 eq(msg.get_content_type(), 'text/plain') 2186 # Still, make sure that the message is idempotently generated 2187 s = StringIO() 2188 g = Generator(s) 2189 g.flatten(msg) 2190 neq(s.getvalue(), 'Content-Type: foo\n\n') 2191 2192 def test_no_start_boundary(self): 2193 eq = self.ndiffAssertEqual 2194 msg = self._msgobj('msg_31.txt') 2195 eq(msg.get_payload(), """\ 2196--BOUNDARY 2197Content-Type: text/plain 2198 2199message 1 2200 2201--BOUNDARY 2202Content-Type: text/plain 2203 2204message 2 2205 2206--BOUNDARY-- 2207""") 2208 2209 def test_no_separating_blank_line(self): 2210 eq = self.ndiffAssertEqual 2211 msg = self._msgobj('msg_35.txt') 2212 eq(msg.as_string(), """\ 2213From: aperson@dom.ain 2214To: bperson@dom.ain 2215Subject: here's something interesting 2216 2217counter to RFC 2822, there's no separating newline here 2218""") 2219 2220 # test_defect_handling 2221 def test_lying_multipart(self): 2222 msg = self._msgobj('msg_41.txt') 2223 self.assertTrue(hasattr(msg, 'defects')) 2224 self.assertEqual(len(msg.defects), 2) 2225 self.assertIsInstance(msg.defects[0], 2226 errors.NoBoundaryInMultipartDefect) 2227 self.assertIsInstance(msg.defects[1], 2228 errors.MultipartInvariantViolationDefect) 2229 2230 # test_defect_handling 2231 def test_missing_start_boundary(self): 2232 outer = self._msgobj('msg_42.txt') 2233 # The message structure is: 2234 # 2235 # multipart/mixed 2236 # text/plain 2237 # message/rfc822 2238 
# multipart/mixed [*] 2239 # 2240 # [*] This message is missing its start boundary 2241 bad = outer.get_payload(1).get_payload(0) 2242 self.assertEqual(len(bad.defects), 1) 2243 self.assertIsInstance(bad.defects[0], 2244 errors.StartBoundaryNotFoundDefect) 2245 2246 # test_defect_handling 2247 def test_first_line_is_continuation_header(self): 2248 eq = self.assertEqual 2249 m = ' Line 1\nSubject: test\n\nbody' 2250 msg = email.message_from_string(m) 2251 eq(msg.keys(), ['Subject']) 2252 eq(msg.get_payload(), 'body') 2253 eq(len(msg.defects), 1) 2254 self.assertDefectsEqual(msg.defects, 2255 [errors.FirstHeaderLineIsContinuationDefect]) 2256 eq(msg.defects[0].line, ' Line 1\n') 2257 2258 # test_defect_handling 2259 def test_missing_header_body_separator(self): 2260 # Our heuristic if we see a line that doesn't look like a header (no 2261 # leading whitespace but no ':') is to assume that the blank line that 2262 # separates the header from the body is missing, and to stop parsing 2263 # headers and start parsing the body. 
2264 msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') 2265 self.assertEqual(msg.keys(), ['Subject']) 2266 self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') 2267 self.assertDefectsEqual(msg.defects, 2268 [errors.MissingHeaderBodySeparatorDefect]) 2269 2270 2271# Test RFC 2047 header encoding and decoding 2272class TestRFC2047(TestEmailBase): 2273 def test_rfc2047_multiline(self): 2274 eq = self.assertEqual 2275 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz 2276 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" 2277 dh = decode_header(s) 2278 eq(dh, [ 2279 (b'Re: ', None), 2280 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), 2281 (b' baz foo bar ', None), 2282 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) 2283 header = make_header(dh) 2284 eq(str(header), 2285 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') 2286 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ 2287Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= 2288 =?mac-iceland?q?=9Arg=8Cs?=""") 2289 2290 def test_whitespace_keeper_unicode(self): 2291 eq = self.assertEqual 2292 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>' 2293 dh = decode_header(s) 2294 eq(dh, [(b'Andr\xe9', 'iso-8859-1'), 2295 (b' Pirard <pirard@dom.ain>', None)]) 2296 header = str(make_header(dh)) 2297 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>') 2298 2299 def test_whitespace_keeper_unicode_2(self): 2300 eq = self.assertEqual 2301 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' 2302 dh = decode_header(s) 2303 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'), 2304 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')]) 2305 hu = str(make_header(dh)) 2306 eq(hu, 'The quick brown fox jumped over the lazy dog') 2307 2308 def test_rfc2047_missing_whitespace(self): 2309 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' 2310 dh = decode_header(s) 2311 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 
'iso-8859-1'), 2312 (b'rg', None), (b'\xe5', 'iso-8859-1'), 2313 (b'sbord', None)]) 2314 2315 def test_rfc2047_with_whitespace(self): 2316 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' 2317 dh = decode_header(s) 2318 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'), 2319 (b' rg ', None), (b'\xe5', 'iso-8859-1'), 2320 (b' sbord', None)]) 2321 2322 def test_rfc2047_B_bad_padding(self): 2323 s = '=?iso-8859-1?B?%s?=' 2324 data = [ # only test complete bytes 2325 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'), 2326 ('dmk=', b'vi'), ('dmk', b'vi') 2327 ] 2328 for q, a in data: 2329 dh = decode_header(s % q) 2330 self.assertEqual(dh, [(a, 'iso-8859-1')]) 2331 2332 def test_rfc2047_Q_invalid_digits(self): 2333 # issue 10004. 2334 s = '=?iso-8859-1?Q?andr=e9=zz?=' 2335 self.assertEqual(decode_header(s), 2336 [(b'andr\xe9=zz', 'iso-8859-1')]) 2337 2338 def test_rfc2047_rfc2047_1(self): 2339 # 1st testcase at end of rfc2047 2340 s = '(=?ISO-8859-1?Q?a?=)' 2341 self.assertEqual(decode_header(s), 2342 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) 2343 2344 def test_rfc2047_rfc2047_2(self): 2345 # 2nd testcase at end of rfc2047 2346 s = '(=?ISO-8859-1?Q?a?= b)' 2347 self.assertEqual(decode_header(s), 2348 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) 2349 2350 def test_rfc2047_rfc2047_3(self): 2351 # 3rd testcase at end of rfc2047 2352 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2353 self.assertEqual(decode_header(s), 2354 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2355 2356 def test_rfc2047_rfc2047_4(self): 2357 # 4th testcase at end of rfc2047 2358 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2359 self.assertEqual(decode_header(s), 2360 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2361 2362 def test_rfc2047_rfc2047_5a(self): 2363 # 5th testcase at end of rfc2047 newline is \r\n 2364 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' 2365 self.assertEqual(decode_header(s), 2366 [(b'(', None), (b'ab', 'iso-8859-1'), 
(b')', None)]) 2367 2368 def test_rfc2047_rfc2047_5b(self): 2369 # 5th testcase at end of rfc2047 newline is \n 2370 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' 2371 self.assertEqual(decode_header(s), 2372 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2373 2374 def test_rfc2047_rfc2047_6(self): 2375 # 6th testcase at end of rfc2047 2376 s = '(=?ISO-8859-1?Q?a_b?=)' 2377 self.assertEqual(decode_header(s), 2378 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) 2379 2380 def test_rfc2047_rfc2047_7(self): 2381 # 7th testcase at end of rfc2047 2382 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' 2383 self.assertEqual(decode_header(s), 2384 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), 2385 (b')', None)]) 2386 self.assertEqual(make_header(decode_header(s)).encode(), s.lower()) 2387 self.assertEqual(str(make_header(decode_header(s))), '(a b)') 2388 2389 def test_multiline_header(self): 2390 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>' 2391 self.assertEqual(decode_header(s), 2392 [(b'"M\xfcller T"', 'windows-1252'), 2393 (b'<T.Mueller@xxx.com>', None)]) 2394 self.assertEqual(make_header(decode_header(s)).encode(), 2395 ''.join(s.splitlines())) 2396 self.assertEqual(str(make_header(decode_header(s))), 2397 '"Müller T" <T.Mueller@xxx.com>') 2398 2399 2400# Test the MIMEMessage class 2401class TestMIMEMessage(TestEmailBase): 2402 def setUp(self): 2403 with openfile('msg_11.txt') as fp: 2404 self._text = fp.read() 2405 2406 def test_type_error(self): 2407 self.assertRaises(TypeError, MIMEMessage, 'a plain string') 2408 2409 def test_valid_argument(self): 2410 eq = self.assertEqual 2411 subject = 'A sub-message' 2412 m = Message() 2413 m['Subject'] = subject 2414 r = MIMEMessage(m) 2415 eq(r.get_content_type(), 'message/rfc822') 2416 payload = r.get_payload() 2417 self.assertIsInstance(payload, list) 2418 eq(len(payload), 1) 2419 subpart = payload[0] 2420 self.assertIs(subpart, m) 2421 eq(subpart['subject'], subject) 2422 2423 
def test_bad_multipart(self): 2424 msg1 = Message() 2425 msg1['Subject'] = 'subpart 1' 2426 msg2 = Message() 2427 msg2['Subject'] = 'subpart 2' 2428 r = MIMEMessage(msg1) 2429 self.assertRaises(errors.MultipartConversionError, r.attach, msg2) 2430 2431 def test_generate(self): 2432 # First craft the message to be encapsulated 2433 m = Message() 2434 m['Subject'] = 'An enclosed message' 2435 m.set_payload('Here is the body of the message.\n') 2436 r = MIMEMessage(m) 2437 r['Subject'] = 'The enclosing message' 2438 s = StringIO() 2439 g = Generator(s) 2440 g.flatten(r) 2441 self.assertEqual(s.getvalue(), """\ 2442Content-Type: message/rfc822 2443MIME-Version: 1.0 2444Subject: The enclosing message 2445 2446Subject: An enclosed message 2447 2448Here is the body of the message. 2449""") 2450 2451 def test_parse_message_rfc822(self): 2452 eq = self.assertEqual 2453 msg = self._msgobj('msg_11.txt') 2454 eq(msg.get_content_type(), 'message/rfc822') 2455 payload = msg.get_payload() 2456 self.assertIsInstance(payload, list) 2457 eq(len(payload), 1) 2458 submsg = payload[0] 2459 self.assertIsInstance(submsg, Message) 2460 eq(submsg['subject'], 'An enclosed message') 2461 eq(submsg.get_payload(), 'Here is the body of the message.\n') 2462 2463 def test_dsn(self): 2464 eq = self.assertEqual 2465 # msg 16 is a Delivery Status Notification, see RFC 1894 2466 msg = self._msgobj('msg_16.txt') 2467 eq(msg.get_content_type(), 'multipart/report') 2468 self.assertTrue(msg.is_multipart()) 2469 eq(len(msg.get_payload()), 3) 2470 # Subpart 1 is a text/plain, human readable section 2471 subpart = msg.get_payload(0) 2472 eq(subpart.get_content_type(), 'text/plain') 2473 eq(subpart.get_payload(), """\ 2474This report relates to a message you sent with the following header fields: 2475 2476 Message-id: <002001c144a6$8752e060$56104586@oxy.edu> 2477 Date: Sun, 23 Sep 2001 20:10:55 -0700 2478 From: "Ian T. 
Henry" <henryi@oxy.edu> 2479 To: SoCal Raves <scr@socal-raves.org> 2480 Subject: [scr] yeah for Ians!! 2481 2482Your message cannot be delivered to the following recipients: 2483 2484 Recipient address: jangel1@cougar.noc.ucla.edu 2485 Reason: recipient reached disk quota 2486 2487""") 2488 # Subpart 2 contains the machine parsable DSN information. It 2489 # consists of two blocks of headers, represented by two nested Message 2490 # objects. 2491 subpart = msg.get_payload(1) 2492 eq(subpart.get_content_type(), 'message/delivery-status') 2493 eq(len(subpart.get_payload()), 2) 2494 # message/delivery-status should treat each block as a bunch of 2495 # headers, i.e. a bunch of Message objects. 2496 dsn1 = subpart.get_payload(0) 2497 self.assertIsInstance(dsn1, Message) 2498 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu') 2499 eq(dsn1.get_param('dns', header='reporting-mta'), '') 2500 # Try a missing one <wink> 2501 eq(dsn1.get_param('nsd', header='reporting-mta'), None) 2502 dsn2 = subpart.get_payload(1) 2503 self.assertIsInstance(dsn2, Message) 2504 eq(dsn2['action'], 'failed') 2505 eq(dsn2.get_params(header='original-recipient'), 2506 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')]) 2507 eq(dsn2.get_param('rfc822', header='final-recipient'), '') 2508 # Subpart 3 is the original message 2509 subpart = msg.get_payload(2) 2510 eq(subpart.get_content_type(), 'message/rfc822') 2511 payload = subpart.get_payload() 2512 self.assertIsInstance(payload, list) 2513 eq(len(payload), 1) 2514 subsubpart = payload[0] 2515 self.assertIsInstance(subsubpart, Message) 2516 eq(subsubpart.get_content_type(), 'text/plain') 2517 eq(subsubpart['message-id'], 2518 '<002001c144a6$8752e060$56104586@oxy.edu>') 2519 2520 def test_epilogue(self): 2521 eq = self.ndiffAssertEqual 2522 with openfile('msg_21.txt') as fp: 2523 text = fp.read() 2524 msg = Message() 2525 msg['From'] = 'aperson@dom.ain' 2526 msg['To'] = 'bperson@dom.ain' 2527 msg['Subject'] = 'Test' 2528 
msg.preamble = 'MIME message' 2529 msg.epilogue = 'End of MIME message\n' 2530 msg1 = MIMEText('One') 2531 msg2 = MIMEText('Two') 2532 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2533 msg.attach(msg1) 2534 msg.attach(msg2) 2535 sfp = StringIO() 2536 g = Generator(sfp) 2537 g.flatten(msg) 2538 eq(sfp.getvalue(), text) 2539 2540 def test_no_nl_preamble(self): 2541 eq = self.ndiffAssertEqual 2542 msg = Message() 2543 msg['From'] = 'aperson@dom.ain' 2544 msg['To'] = 'bperson@dom.ain' 2545 msg['Subject'] = 'Test' 2546 msg.preamble = 'MIME message' 2547 msg.epilogue = '' 2548 msg1 = MIMEText('One') 2549 msg2 = MIMEText('Two') 2550 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2551 msg.attach(msg1) 2552 msg.attach(msg2) 2553 eq(msg.as_string(), """\ 2554From: aperson@dom.ain 2555To: bperson@dom.ain 2556Subject: Test 2557Content-Type: multipart/mixed; boundary="BOUNDARY" 2558 2559MIME message 2560--BOUNDARY 2561Content-Type: text/plain; charset="us-ascii" 2562MIME-Version: 1.0 2563Content-Transfer-Encoding: 7bit 2564 2565One 2566--BOUNDARY 2567Content-Type: text/plain; charset="us-ascii" 2568MIME-Version: 1.0 2569Content-Transfer-Encoding: 7bit 2570 2571Two 2572--BOUNDARY-- 2573""") 2574 2575 def test_default_type(self): 2576 eq = self.assertEqual 2577 with openfile('msg_30.txt') as fp: 2578 msg = email.message_from_file(fp) 2579 container1 = msg.get_payload(0) 2580 eq(container1.get_default_type(), 'message/rfc822') 2581 eq(container1.get_content_type(), 'message/rfc822') 2582 container2 = msg.get_payload(1) 2583 eq(container2.get_default_type(), 'message/rfc822') 2584 eq(container2.get_content_type(), 'message/rfc822') 2585 container1a = container1.get_payload(0) 2586 eq(container1a.get_default_type(), 'text/plain') 2587 eq(container1a.get_content_type(), 'text/plain') 2588 container2a = container2.get_payload(0) 2589 eq(container2a.get_default_type(), 'text/plain') 2590 eq(container2a.get_content_type(), 'text/plain') 
2591 2592 def test_default_type_with_explicit_container_type(self): 2593 eq = self.assertEqual 2594 with openfile('msg_28.txt') as fp: 2595 msg = email.message_from_file(fp) 2596 container1 = msg.get_payload(0) 2597 eq(container1.get_default_type(), 'message/rfc822') 2598 eq(container1.get_content_type(), 'message/rfc822') 2599 container2 = msg.get_payload(1) 2600 eq(container2.get_default_type(), 'message/rfc822') 2601 eq(container2.get_content_type(), 'message/rfc822') 2602 container1a = container1.get_payload(0) 2603 eq(container1a.get_default_type(), 'text/plain') 2604 eq(container1a.get_content_type(), 'text/plain') 2605 container2a = container2.get_payload(0) 2606 eq(container2a.get_default_type(), 'text/plain') 2607 eq(container2a.get_content_type(), 'text/plain') 2608 2609 def test_default_type_non_parsed(self): 2610 eq = self.assertEqual 2611 neq = self.ndiffAssertEqual 2612 # Set up container 2613 container = MIMEMultipart('digest', 'BOUNDARY') 2614 container.epilogue = '' 2615 # Set up subparts 2616 subpart1a = MIMEText('message 1\n') 2617 subpart2a = MIMEText('message 2\n') 2618 subpart1 = MIMEMessage(subpart1a) 2619 subpart2 = MIMEMessage(subpart2a) 2620 container.attach(subpart1) 2621 container.attach(subpart2) 2622 eq(subpart1.get_content_type(), 'message/rfc822') 2623 eq(subpart1.get_default_type(), 'message/rfc822') 2624 eq(subpart2.get_content_type(), 'message/rfc822') 2625 eq(subpart2.get_default_type(), 'message/rfc822') 2626 neq(container.as_string(0), '''\ 2627Content-Type: multipart/digest; boundary="BOUNDARY" 2628MIME-Version: 1.0 2629 2630--BOUNDARY 2631Content-Type: message/rfc822 2632MIME-Version: 1.0 2633 2634Content-Type: text/plain; charset="us-ascii" 2635MIME-Version: 1.0 2636Content-Transfer-Encoding: 7bit 2637 2638message 1 2639 2640--BOUNDARY 2641Content-Type: message/rfc822 2642MIME-Version: 1.0 2643 2644Content-Type: text/plain; charset="us-ascii" 2645MIME-Version: 1.0 2646Content-Transfer-Encoding: 7bit 2647 2648message 2 2649 
2650--BOUNDARY-- 2651''') 2652 del subpart1['content-type'] 2653 del subpart1['mime-version'] 2654 del subpart2['content-type'] 2655 del subpart2['mime-version'] 2656 eq(subpart1.get_content_type(), 'message/rfc822') 2657 eq(subpart1.get_default_type(), 'message/rfc822') 2658 eq(subpart2.get_content_type(), 'message/rfc822') 2659 eq(subpart2.get_default_type(), 'message/rfc822') 2660 neq(container.as_string(0), '''\ 2661Content-Type: multipart/digest; boundary="BOUNDARY" 2662MIME-Version: 1.0 2663 2664--BOUNDARY 2665 2666Content-Type: text/plain; charset="us-ascii" 2667MIME-Version: 1.0 2668Content-Transfer-Encoding: 7bit 2669 2670message 1 2671 2672--BOUNDARY 2673 2674Content-Type: text/plain; charset="us-ascii" 2675MIME-Version: 1.0 2676Content-Transfer-Encoding: 7bit 2677 2678message 2 2679 2680--BOUNDARY-- 2681''') 2682 2683 def test_mime_attachments_in_constructor(self): 2684 eq = self.assertEqual 2685 text1 = MIMEText('') 2686 text2 = MIMEText('') 2687 msg = MIMEMultipart(_subparts=(text1, text2)) 2688 eq(len(msg.get_payload()), 2) 2689 eq(msg.get_payload(0), text1) 2690 eq(msg.get_payload(1), text2) 2691 2692 def test_default_multipart_constructor(self): 2693 msg = MIMEMultipart() 2694 self.assertTrue(msg.is_multipart()) 2695 2696 def test_multipart_default_policy(self): 2697 msg = MIMEMultipart() 2698 msg['To'] = 'a@b.com' 2699 msg['To'] = 'c@d.com' 2700 self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com']) 2701 2702 def test_multipart_custom_policy(self): 2703 msg = MIMEMultipart(policy=email.policy.default) 2704 msg['To'] = 'a@b.com' 2705 with self.assertRaises(ValueError) as cm: 2706 msg['To'] = 'c@d.com' 2707 self.assertEqual(str(cm.exception), 2708 'There may be at most 1 To headers in a message') 2709 2710# A general test of parser->model->generator idempotency. IOW, read a message 2711# in, parse it into a message object tree, then without touching the tree, 2712# regenerate the plain text. 
The original text and the transformed text 2713# should be identical. Note: that we ignore the Unix-From since that may 2714# contain a changed date. 2715class TestIdempotent(TestEmailBase): 2716 2717 linesep = '\n' 2718 2719 def _msgobj(self, filename): 2720 with openfile(filename) as fp: 2721 data = fp.read() 2722 msg = email.message_from_string(data) 2723 return msg, data 2724 2725 def _idempotent(self, msg, text, unixfrom=False): 2726 eq = self.ndiffAssertEqual 2727 s = StringIO() 2728 g = Generator(s, maxheaderlen=0) 2729 g.flatten(msg, unixfrom=unixfrom) 2730 eq(text, s.getvalue()) 2731 2732 def test_parse_text_message(self): 2733 eq = self.assertEqual 2734 msg, text = self._msgobj('msg_01.txt') 2735 eq(msg.get_content_type(), 'text/plain') 2736 eq(msg.get_content_maintype(), 'text') 2737 eq(msg.get_content_subtype(), 'plain') 2738 eq(msg.get_params()[1], ('charset', 'us-ascii')) 2739 eq(msg.get_param('charset'), 'us-ascii') 2740 eq(msg.preamble, None) 2741 eq(msg.epilogue, None) 2742 self._idempotent(msg, text) 2743 2744 def test_parse_untyped_message(self): 2745 eq = self.assertEqual 2746 msg, text = self._msgobj('msg_03.txt') 2747 eq(msg.get_content_type(), 'text/plain') 2748 eq(msg.get_params(), None) 2749 eq(msg.get_param('charset'), None) 2750 self._idempotent(msg, text) 2751 2752 def test_simple_multipart(self): 2753 msg, text = self._msgobj('msg_04.txt') 2754 self._idempotent(msg, text) 2755 2756 def test_MIME_digest(self): 2757 msg, text = self._msgobj('msg_02.txt') 2758 self._idempotent(msg, text) 2759 2760 def test_long_header(self): 2761 msg, text = self._msgobj('msg_27.txt') 2762 self._idempotent(msg, text) 2763 2764 def test_MIME_digest_with_part_headers(self): 2765 msg, text = self._msgobj('msg_28.txt') 2766 self._idempotent(msg, text) 2767 2768 def test_mixed_with_image(self): 2769 msg, text = self._msgobj('msg_06.txt') 2770 self._idempotent(msg, text) 2771 2772 def test_multipart_report(self): 2773 msg, text = self._msgobj('msg_05.txt') 2774 
self._idempotent(msg, text) 2775 2776 def test_dsn(self): 2777 msg, text = self._msgobj('msg_16.txt') 2778 self._idempotent(msg, text) 2779 2780 def test_preamble_epilogue(self): 2781 msg, text = self._msgobj('msg_21.txt') 2782 self._idempotent(msg, text) 2783 2784 def test_multipart_one_part(self): 2785 msg, text = self._msgobj('msg_23.txt') 2786 self._idempotent(msg, text) 2787 2788 def test_multipart_no_parts(self): 2789 msg, text = self._msgobj('msg_24.txt') 2790 self._idempotent(msg, text) 2791 2792 def test_no_start_boundary(self): 2793 msg, text = self._msgobj('msg_31.txt') 2794 self._idempotent(msg, text) 2795 2796 def test_rfc2231_charset(self): 2797 msg, text = self._msgobj('msg_32.txt') 2798 self._idempotent(msg, text) 2799 2800 def test_more_rfc2231_parameters(self): 2801 msg, text = self._msgobj('msg_33.txt') 2802 self._idempotent(msg, text) 2803 2804 def test_text_plain_in_a_multipart_digest(self): 2805 msg, text = self._msgobj('msg_34.txt') 2806 self._idempotent(msg, text) 2807 2808 def test_nested_multipart_mixeds(self): 2809 msg, text = self._msgobj('msg_12a.txt') 2810 self._idempotent(msg, text) 2811 2812 def test_message_external_body_idempotent(self): 2813 msg, text = self._msgobj('msg_36.txt') 2814 self._idempotent(msg, text) 2815 2816 def test_message_delivery_status(self): 2817 msg, text = self._msgobj('msg_43.txt') 2818 self._idempotent(msg, text, unixfrom=True) 2819 2820 def test_message_signed_idempotent(self): 2821 msg, text = self._msgobj('msg_45.txt') 2822 self._idempotent(msg, text) 2823 2824 def test_content_type(self): 2825 eq = self.assertEqual 2826 # Get a message object and reset the seek pointer for other tests 2827 msg, text = self._msgobj('msg_05.txt') 2828 eq(msg.get_content_type(), 'multipart/report') 2829 # Test the Content-Type: parameters 2830 params = {} 2831 for pk, pv in msg.get_params(): 2832 params[pk] = pv 2833 eq(params['report-type'], 'delivery-status') 2834 eq(params['boundary'], 
'D1690A7AC1.996856090/mail.example.com') 2835 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) 2836 eq(msg.epilogue, self.linesep) 2837 eq(len(msg.get_payload()), 3) 2838 # Make sure the subparts are what we expect 2839 msg1 = msg.get_payload(0) 2840 eq(msg1.get_content_type(), 'text/plain') 2841 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) 2842 msg2 = msg.get_payload(1) 2843 eq(msg2.get_content_type(), 'text/plain') 2844 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) 2845 msg3 = msg.get_payload(2) 2846 eq(msg3.get_content_type(), 'message/rfc822') 2847 self.assertIsInstance(msg3, Message) 2848 payload = msg3.get_payload() 2849 self.assertIsInstance(payload, list) 2850 eq(len(payload), 1) 2851 msg4 = payload[0] 2852 self.assertIsInstance(msg4, Message) 2853 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) 2854 2855 def test_parser(self): 2856 eq = self.assertEqual 2857 msg, text = self._msgobj('msg_06.txt') 2858 # Check some of the outer headers 2859 eq(msg.get_content_type(), 'message/rfc822') 2860 # Make sure the payload is a list of exactly one sub-Message, and that 2861 # that submessage has a type of text/plain 2862 payload = msg.get_payload() 2863 self.assertIsInstance(payload, list) 2864 eq(len(payload), 1) 2865 msg1 = payload[0] 2866 self.assertIsInstance(msg1, Message) 2867 eq(msg1.get_content_type(), 'text/plain') 2868 self.assertIsInstance(msg1.get_payload(), str) 2869 eq(msg1.get_payload(), self.linesep) 2870 2871 2872 2873# Test various other bits of the package's functionality 2874class TestMiscellaneous(TestEmailBase): 2875 def test_message_from_string(self): 2876 with openfile('msg_01.txt') as fp: 2877 text = fp.read() 2878 msg = email.message_from_string(text) 2879 s = StringIO() 2880 # Don't wrap/continue long headers since we're trying to test 2881 # idempotency. 
2882 g = Generator(s, maxheaderlen=0) 2883 g.flatten(msg) 2884 self.assertEqual(text, s.getvalue()) 2885 2886 def test_message_from_file(self): 2887 with openfile('msg_01.txt') as fp: 2888 text = fp.read() 2889 fp.seek(0) 2890 msg = email.message_from_file(fp) 2891 s = StringIO() 2892 # Don't wrap/continue long headers since we're trying to test 2893 # idempotency. 2894 g = Generator(s, maxheaderlen=0) 2895 g.flatten(msg) 2896 self.assertEqual(text, s.getvalue()) 2897 2898 def test_message_from_string_with_class(self): 2899 with openfile('msg_01.txt') as fp: 2900 text = fp.read() 2901 2902 # Create a subclass 2903 class MyMessage(Message): 2904 pass 2905 2906 msg = email.message_from_string(text, MyMessage) 2907 self.assertIsInstance(msg, MyMessage) 2908 # Try something more complicated 2909 with openfile('msg_02.txt') as fp: 2910 text = fp.read() 2911 msg = email.message_from_string(text, MyMessage) 2912 for subpart in msg.walk(): 2913 self.assertIsInstance(subpart, MyMessage) 2914 2915 def test_message_from_file_with_class(self): 2916 # Create a subclass 2917 class MyMessage(Message): 2918 pass 2919 2920 with openfile('msg_01.txt') as fp: 2921 msg = email.message_from_file(fp, MyMessage) 2922 self.assertIsInstance(msg, MyMessage) 2923 # Try something more complicated 2924 with openfile('msg_02.txt') as fp: 2925 msg = email.message_from_file(fp, MyMessage) 2926 for subpart in msg.walk(): 2927 self.assertIsInstance(subpart, MyMessage) 2928 2929 def test_custom_message_does_not_require_arguments(self): 2930 class MyMessage(Message): 2931 def __init__(self): 2932 super().__init__() 2933 msg = self._str_msg("Subject: test\n\ntest", MyMessage) 2934 self.assertIsInstance(msg, MyMessage) 2935 2936 def test__all__(self): 2937 module = __import__('email') 2938 self.assertEqual(sorted(module.__all__), [ 2939 'base64mime', 'charset', 'encoders', 'errors', 'feedparser', 2940 'generator', 'header', 'iterators', 'message', 2941 'message_from_binary_file', 'message_from_bytes', 
2942 'message_from_file', 'message_from_string', 'mime', 'parser', 2943 'quoprimime', 'utils', 2944 ]) 2945 2946 def test_formatdate(self): 2947 now = time.time() 2948 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], 2949 time.gmtime(now)[:6]) 2950 2951 def test_formatdate_localtime(self): 2952 now = time.time() 2953 self.assertEqual( 2954 utils.parsedate(utils.formatdate(now, localtime=True))[:6], 2955 time.localtime(now)[:6]) 2956 2957 def test_formatdate_usegmt(self): 2958 now = time.time() 2959 self.assertEqual( 2960 utils.formatdate(now, localtime=False), 2961 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) 2962 self.assertEqual( 2963 utils.formatdate(now, localtime=False, usegmt=True), 2964 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) 2965 2966 # parsedate and parsedate_tz will become deprecated interfaces someday 2967 def test_parsedate_returns_None_for_invalid_strings(self): 2968 self.assertIsNone(utils.parsedate('')) 2969 self.assertIsNone(utils.parsedate_tz('')) 2970 self.assertIsNone(utils.parsedate('0')) 2971 self.assertIsNone(utils.parsedate_tz('0')) 2972 self.assertIsNone(utils.parsedate('A Complete Waste of Time')) 2973 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time')) 2974 # Not a part of the spec but, but this has historically worked: 2975 self.assertIsNone(utils.parsedate(None)) 2976 self.assertIsNone(utils.parsedate_tz(None)) 2977 2978 def test_parsedate_compact(self): 2979 # The FWS after the comma is optional 2980 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), 2981 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) 2982 2983 def test_parsedate_no_dayofweek(self): 2984 eq = self.assertEqual 2985 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), 2986 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) 2987 2988 def test_parsedate_compact_no_dayofweek(self): 2989 eq = self.assertEqual 2990 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), 2991 (2003, 2, 5, 13, 47, 26, 
0, 1, -1, -28800)) 2992 2993 def test_parsedate_no_space_before_positive_offset(self): 2994 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), 2995 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 2996 2997 def test_parsedate_no_space_before_negative_offset(self): 2998 # Issue 1155362: we already handled '+' for this case. 2999 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), 3000 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) 3001 3002 3003 def test_parsedate_accepts_time_with_dots(self): 3004 eq = self.assertEqual 3005 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'), 3006 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3007 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'), 3008 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)) 3009 3010 def test_parsedate_acceptable_to_time_functions(self): 3011 eq = self.assertEqual 3012 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') 3013 t = int(time.mktime(timetup)) 3014 eq(time.localtime(t)[:6], timetup[:6]) 3015 eq(int(time.strftime('%Y', timetup)), 2003) 3016 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') 3017 t = int(time.mktime(timetup[:9])) 3018 eq(time.localtime(t)[:6], timetup[:6]) 3019 eq(int(time.strftime('%Y', timetup[:9])), 2003) 3020 3021 def test_mktime_tz(self): 3022 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3023 -1, -1, -1, 0)), 0) 3024 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3025 -1, -1, -1, 1234)), -1234) 3026 3027 def test_parsedate_y2k(self): 3028 """Test for parsing a date with a two-digit year. 3029 3030 Parsing a date with a two-digit year should return the correct 3031 four-digit year. RFC822 allows two-digit years, but RFC2822 (which 3032 obsoletes RFC822) requires four-digit years. 
3033 3034 """ 3035 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), 3036 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) 3037 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), 3038 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) 3039 3040 def test_parseaddr_empty(self): 3041 self.assertEqual(utils.parseaddr('<>'), ('', '')) 3042 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') 3043 3044 def test_parseaddr_multiple_domains(self): 3045 self.assertEqual( 3046 utils.parseaddr('a@b@c'), 3047 ('', '') 3048 ) 3049 self.assertEqual( 3050 utils.parseaddr('a@b.c@c'), 3051 ('', '') 3052 ) 3053 self.assertEqual( 3054 utils.parseaddr('a@172.17.0.1@c'), 3055 ('', '') 3056 ) 3057 3058 def test_noquote_dump(self): 3059 self.assertEqual( 3060 utils.formataddr(('A Silly Person', 'person@dom.ain')), 3061 'A Silly Person <person@dom.ain>') 3062 3063 def test_escape_dump(self): 3064 self.assertEqual( 3065 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), 3066 r'"A (Very) Silly Person" <person@dom.ain>') 3067 self.assertEqual( 3068 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'), 3069 ('A (Very) Silly Person', 'person@dom.ain')) 3070 a = r'A \(Special\) Person' 3071 b = 'person@dom.ain' 3072 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3073 3074 def test_escape_backslashes(self): 3075 self.assertEqual( 3076 utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')), 3077 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>') 3078 a = r'Arthur \Backslash\ Foobar' 3079 b = 'person@dom.ain' 3080 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3081 3082 def test_quotes_unicode_names(self): 3083 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 
3084 name = "H\u00e4ns W\u00fcrst" 3085 addr = 'person@dom.ain' 3086 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3087 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>" 3088 self.assertEqual(utils.formataddr((name, addr)), utf8_base64) 3089 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), 3090 latin1_quopri) 3091 3092 def test_accepts_any_charset_like_object(self): 3093 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3094 name = "H\u00e4ns W\u00fcrst" 3095 addr = 'person@dom.ain' 3096 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3097 foobar = "FOOBAR" 3098 class CharsetMock: 3099 def header_encode(self, string): 3100 return foobar 3101 mock = CharsetMock() 3102 mock_expected = "%s <%s>" % (foobar, addr) 3103 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) 3104 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), 3105 utf8_base64) 3106 3107 def test_invalid_charset_like_object_raises_error(self): 3108 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3109 name = "H\u00e4ns W\u00fcrst" 3110 addr = 'person@dom.ain' 3111 # An object without a header_encode method: 3112 bad_charset = object() 3113 self.assertRaises(AttributeError, utils.formataddr, (name, addr), 3114 bad_charset) 3115 3116 def test_unicode_address_raises_error(self): 3117 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3118 addr = 'pers\u00f6n@dom.in' 3119 self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) 3120 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) 3121 3122 def test_name_with_dot(self): 3123 x = 'John X. Doe <jxd@example.com>' 3124 y = '"John X. Doe" <jxd@example.com>' 3125 a, b = ('John X. 
Doe', 'jxd@example.com') 3126 self.assertEqual(utils.parseaddr(x), (a, b)) 3127 self.assertEqual(utils.parseaddr(y), (a, b)) 3128 # formataddr() quotes the name if there's a dot in it 3129 self.assertEqual(utils.formataddr((a, b)), y) 3130 3131 def test_parseaddr_preserves_quoted_pairs_in_addresses(self): 3132 # issue 10005. Note that in the third test the second pair of 3133 # backslashes is not actually a quoted pair because it is not inside a 3134 # comment or quoted string: the address being parsed has a quoted 3135 # string containing a quoted backslash, followed by 'example' and two 3136 # backslashes, followed by another quoted string containing a space and 3137 # the word 'example'. parseaddr copies those two backslashes 3138 # literally. Per rfc5322 this is not technically correct since a \ may 3139 # not appear in an address outside of a quoted string. It is probably 3140 # a sensible Postel interpretation, though. 3141 eq = self.assertEqual 3142 eq(utils.parseaddr('""example" example"@example.com'), 3143 ('', '""example" example"@example.com')) 3144 eq(utils.parseaddr('"\\"example\\" example"@example.com'), 3145 ('', '"\\"example\\" example"@example.com')) 3146 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), 3147 ('', '"\\\\"example\\\\" example"@example.com')) 3148 3149 def test_parseaddr_preserves_spaces_in_local_part(self): 3150 # issue 9286. A normal RFC5322 local part should not contain any 3151 # folding white space, but legacy local parts can (they are a sequence 3152 # of atoms, not dotatoms). On the other hand we strip whitespace from 3153 # before the @ and around dots, on the assumption that the whitespace 3154 # around the punctuation is a mistake in what would otherwise be 3155 # an RFC5322 local part. Leading whitespace is, usual, stripped as well. 
3156 self.assertEqual(('', "merwok wok@xample.com"), 3157 utils.parseaddr("merwok wok@xample.com")) 3158 self.assertEqual(('', "merwok wok@xample.com"), 3159 utils.parseaddr("merwok wok@xample.com")) 3160 self.assertEqual(('', "merwok wok@xample.com"), 3161 utils.parseaddr(" merwok wok @xample.com")) 3162 self.assertEqual(('', 'merwok"wok" wok@xample.com'), 3163 utils.parseaddr('merwok"wok" wok@xample.com')) 3164 self.assertEqual(('', 'merwok.wok.wok@xample.com'), 3165 utils.parseaddr('merwok. wok . wok@xample.com')) 3166 3167 def test_formataddr_does_not_quote_parens_in_quoted_string(self): 3168 addr = ("'foo@example.com' (foo@example.com)", 3169 'foo@example.com') 3170 addrstr = ('"\'foo@example.com\' ' 3171 '(foo@example.com)" <foo@example.com>') 3172 self.assertEqual(utils.parseaddr(addrstr), addr) 3173 self.assertEqual(utils.formataddr(addr), addrstr) 3174 3175 3176 def test_multiline_from_comment(self): 3177 x = """\ 3178Foo 3179\tBar <foo@example.com>""" 3180 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) 3181 3182 def test_quote_dump(self): 3183 self.assertEqual( 3184 utils.formataddr(('A Silly; Person', 'person@dom.ain')), 3185 r'"A Silly; Person" <person@dom.ain>') 3186 3187 def test_charset_richcomparisons(self): 3188 eq = self.assertEqual 3189 ne = self.assertNotEqual 3190 cset1 = Charset() 3191 cset2 = Charset() 3192 eq(cset1, 'us-ascii') 3193 eq(cset1, 'US-ASCII') 3194 eq(cset1, 'Us-AsCiI') 3195 eq('us-ascii', cset1) 3196 eq('US-ASCII', cset1) 3197 eq('Us-AsCiI', cset1) 3198 ne(cset1, 'usascii') 3199 ne(cset1, 'USASCII') 3200 ne(cset1, 'UsAsCiI') 3201 ne('usascii', cset1) 3202 ne('USASCII', cset1) 3203 ne('UsAsCiI', cset1) 3204 eq(cset1, cset2) 3205 eq(cset2, cset1) 3206 3207 def test_getaddresses(self): 3208 eq = self.assertEqual 3209 eq(utils.getaddresses(['aperson@dom.ain (Al Person)', 3210 'Bud Person <bperson@dom.ain>']), 3211 [('Al Person', 'aperson@dom.ain'), 3212 ('Bud Person', 'bperson@dom.ain')]) 3213 3214 def 
test_getaddresses_nasty(self): 3215 eq = self.assertEqual 3216 eq(utils.getaddresses(['foo: ;']), [('', '')]) 3217 eq(utils.getaddresses( 3218 ['[]*-- =~$']), 3219 [('', ''), ('', ''), ('', '*--')]) 3220 eq(utils.getaddresses( 3221 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), 3222 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) 3223 3224 def test_getaddresses_embedded_comment(self): 3225 """Test proper handling of a nested comment""" 3226 eq = self.assertEqual 3227 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>']) 3228 eq(addrs[0][1], 'foo@bar.com') 3229 3230 def test_make_msgid_collisions(self): 3231 # Test make_msgid uniqueness, even with multiple threads 3232 class MsgidsThread(Thread): 3233 def run(self): 3234 # generate msgids for 3 seconds 3235 self.msgids = [] 3236 append = self.msgids.append 3237 make_msgid = utils.make_msgid 3238 clock = time.monotonic 3239 tfin = clock() + 3.0 3240 while clock() < tfin: 3241 append(make_msgid(domain='testdomain-string')) 3242 3243 threads = [MsgidsThread() for i in range(5)] 3244 with start_threads(threads): 3245 pass 3246 all_ids = sum([t.msgids for t in threads], []) 3247 self.assertEqual(len(set(all_ids)), len(all_ids)) 3248 3249 def test_utils_quote_unquote(self): 3250 eq = self.assertEqual 3251 msg = Message() 3252 msg.add_header('content-disposition', 'attachment', 3253 filename='foo\\wacky"name') 3254 eq(msg.get_filename(), 'foo\\wacky"name') 3255 3256 def test_get_body_encoding_with_bogus_charset(self): 3257 charset = Charset('not a charset') 3258 self.assertEqual(charset.get_body_encoding(), 'base64') 3259 3260 def test_get_body_encoding_with_uppercase_charset(self): 3261 eq = self.assertEqual 3262 msg = Message() 3263 msg['Content-Type'] = 'text/plain; charset=UTF-8' 3264 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3265 charsets = msg.get_charsets() 3266 eq(len(charsets), 1) 3267 eq(charsets[0], 'utf-8') 3268 charset = Charset(charsets[0]) 3269 
eq(charset.get_body_encoding(), 'base64') 3270 msg.set_payload(b'hello world', charset=charset) 3271 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3272 eq(msg.get_payload(decode=True), b'hello world') 3273 eq(msg['content-transfer-encoding'], 'base64') 3274 # Try another one 3275 msg = Message() 3276 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3277 charsets = msg.get_charsets() 3278 eq(len(charsets), 1) 3279 eq(charsets[0], 'us-ascii') 3280 charset = Charset(charsets[0]) 3281 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3282 msg.set_payload('hello world', charset=charset) 3283 eq(msg.get_payload(), 'hello world') 3284 eq(msg['content-transfer-encoding'], '7bit') 3285 3286 def test_charsets_case_insensitive(self): 3287 lc = Charset('us-ascii') 3288 uc = Charset('US-ASCII') 3289 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3290 3291 def test_partial_falls_inside_message_delivery_status(self): 3292 eq = self.ndiffAssertEqual 3293 # The Parser interface provides chunks of data to FeedParser in 8192 3294 # byte gulps. SF bug #1076485 found one of those chunks inside 3295 # message/delivery-status header block, which triggered an 3296 # unreadline() of NeedMoreData. 
3297 msg = self._msgobj('msg_43.txt') 3298 sfp = StringIO() 3299 iterators._structure(msg, sfp) 3300 eq(sfp.getvalue(), """\ 3301multipart/report 3302 text/plain 3303 message/delivery-status 3304 text/plain 3305 text/plain 3306 text/plain 3307 text/plain 3308 text/plain 3309 text/plain 3310 text/plain 3311 text/plain 3312 text/plain 3313 text/plain 3314 text/plain 3315 text/plain 3316 text/plain 3317 text/plain 3318 text/plain 3319 text/plain 3320 text/plain 3321 text/plain 3322 text/plain 3323 text/plain 3324 text/plain 3325 text/plain 3326 text/plain 3327 text/plain 3328 text/plain 3329 text/plain 3330 text/rfc822-headers 3331""") 3332 3333 def test_make_msgid_domain(self): 3334 self.assertEqual( 3335 email.utils.make_msgid(domain='testdomain-string')[-19:], 3336 '@testdomain-string>') 3337 3338 def test_make_msgid_idstring(self): 3339 self.assertEqual( 3340 email.utils.make_msgid(idstring='test-idstring', 3341 domain='testdomain-string')[-33:], 3342 '.test-idstring@testdomain-string>') 3343 3344 def test_make_msgid_default_domain(self): 3345 self.assertTrue( 3346 email.utils.make_msgid().endswith( 3347 '@' + getfqdn() + '>')) 3348 3349 def test_Generator_linend(self): 3350 # Issue 14645. 3351 with openfile('msg_26.txt', newline='\n') as f: 3352 msgtxt = f.read() 3353 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3354 msg = email.message_from_string(msgtxt) 3355 s = StringIO() 3356 g = email.generator.Generator(s) 3357 g.flatten(msg) 3358 self.assertEqual(s.getvalue(), msgtxt_nl) 3359 3360 def test_BytesGenerator_linend(self): 3361 # Issue 14645. 3362 with openfile('msg_26.txt', newline='\n') as f: 3363 msgtxt = f.read() 3364 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3365 msg = email.message_from_string(msgtxt_nl) 3366 s = BytesIO() 3367 g = email.generator.BytesGenerator(s) 3368 g.flatten(msg, linesep='\r\n') 3369 self.assertEqual(s.getvalue().decode('ascii'), msgtxt) 3370 3371 def test_BytesGenerator_linend_with_non_ascii(self): 3372 # Issue 14645. 
3373 with openfile('msg_26.txt', 'rb') as f: 3374 msgtxt = f.read() 3375 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6') 3376 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n') 3377 msg = email.message_from_bytes(msgtxt_nl) 3378 s = BytesIO() 3379 g = email.generator.BytesGenerator(s) 3380 g.flatten(msg, linesep='\r\n') 3381 self.assertEqual(s.getvalue(), msgtxt) 3382 3383 def test_mime_classes_policy_argument(self): 3384 with openfile('audiotest.au', 'rb') as fp: 3385 audiodata = fp.read() 3386 with openfile('PyBanner048.gif', 'rb') as fp: 3387 bindata = fp.read() 3388 classes = [ 3389 (MIMEApplication, ('',)), 3390 (MIMEAudio, (audiodata,)), 3391 (MIMEImage, (bindata,)), 3392 (MIMEMessage, (Message(),)), 3393 (MIMENonMultipart, ('multipart', 'mixed')), 3394 (MIMEText, ('',)), 3395 ] 3396 for cls, constructor in classes: 3397 with self.subTest(cls=cls.__name__, policy='compat32'): 3398 m = cls(*constructor) 3399 self.assertIs(m.policy, email.policy.compat32) 3400 with self.subTest(cls=cls.__name__, policy='default'): 3401 m = cls(*constructor, policy=email.policy.default) 3402 self.assertIs(m.policy, email.policy.default) 3403 3404 3405# Test the iterator/generators 3406class TestIterators(TestEmailBase): 3407 def test_body_line_iterator(self): 3408 eq = self.assertEqual 3409 neq = self.ndiffAssertEqual 3410 # First a simple non-multipart message 3411 msg = self._msgobj('msg_01.txt') 3412 it = iterators.body_line_iterator(msg) 3413 lines = list(it) 3414 eq(len(lines), 6) 3415 neq(EMPTYSTRING.join(lines), msg.get_payload()) 3416 # Now a more complicated multipart 3417 msg = self._msgobj('msg_02.txt') 3418 it = iterators.body_line_iterator(msg) 3419 lines = list(it) 3420 eq(len(lines), 43) 3421 with openfile('msg_19.txt') as fp: 3422 neq(EMPTYSTRING.join(lines), fp.read()) 3423 3424 def test_typed_subpart_iterator(self): 3425 eq = self.assertEqual 3426 msg = self._msgobj('msg_04.txt') 3427 it = iterators.typed_subpart_iterator(msg, 'text') 3428 lines = [] 3429 
subparts = 0 3430 for subpart in it: 3431 subparts += 1 3432 lines.append(subpart.get_payload()) 3433 eq(subparts, 2) 3434 eq(EMPTYSTRING.join(lines), """\ 3435a simple kind of mirror 3436to reflect upon our own 3437a simple kind of mirror 3438to reflect upon our own 3439""") 3440 3441 def test_typed_subpart_iterator_default_type(self): 3442 eq = self.assertEqual 3443 msg = self._msgobj('msg_03.txt') 3444 it = iterators.typed_subpart_iterator(msg, 'text', 'plain') 3445 lines = [] 3446 subparts = 0 3447 for subpart in it: 3448 subparts += 1 3449 lines.append(subpart.get_payload()) 3450 eq(subparts, 1) 3451 eq(EMPTYSTRING.join(lines), """\ 3452 3453Hi, 3454 3455Do you like this message? 3456 3457-Me 3458""") 3459 3460 def test_pushCR_LF(self): 3461 '''FeedParser BufferedSubFile.push() assumed it received complete 3462 line endings. A CR ending one push() followed by a LF starting 3463 the next push() added an empty line. 3464 ''' 3465 imt = [ 3466 ("a\r \n", 2), 3467 ("b", 0), 3468 ("c\n", 1), 3469 ("", 0), 3470 ("d\r\n", 1), 3471 ("e\r", 0), 3472 ("\nf", 1), 3473 ("\r\n", 1), 3474 ] 3475 from email.feedparser import BufferedSubFile, NeedMoreData 3476 bsf = BufferedSubFile() 3477 om = [] 3478 nt = 0 3479 for il, n in imt: 3480 bsf.push(il) 3481 nt += n 3482 n1 = 0 3483 for ol in iter(bsf.readline, NeedMoreData): 3484 om.append(ol) 3485 n1 += 1 3486 self.assertEqual(n, n1) 3487 self.assertEqual(len(om), nt) 3488 self.assertEqual(''.join([il for il, n in imt]), ''.join(om)) 3489 3490 def test_push_random(self): 3491 from email.feedparser import BufferedSubFile, NeedMoreData 3492 3493 n = 10000 3494 chunksize = 5 3495 chars = 'abcd \t\r\n' 3496 3497 s = ''.join(choice(chars) for i in range(n)) + '\n' 3498 target = s.splitlines(True) 3499 3500 bsf = BufferedSubFile() 3501 lines = [] 3502 for i in range(0, len(s), chunksize): 3503 chunk = s[i:i+chunksize] 3504 bsf.push(chunk) 3505 lines.extend(iter(bsf.readline, NeedMoreData)) 3506 self.assertEqual(lines, target) 3507 
class TestFeedParsers(TestEmailBase):

    def parse(self, chunks):
        # Feed every chunk, in order, to a fresh FeedParser and return
        # whatever close() produces.
        parser = FeedParser()
        for piece in chunks:
            parser.feed(piece)
        return parser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        parsed = self.parse("First: val\n: bad\nSecond: val")
        for name in ('First', 'Second'):
            self.assertEqual(parsed[name], 'val')

    def test_newlines(self):
        # (chunks fed to the parser, header names expected afterwards)
        key_cases = (
            (['a:\nb:\rc:\r\nd:\n'], ['a', 'b', 'c', 'd']),
            (['a:\nb:\rc:\r\nd:'], ['a', 'b', 'c', 'd']),
            (['a:\rb', 'c:\n'], ['a', 'bc']),
            (['a:\r', 'b:\n'], ['a', 'b']),
            (['a:\r', '\nb:\n'], ['a', 'b']),
        )
        for chunks, expected_keys in key_cases:
            self.assertEqual(self.parse(chunks).keys(), expected_keys)

        # Only CR and LF should break header fields
        item_cases = (
            (['a:\x85b:\u2028c:\n'], [('a', '\x85b:\u2028c:')]),
            (['a:\r', 'b:\x85', 'c:\n'], [('a', ''), ('b', '\x85c:')]),
        )
        for chunks, expected_items in item_cases:
            self.assertEqual(self.parse(chunks).items(), expected_items)

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        # A body fed as N newline-free chunks must come back as one payload,
        # whether the blank line after the header is LF LF or CR CR.
        for head in ('a:b\n\n', 'a:b\r\r'):
            parsed = self.parse([head] + ['x'*M] * N)
            self.assertEqual(parsed.items(), [('a', 'b')])
            self.assertEqual(parsed.get_payload(), 'x'*M*N)
        # The same with every chunk ending in '\x85'.
        parsed = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N)
        self.assertEqual(parsed.items(), [('a', 'b')])
        self.assertEqual(parsed.get_payload(), ('x'*M+'\x85')*N)
        # A header value spread over N chunks.
        parsed = self.parse(['a:\r', 'b: '] + ['x'*M] * N)
        self.assertEqual(parsed.items(), [('a', ''), ('b', 'x'*M*N)])


class TestParsers(TestEmailBase):

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            parsed = HeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        # The body is left as a plain string rather than split into parts.
        self.assertFalse(parsed.is_multipart())
        self.assertIsInstance(parsed.get_payload(), str)

    def test_bytes_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            parsed = email.parser.BytesHeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        self.assertFalse(parsed.is_multipart())
        self.assertIsInstance(parsed.get_payload(), str)
        self.assertIsInstance(parsed.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'rb') as fp:
            email.parser.BytesParser().parse(fp)
            self.assertFalse(fp.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        # Under the strict policy parsing msg_15.txt raises; the file must
        # still be open afterwards.
        with openfile('msg_15.txt', 'rb') as fp:
            strict_parser = email.parser.BytesParser(
                policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'r') as fp:
            email.parser.Parser().parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_on_exception_does_not_close_file(self):
        # Text-mode counterpart of the bytes test above.
        with openfile('msg_15.txt', 'r') as fp:
            strict_parser = email.parser.Parser(policy=email.policy.strict)
            with self.assertRaises(email.errors.StartBoundaryNotFoundDefect):
                strict_parser.parse(fp)
            self.assertFalse(fp.closed)

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        raw = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
"""
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Like the previous test, but the subject line is the last
        # header.
        raw = """\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
"""
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # newline='\n' keeps the file layer from translating line endings.
        with openfile('msg_26.txt', newline='\n') as fp:
            parsed = Parser().parse(fp)
        self.assertEqual(len(parsed.get_payload()), 2)
        text_part = parsed.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = parsed.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            original = fp.read()
        parsed = email.message_from_string(original)
        out = StringIO()
        Generator(out).flatten(parsed, linesep='\r\n')
        self.assertEqual(out.getvalue(), original)

    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
eq(part2.get_content_type(), 'message/rfc822') 3690 eq(part2.is_multipart(), 1) 3691 eq(len(part2.get_payload()), 1) 3692 part2a = part2.get_payload(0) 3693 eq(part2a.is_multipart(), 0) 3694 eq(part2a.get_content_type(), 'text/plain') 3695 neq(part2a.get_payload(), 'message 2\n') 3696 3697 def test_three_lines(self): 3698 # A bug report by Andrew McNamara 3699 lines = ['From: Andrew Person <aperson@dom.ain', 3700 'Subject: Test', 3701 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] 3702 msg = email.message_from_string(NL.join(lines)) 3703 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') 3704 3705 def test_strip_line_feed_and_carriage_return_in_headers(self): 3706 eq = self.assertEqual 3707 # For [ 1002475 ] email message parser doesn't handle \r\n correctly 3708 value1 = 'text' 3709 value2 = 'more text' 3710 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( 3711 value1, value2) 3712 msg = email.message_from_string(m) 3713 eq(msg.get('Header'), value1) 3714 eq(msg.get('Next-Header'), value2) 3715 3716 def test_rfc2822_header_syntax(self): 3717 eq = self.assertEqual 3718 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3719 msg = email.message_from_string(m) 3720 eq(len(msg), 3) 3721 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) 3722 eq(msg.get_payload(), 'body') 3723 3724 def test_rfc2822_space_not_allowed_in_header(self): 3725 eq = self.assertEqual 3726 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3727 msg = email.message_from_string(m) 3728 eq(len(msg.keys()), 0) 3729 3730 def test_rfc2822_one_character_header(self): 3731 eq = self.assertEqual 3732 m = 'A: first header\nB: second header\nCC: third header\n\nbody' 3733 msg = email.message_from_string(m) 3734 headers = msg.keys() 3735 headers.sort() 3736 eq(headers, ['A', 'B', 'CC']) 3737 eq(msg.get_payload(), 'body') 3738 3739 def test_CRLFLF_at_end_of_part(self): 3740 # issue 5610: feedparser should not eat two chars from body part ending 3741 # 
with "\r\n\n". 3742 m = ( 3743 "From: foo@bar.com\n" 3744 "To: baz\n" 3745 "Mime-Version: 1.0\n" 3746 "Content-Type: multipart/mixed; boundary=BOUNDARY\n" 3747 "\n" 3748 "--BOUNDARY\n" 3749 "Content-Type: text/plain\n" 3750 "\n" 3751 "body ending with CRLF newline\r\n" 3752 "\n" 3753 "--BOUNDARY--\n" 3754 ) 3755 msg = email.message_from_string(m) 3756 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) 3757 3758 3759class Test8BitBytesHandling(TestEmailBase): 3760 # In Python3 all input is string, but that doesn't work if the actual input 3761 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we 3762 # decode byte streams using the surrogateescape error handler, and 3763 # reconvert to binary at appropriate places if we detect surrogates. This 3764 # doesn't allow us to transform headers with 8bit bytes (they get munged), 3765 # but it does allow us to parse and preserve them, and to decode body 3766 # parts that use an 8bit CTE. 3767 3768 bodytest_msg = textwrap.dedent("""\ 3769 From: foo@bar.com 3770 To: baz 3771 Mime-Version: 1.0 3772 Content-Type: text/plain; charset={charset} 3773 Content-Transfer-Encoding: {cte} 3774 3775 {bodyline} 3776 """) 3777 3778 def test_known_8bit_CTE(self): 3779 m = self.bodytest_msg.format(charset='utf-8', 3780 cte='8bit', 3781 bodyline='pöstal').encode('utf-8') 3782 msg = email.message_from_bytes(m) 3783 self.assertEqual(msg.get_payload(), "pöstal\n") 3784 self.assertEqual(msg.get_payload(decode=True), 3785 "pöstal\n".encode('utf-8')) 3786 3787 def test_unknown_8bit_CTE(self): 3788 m = self.bodytest_msg.format(charset='notavalidcharset', 3789 cte='8bit', 3790 bodyline='pöstal').encode('utf-8') 3791 msg = email.message_from_bytes(m) 3792 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") 3793 self.assertEqual(msg.get_payload(decode=True), 3794 "pöstal\n".encode('utf-8')) 3795 3796 def test_8bit_in_quopri_body(self): 3797 # This is non-RFC compliant data...without 'decode' the library code 
3798 # decodes the body using the charset from the headers, and because the 3799 # source byte really is utf-8 this works. This is likely to fail 3800 # against real dirty data (ie: produce mojibake), but the data is 3801 # invalid anyway so it is as good a guess as any. But this means that 3802 # this test just confirms the current behavior; that behavior is not 3803 # necessarily the best possible behavior. With 'decode' it is 3804 # returning the raw bytes, so that test should be of correct behavior, 3805 # or at least produce the same result that email4 did. 3806 m = self.bodytest_msg.format(charset='utf-8', 3807 cte='quoted-printable', 3808 bodyline='p=C3=B6stál').encode('utf-8') 3809 msg = email.message_from_bytes(m) 3810 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') 3811 self.assertEqual(msg.get_payload(decode=True), 3812 'pöstál\n'.encode('utf-8')) 3813 3814 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): 3815 # This is similar to the previous test, but proves that if the 8bit 3816 # byte is undecodeable in the specified charset, it gets replaced 3817 # by the unicode 'unknown' character. Again, this may or may not 3818 # be the ideal behavior. Note that if decode=False none of the 3819 # decoders will get involved, so this is the only test we need 3820 # for this behavior. 3821 m = self.bodytest_msg.format(charset='ascii', 3822 cte='quoted-printable', 3823 bodyline='p=C3=B6stál').encode('utf-8') 3824 msg = email.message_from_bytes(m) 3825 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') 3826 self.assertEqual(msg.get_payload(decode=True), 3827 'pöstál\n'.encode('utf-8')) 3828 3829 # test_defect_handling:test_invalid_chars_in_base64_payload 3830 def test_8bit_in_base64_body(self): 3831 # If we get 8bit bytes in a base64 body, we can just ignore them 3832 # as being outside the base64 alphabet and decode anyway. But 3833 # we register a defect. 
3834 m = self.bodytest_msg.format(charset='utf-8', 3835 cte='base64', 3836 bodyline='cMO2c3RhbAá=').encode('utf-8') 3837 msg = email.message_from_bytes(m) 3838 self.assertEqual(msg.get_payload(decode=True), 3839 'pöstal'.encode('utf-8')) 3840 self.assertIsInstance(msg.defects[0], 3841 errors.InvalidBase64CharactersDefect) 3842 3843 def test_8bit_in_uuencode_body(self): 3844 # Sticking an 8bit byte in a uuencode block makes it undecodable by 3845 # normal means, so the block is returned undecoded, but as bytes. 3846 m = self.bodytest_msg.format(charset='utf-8', 3847 cte='uuencode', 3848 bodyline='<,.V<W1A; á ').encode('utf-8') 3849 msg = email.message_from_bytes(m) 3850 self.assertEqual(msg.get_payload(decode=True), 3851 '<,.V<W1A; á \n'.encode('utf-8')) 3852 3853 3854 headertest_headers = ( 3855 ('From: foo@bar.com', ('From', 'foo@bar.com')), 3856 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), 3857 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n' 3858 '\tJean de Baddie', 3859 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 3860 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' 3861 ' =?unknown-8bit?q?_Jean_de_Baddie?=')), 3862 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), 3863 ) 3864 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + 3865 '\nYes, they are flying.\n').encode('utf-8') 3866 3867 def test_get_8bit_header(self): 3868 msg = email.message_from_bytes(self.headertest_msg) 3869 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') 3870 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') 3871 3872 def test_print_8bit_headers(self): 3873 msg = email.message_from_bytes(self.headertest_msg) 3874 self.assertEqual(str(msg), 3875 textwrap.dedent("""\ 3876 From: {} 3877 To: {} 3878 Subject: {} 3879 From: {} 3880 3881 Yes, they are flying. 
3882 """).format(*[expected[1] for (_, expected) in 3883 self.headertest_headers])) 3884 3885 def test_values_with_8bit_headers(self): 3886 msg = email.message_from_bytes(self.headertest_msg) 3887 self.assertListEqual([str(x) for x in msg.values()], 3888 ['foo@bar.com', 3889 'b\uFFFD\uFFFDz', 3890 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' 3891 'coll\uFFFD\uFFFDgue, le pouf ' 3892 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3893 '\tJean de Baddie', 3894 "g\uFFFD\uFFFDst"]) 3895 3896 def test_items_with_8bit_headers(self): 3897 msg = email.message_from_bytes(self.headertest_msg) 3898 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], 3899 [('From', 'foo@bar.com'), 3900 ('To', 'b\uFFFD\uFFFDz'), 3901 ('Subject', 'Maintenant je vous ' 3902 'pr\uFFFD\uFFFDsente ' 3903 'mon coll\uFFFD\uFFFDgue, le pouf ' 3904 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3905 '\tJean de Baddie'), 3906 ('From', 'g\uFFFD\uFFFDst')]) 3907 3908 def test_get_all_with_8bit_headers(self): 3909 msg = email.message_from_bytes(self.headertest_msg) 3910 self.assertListEqual([str(x) for x in msg.get_all('from')], 3911 ['foo@bar.com', 3912 'g\uFFFD\uFFFDst']) 3913 3914 def test_get_content_type_with_8bit(self): 3915 msg = email.message_from_bytes(textwrap.dedent("""\ 3916 Content-Type: text/pl\xA7in; charset=utf-8 3917 """).encode('latin-1')) 3918 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") 3919 self.assertEqual(msg.get_content_maintype(), "text") 3920 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") 3921 3922 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params 3923 def test_get_params_with_8bit(self): 3924 msg = email.message_from_bytes( 3925 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) 3926 self.assertEqual(msg.get_params(header='x-header'), 3927 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) 3928 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') 3929 # XXX: someday you might be able to get 'b\xa7r', for now 
you can't. 3930 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) 3931 3932 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value 3933 def test_get_rfc2231_params_with_8bit(self): 3934 msg = email.message_from_bytes(textwrap.dedent("""\ 3935 Content-Type: text/plain; charset=us-ascii; 3936 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3937 ).encode('latin-1')) 3938 self.assertEqual(msg.get_param('title'), 3939 ('us-ascii', 'en', 'This is not f\uFFFDn')) 3940 3941 def test_set_rfc2231_params_with_8bit(self): 3942 msg = email.message_from_bytes(textwrap.dedent("""\ 3943 Content-Type: text/plain; charset=us-ascii; 3944 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3945 ).encode('latin-1')) 3946 msg.set_param('title', 'test') 3947 self.assertEqual(msg.get_param('title'), 'test') 3948 3949 def test_del_rfc2231_params_with_8bit(self): 3950 msg = email.message_from_bytes(textwrap.dedent("""\ 3951 Content-Type: text/plain; charset=us-ascii; 3952 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3953 ).encode('latin-1')) 3954 msg.del_param('title') 3955 self.assertEqual(msg.get_param('title'), None) 3956 self.assertEqual(msg.get_content_maintype(), 'text') 3957 3958 def test_get_payload_with_8bit_cte_header(self): 3959 msg = email.message_from_bytes(textwrap.dedent("""\ 3960 Content-Transfer-Encoding: b\xa7se64 3961 Content-Type: text/plain; charset=latin-1 3962 3963 payload 3964 """).encode('latin-1')) 3965 self.assertEqual(msg.get_payload(), 'payload\n') 3966 self.assertEqual(msg.get_payload(decode=True), b'payload\n') 3967 3968 non_latin_bin_msg = textwrap.dedent("""\ 3969 From: foo@bar.com 3970 To: báz 3971 Subject: Maintenant je vous présente mon collègue, le pouf célèbre 3972 \tJean de Baddie 3973 Mime-Version: 1.0 3974 Content-Type: text/plain; charset="utf-8" 3975 Content-Transfer-Encoding: 8bit 3976 3977 Да, они летят. 
3978 """).encode('utf-8') 3979 3980 def test_bytes_generator(self): 3981 msg = email.message_from_bytes(self.non_latin_bin_msg) 3982 out = BytesIO() 3983 email.generator.BytesGenerator(out).flatten(msg) 3984 self.assertEqual(out.getvalue(), self.non_latin_bin_msg) 3985 3986 def test_bytes_generator_handles_None_body(self): 3987 #Issue 11019 3988 msg = email.message.Message() 3989 out = BytesIO() 3990 email.generator.BytesGenerator(out).flatten(msg) 3991 self.assertEqual(out.getvalue(), b"\n") 3992 3993 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ 3994 From: foo@bar.com 3995 To: =?unknown-8bit?q?b=C3=A1z?= 3996 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= 3997 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?= 3998 =?unknown-8bit?q?_Jean_de_Baddie?= 3999 Mime-Version: 1.0 4000 Content-Type: text/plain; charset="utf-8" 4001 Content-Transfer-Encoding: base64 4002 4003 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== 4004 """) 4005 4006 def test_generator_handles_8bit(self): 4007 msg = email.message_from_bytes(self.non_latin_bin_msg) 4008 out = StringIO() 4009 email.generator.Generator(out).flatten(msg) 4010 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) 4011 4012 def test_str_generator_should_not_mutate_msg_when_handling_8bit(self): 4013 msg = email.message_from_bytes(self.non_latin_bin_msg) 4014 out = BytesIO() 4015 BytesGenerator(out).flatten(msg) 4016 orig_value = out.getvalue() 4017 Generator(StringIO()).flatten(msg) # Should not mutate msg! 4018 out = BytesIO() 4019 BytesGenerator(out).flatten(msg) 4020 self.assertEqual(out.getvalue(), orig_value) 4021 4022 def test_bytes_generator_with_unix_from(self): 4023 # The unixfrom contains a current date, so we can't check it 4024 # literally. Just make sure the first word is 'From' and the 4025 # rest of the message matches the input. 
4026 msg = email.message_from_bytes(self.non_latin_bin_msg) 4027 out = BytesIO() 4028 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True) 4029 lines = out.getvalue().split(b'\n') 4030 self.assertEqual(lines[0].split()[0], b'From') 4031 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg) 4032 4033 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n') 4034 non_latin_bin_msg_as7bit[2:4] = [ 4035 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 4036 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?='] 4037 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit) 4038 4039 def test_message_from_binary_file(self): 4040 fn = 'test.msg' 4041 self.addCleanup(unlink, fn) 4042 with open(fn, 'wb') as testfile: 4043 testfile.write(self.non_latin_bin_msg) 4044 with open(fn, 'rb') as testfile: 4045 m = email.parser.BytesParser().parse(testfile) 4046 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit) 4047 4048 latin_bin_msg = textwrap.dedent("""\ 4049 From: foo@bar.com 4050 To: Dinsdale 4051 Subject: Nudge nudge, wink, wink 4052 Mime-Version: 1.0 4053 Content-Type: text/plain; charset="latin-1" 4054 Content-Transfer-Encoding: 8bit 4055 4056 oh là là, know what I mean, know what I mean? 4057 """).encode('latin-1') 4058 4059 latin_bin_msg_as7bit = textwrap.dedent("""\ 4060 From: foo@bar.com 4061 To: Dinsdale 4062 Subject: Nudge nudge, wink, wink 4063 Mime-Version: 1.0 4064 Content-Type: text/plain; charset="iso-8859-1" 4065 Content-Transfer-Encoding: quoted-printable 4066 4067 oh l=E0 l=E0, know what I mean, know what I mean? 
4068 """) 4069 4070 def test_string_generator_reencodes_to_quopri_when_appropriate(self): 4071 m = email.message_from_bytes(self.latin_bin_msg) 4072 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4073 4074 def test_decoded_generator_emits_unicode_body(self): 4075 m = email.message_from_bytes(self.latin_bin_msg) 4076 out = StringIO() 4077 email.generator.DecodedGenerator(out).flatten(m) 4078 #DecodedHeader output contains an extra blank line compared 4079 #to the input message. RDM: not sure if this is a bug or not, 4080 #but it is not specific to the 8bit->7bit conversion. 4081 self.assertEqual(out.getvalue(), 4082 self.latin_bin_msg.decode('latin-1')+'\n') 4083 4084 def test_bytes_feedparser(self): 4085 bfp = email.feedparser.BytesFeedParser() 4086 for i in range(0, len(self.latin_bin_msg), 10): 4087 bfp.feed(self.latin_bin_msg[i:i+10]) 4088 m = bfp.close() 4089 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4090 4091 def test_crlf_flatten(self): 4092 with openfile('msg_26.txt', 'rb') as fp: 4093 text = fp.read() 4094 msg = email.message_from_bytes(text) 4095 s = BytesIO() 4096 g = email.generator.BytesGenerator(s) 4097 g.flatten(msg, linesep='\r\n') 4098 self.assertEqual(s.getvalue(), text) 4099 4100 def test_8bit_multipart(self): 4101 # Issue 11605 4102 source = textwrap.dedent("""\ 4103 Date: Fri, 18 Mar 2011 17:15:43 +0100 4104 To: foo@example.com 4105 From: foodwatch-Newsletter <bar@example.com> 4106 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System 4107 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain> 4108 MIME-Version: 1.0 4109 Content-Type: multipart/alternative; 4110 boundary="b1_76a486bee62b0d200f33dc2ca08220ad" 4111 4112 --b1_76a486bee62b0d200f33dc2ca08220ad 4113 Content-Type: text/plain; charset="utf-8" 4114 Content-Transfer-Encoding: 8bit 4115 4116 Guten Tag, , 4117 4118 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die 4119 Nachrichten aus Japan. 
4120 4121 4122 --b1_76a486bee62b0d200f33dc2ca08220ad 4123 Content-Type: text/html; charset="utf-8" 4124 Content-Transfer-Encoding: 8bit 4125 4126 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 4127 "http://www.w3.org/TR/html4/loose.dtd"> 4128 <html lang="de"> 4129 <head> 4130 <title>foodwatch - Newsletter</title> 4131 </head> 4132 <body> 4133 <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team 4134 die Nachrichten aus Japan.</p> 4135 </body> 4136 </html> 4137 --b1_76a486bee62b0d200f33dc2ca08220ad-- 4138 4139 """).encode('utf-8') 4140 msg = email.message_from_bytes(source) 4141 s = BytesIO() 4142 g = email.generator.BytesGenerator(s) 4143 g.flatten(msg) 4144 self.assertEqual(s.getvalue(), source) 4145 4146 def test_bytes_generator_b_encoding_linesep(self): 4147 # Issue 14062: b encoding was tacking on an extra \n. 4148 m = Message() 4149 # This has enough non-ascii that it should always end up b encoded. 4150 m['Subject'] = Header('žluťoučký kůň') 4151 s = BytesIO() 4152 g = email.generator.BytesGenerator(s) 4153 g.flatten(m, linesep='\r\n') 4154 self.assertEqual( 4155 s.getvalue(), 4156 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4157 4158 def test_generator_b_encoding_linesep(self): 4159 # Since this broke in ByteGenerator, test Generator for completeness. 4160 m = Message() 4161 # This has enough non-ascii that it should always end up b encoded. 
4162 m['Subject'] = Header('žluťoučký kůň') 4163 s = StringIO() 4164 g = email.generator.Generator(s) 4165 g.flatten(m, linesep='\r\n') 4166 self.assertEqual( 4167 s.getvalue(), 4168 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4169 4170 maxDiff = None 4171 4172 4173class BaseTestBytesGeneratorIdempotent: 4174 4175 maxDiff = None 4176 4177 def _msgobj(self, filename): 4178 with openfile(filename, 'rb') as fp: 4179 data = fp.read() 4180 data = self.normalize_linesep_regex.sub(self.blinesep, data) 4181 msg = email.message_from_bytes(data) 4182 return msg, data 4183 4184 def _idempotent(self, msg, data, unixfrom=False): 4185 b = BytesIO() 4186 g = email.generator.BytesGenerator(b, maxheaderlen=0) 4187 g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep) 4188 self.assertEqual(data, b.getvalue()) 4189 4190 4191class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent, 4192 TestIdempotent): 4193 linesep = '\n' 4194 blinesep = b'\n' 4195 normalize_linesep_regex = re.compile(br'\r\n') 4196 4197 4198class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent, 4199 TestIdempotent): 4200 linesep = '\r\n' 4201 blinesep = b'\r\n' 4202 normalize_linesep_regex = re.compile(br'(?<!\r)\n') 4203 4204 4205class TestBase64(unittest.TestCase): 4206 def test_len(self): 4207 eq = self.assertEqual 4208 eq(base64mime.header_length('hello'), 4209 len(base64mime.body_encode(b'hello', eol=''))) 4210 for size in range(15): 4211 if size == 0 : bsize = 0 4212 elif size <= 3 : bsize = 4 4213 elif size <= 6 : bsize = 8 4214 elif size <= 9 : bsize = 12 4215 elif size <= 12: bsize = 16 4216 else : bsize = 20 4217 eq(base64mime.header_length('x' * size), bsize) 4218 4219 def test_decode(self): 4220 eq = self.assertEqual 4221 eq(base64mime.decode(''), b'') 4222 eq(base64mime.decode('aGVsbG8='), b'hello') 4223 4224 def test_encode(self): 4225 eq = self.assertEqual 4226 eq(base64mime.body_encode(b''), b'') 4227 eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n') 
4228 # Test the binary flag 4229 eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n') 4230 # Test the maxlinelen arg 4231 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\ 4232eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4233eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4234eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4235eHh4eCB4eHh4IA== 4236""") 4237 # Test the eol argument 4238 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4239 """\ 4240eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4241eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4242eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4243eHh4eCB4eHh4IA==\r 4244""") 4245 4246 def test_header_encode(self): 4247 eq = self.assertEqual 4248 he = base64mime.header_encode 4249 eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') 4250 eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') 4251 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4252 # Test the charset option 4253 eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') 4254 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4255 4256 4257 4258class TestQuopri(unittest.TestCase): 4259 def setUp(self): 4260 # Set of characters (as byte integers) that don't need to be encoded 4261 # in headers. 4262 self.hlit = list(chain( 4263 range(ord('a'), ord('z') + 1), 4264 range(ord('A'), ord('Z') + 1), 4265 range(ord('0'), ord('9') + 1), 4266 (c for c in b'!*+-/'))) 4267 # Set of characters (as byte integers) that do need to be encoded in 4268 # headers. 4269 self.hnon = [c for c in range(256) if c not in self.hlit] 4270 assert len(self.hlit) + len(self.hnon) == 256 4271 # Set of characters (as byte integers) that don't need to be encoded 4272 # in bodies. 4273 self.blit = list(range(ord(' '), ord('~') + 1)) 4274 self.blit.append(ord('\t')) 4275 self.blit.remove(ord('=')) 4276 # Set of characters (as byte integers) that do need to be encoded in 4277 # bodies. 
4278 self.bnon = [c for c in range(256) if c not in self.blit] 4279 assert len(self.blit) + len(self.bnon) == 256 4280 4281 def test_quopri_header_check(self): 4282 for c in self.hlit: 4283 self.assertFalse(quoprimime.header_check(c), 4284 'Should not be header quopri encoded: %s' % chr(c)) 4285 for c in self.hnon: 4286 self.assertTrue(quoprimime.header_check(c), 4287 'Should be header quopri encoded: %s' % chr(c)) 4288 4289 def test_quopri_body_check(self): 4290 for c in self.blit: 4291 self.assertFalse(quoprimime.body_check(c), 4292 'Should not be body quopri encoded: %s' % chr(c)) 4293 for c in self.bnon: 4294 self.assertTrue(quoprimime.body_check(c), 4295 'Should be body quopri encoded: %s' % chr(c)) 4296 4297 def test_header_quopri_len(self): 4298 eq = self.assertEqual 4299 eq(quoprimime.header_length(b'hello'), 5) 4300 # RFC 2047 chrome is not included in header_length(). 4301 eq(len(quoprimime.header_encode(b'hello', charset='xxx')), 4302 quoprimime.header_length(b'hello') + 4303 # =?xxx?q?...?= means 10 extra characters 4304 10) 4305 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20) 4306 # RFC 2047 chrome is not included in header_length(). 
4307 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')), 4308 quoprimime.header_length(b'h@e@l@l@o@') + 4309 # =?xxx?q?...?= means 10 extra characters 4310 10) 4311 for c in self.hlit: 4312 eq(quoprimime.header_length(bytes([c])), 1, 4313 'expected length 1 for %r' % chr(c)) 4314 for c in self.hnon: 4315 # Space is special; it's encoded to _ 4316 if c == ord(' '): 4317 continue 4318 eq(quoprimime.header_length(bytes([c])), 3, 4319 'expected length 3 for %r' % chr(c)) 4320 eq(quoprimime.header_length(b' '), 1) 4321 4322 def test_body_quopri_len(self): 4323 eq = self.assertEqual 4324 for c in self.blit: 4325 eq(quoprimime.body_length(bytes([c])), 1) 4326 for c in self.bnon: 4327 eq(quoprimime.body_length(bytes([c])), 3) 4328 4329 def test_quote_unquote_idempotent(self): 4330 for x in range(256): 4331 c = chr(x) 4332 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c) 4333 4334 def _test_header_encode(self, header, expected_encoded_header, charset=None): 4335 if charset is None: 4336 encoded_header = quoprimime.header_encode(header) 4337 else: 4338 encoded_header = quoprimime.header_encode(header, charset) 4339 self.assertEqual(encoded_header, expected_encoded_header) 4340 4341 def test_header_encode_null(self): 4342 self._test_header_encode(b'', '') 4343 4344 def test_header_encode_one_word(self): 4345 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=') 4346 4347 def test_header_encode_two_lines(self): 4348 self._test_header_encode(b'hello\nworld', 4349 '=?iso-8859-1?q?hello=0Aworld?=') 4350 4351 def test_header_encode_non_ascii(self): 4352 self._test_header_encode(b'hello\xc7there', 4353 '=?iso-8859-1?q?hello=C7there?=') 4354 4355 def test_header_encode_alt_charset(self): 4356 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=', 4357 charset='iso-8859-2') 4358 4359 def _test_header_decode(self, encoded_header, expected_decoded_header): 4360 decoded_header = quoprimime.header_decode(encoded_header) 4361 
self.assertEqual(decoded_header, expected_decoded_header) 4362 4363 def test_header_decode_null(self): 4364 self._test_header_decode('', '') 4365 4366 def test_header_decode_one_word(self): 4367 self._test_header_decode('hello', 'hello') 4368 4369 def test_header_decode_two_lines(self): 4370 self._test_header_decode('hello=0Aworld', 'hello\nworld') 4371 4372 def test_header_decode_non_ascii(self): 4373 self._test_header_decode('hello=C7there', 'hello\xc7there') 4374 4375 def test_header_decode_re_bug_18380(self): 4376 # Issue 18380: Call re.sub with a positional argument for flags in the wrong position 4377 self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257) 4378 4379 def _test_decode(self, encoded, expected_decoded, eol=None): 4380 if eol is None: 4381 decoded = quoprimime.decode(encoded) 4382 else: 4383 decoded = quoprimime.decode(encoded, eol=eol) 4384 self.assertEqual(decoded, expected_decoded) 4385 4386 def test_decode_null_word(self): 4387 self._test_decode('', '') 4388 4389 def test_decode_null_line_null_word(self): 4390 self._test_decode('\r\n', '\n') 4391 4392 def test_decode_one_word(self): 4393 self._test_decode('hello', 'hello') 4394 4395 def test_decode_one_word_eol(self): 4396 self._test_decode('hello', 'hello', eol='X') 4397 4398 def test_decode_one_line(self): 4399 self._test_decode('hello\r\n', 'hello\n') 4400 4401 def test_decode_one_line_lf(self): 4402 self._test_decode('hello\n', 'hello\n') 4403 4404 def test_decode_one_line_cr(self): 4405 self._test_decode('hello\r', 'hello\n') 4406 4407 def test_decode_one_line_nl(self): 4408 self._test_decode('hello\n', 'helloX', eol='X') 4409 4410 def test_decode_one_line_crnl(self): 4411 self._test_decode('hello\r\n', 'helloX', eol='X') 4412 4413 def test_decode_one_line_one_word(self): 4414 self._test_decode('hello\r\nworld', 'hello\nworld') 4415 4416 def test_decode_one_line_one_word_eol(self): 4417 self._test_decode('hello\r\nworld', 'helloXworld', eol='X') 4418 4419 def 
test_decode_two_lines(self): 4420 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n') 4421 4422 def test_decode_two_lines_eol(self): 4423 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X') 4424 4425 def test_decode_one_long_line(self): 4426 self._test_decode('Spam' * 250, 'Spam' * 250) 4427 4428 def test_decode_one_space(self): 4429 self._test_decode(' ', '') 4430 4431 def test_decode_multiple_spaces(self): 4432 self._test_decode(' ' * 5, '') 4433 4434 def test_decode_one_line_trailing_spaces(self): 4435 self._test_decode('hello \r\n', 'hello\n') 4436 4437 def test_decode_two_lines_trailing_spaces(self): 4438 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n') 4439 4440 def test_decode_quoted_word(self): 4441 self._test_decode('=22quoted=20words=22', '"quoted words"') 4442 4443 def test_decode_uppercase_quoting(self): 4444 self._test_decode('ab=CD=EF', 'ab\xcd\xef') 4445 4446 def test_decode_lowercase_quoting(self): 4447 self._test_decode('ab=cd=ef', 'ab\xcd\xef') 4448 4449 def test_decode_soft_line_break(self): 4450 self._test_decode('soft line=\r\nbreak', 'soft linebreak') 4451 4452 def test_decode_false_quoting(self): 4453 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2') 4454 4455 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None): 4456 kwargs = {} 4457 if maxlinelen is None: 4458 # Use body_encode's default. 4459 maxlinelen = 76 4460 else: 4461 kwargs['maxlinelen'] = maxlinelen 4462 if eol is None: 4463 # Use body_encode's default. 4464 eol = '\n' 4465 else: 4466 kwargs['eol'] = eol 4467 encoded_body = quoprimime.body_encode(body, **kwargs) 4468 self.assertEqual(encoded_body, expected_encoded_body) 4469 if eol == '\n' or eol == '\r\n': 4470 # We know how to split the result back into lines, so maxlinelen 4471 # can be checked. 
4472 for line in encoded_body.splitlines(): 4473 self.assertLessEqual(len(line), maxlinelen) 4474 4475 def test_encode_null(self): 4476 self._test_encode('', '') 4477 4478 def test_encode_null_lines(self): 4479 self._test_encode('\n\n', '\n\n') 4480 4481 def test_encode_one_line(self): 4482 self._test_encode('hello\n', 'hello\n') 4483 4484 def test_encode_one_line_crlf(self): 4485 self._test_encode('hello\r\n', 'hello\n') 4486 4487 def test_encode_one_line_eol(self): 4488 self._test_encode('hello\n', 'hello\r\n', eol='\r\n') 4489 4490 def test_encode_one_line_eol_after_non_ascii(self): 4491 # issue 20206; see changeset 0cf700464177 for why the encode/decode. 4492 self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'), 4493 'hello=CF=85\r\n', eol='\r\n') 4494 4495 def test_encode_one_space(self): 4496 self._test_encode(' ', '=20') 4497 4498 def test_encode_one_line_one_space(self): 4499 self._test_encode(' \n', '=20\n') 4500 4501# XXX: body_encode() expect strings, but uses ord(char) from these strings 4502# to index into a 256-entry list. For code points above 255, this will fail. 4503# Should there be a check for 8-bit only ord() values in body, or at least 4504# a comment about the expected input? 
4505 4506 def test_encode_two_lines_one_space(self): 4507 self._test_encode(' \n \n', '=20\n=20\n') 4508 4509 def test_encode_one_word_trailing_spaces(self): 4510 self._test_encode('hello ', 'hello =20') 4511 4512 def test_encode_one_line_trailing_spaces(self): 4513 self._test_encode('hello \n', 'hello =20\n') 4514 4515 def test_encode_one_word_trailing_tab(self): 4516 self._test_encode('hello \t', 'hello =09') 4517 4518 def test_encode_one_line_trailing_tab(self): 4519 self._test_encode('hello \t\n', 'hello =09\n') 4520 4521 def test_encode_trailing_space_before_maxlinelen(self): 4522 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6) 4523 4524 def test_encode_trailing_space_at_maxlinelen(self): 4525 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5) 4526 4527 def test_encode_trailing_space_beyond_maxlinelen(self): 4528 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4) 4529 4530 def test_encode_whitespace_lines(self): 4531 self._test_encode(' \n' * 5, '=20\n' * 5) 4532 4533 def test_encode_quoted_equals(self): 4534 self._test_encode('a = b', 'a =3D b') 4535 4536 def test_encode_one_long_string(self): 4537 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25) 4538 4539 def test_encode_one_long_line(self): 4540 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n') 4541 4542 def test_encode_one_very_long_line(self): 4543 self._test_encode('x' * 200 + '\n', 4544 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n') 4545 4546 def test_encode_shortest_maxlinelen(self): 4547 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4) 4548 4549 def test_encode_maxlinelen_too_small(self): 4550 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3) 4551 4552 def test_encode(self): 4553 eq = self.assertEqual 4554 eq(quoprimime.body_encode(''), '') 4555 eq(quoprimime.body_encode('hello'), 'hello') 4556 # Test the binary flag 4557 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld') 4558 # Test 
the maxlinelen arg 4559 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\ 4560xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= 4561 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= 4562x xxxx xxxx xxxx xxxx=20""") 4563 # Test the eol argument 4564 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4565 """\ 4566xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r 4567 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r 4568x xxxx xxxx xxxx xxxx=20""") 4569 eq(quoprimime.body_encode("""\ 4570one line 4571 4572two line"""), """\ 4573one line 4574 4575two line""") 4576 4577 4578 4579# Test the Charset class 4580class TestCharset(unittest.TestCase): 4581 def tearDown(self): 4582 from email import charset as CharsetModule 4583 try: 4584 del CharsetModule.CHARSETS['fake'] 4585 except KeyError: 4586 pass 4587 4588 def test_codec_encodeable(self): 4589 eq = self.assertEqual 4590 # Make sure us-ascii = no Unicode conversion 4591 c = Charset('us-ascii') 4592 eq(c.header_encode('Hello World!'), 'Hello World!') 4593 # Test 8-bit idempotency with us-ascii 4594 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' 4595 self.assertRaises(UnicodeError, c.header_encode, s) 4596 c = Charset('utf-8') 4597 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=') 4598 4599 def test_body_encode(self): 4600 eq = self.assertEqual 4601 # Try a charset with QP body encoding 4602 c = Charset('iso-8859-1') 4603 eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) 4604 # Try a charset with Base64 body encoding 4605 c = Charset('utf-8') 4606 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world')) 4607 # Try a charset with None body encoding 4608 c = Charset('us-ascii') 4609 eq('hello world', c.body_encode('hello world')) 4610 # Try the convert argument, where input codec != output codec 4611 c = Charset('euc-jp') 4612 # With apologies to Tokio Kikuchi ;) 4613 # XXX FIXME 4614## try: 4615## eq('\x1b$B5FCO;~IW\x1b(B', 4616## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) 4617## 
eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', 4618## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) 4619## except LookupError: 4620## # We probably don't have the Japanese codecs installed 4621## pass 4622 # Testing SF bug #625509, which we have to fake, since there are no 4623 # built-in encodings where the header encoding is QP but the body 4624 # encoding is not. 4625 from email import charset as CharsetModule 4626 CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8') 4627 c = Charset('fake') 4628 eq('hello world', c.body_encode('hello world')) 4629 4630 def test_unicode_charset_name(self): 4631 charset = Charset('us-ascii') 4632 self.assertEqual(str(charset), 'us-ascii') 4633 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') 4634 4635 4636 4637# Test multilingual MIME headers. 4638class TestHeader(TestEmailBase): 4639 def test_simple(self): 4640 eq = self.ndiffAssertEqual 4641 h = Header('Hello World!') 4642 eq(h.encode(), 'Hello World!') 4643 h.append(' Goodbye World!') 4644 eq(h.encode(), 'Hello World! Goodbye World!') 4645 4646 def test_simple_surprise(self): 4647 eq = self.ndiffAssertEqual 4648 h = Header('Hello World!') 4649 eq(h.encode(), 'Hello World!') 4650 h.append('Goodbye World!') 4651 eq(h.encode(), 'Hello World! 
Goodbye World!') 4652 4653 def test_header_needs_no_decoding(self): 4654 h = 'no decoding needed' 4655 self.assertEqual(decode_header(h), [(h, None)]) 4656 4657 def test_long(self): 4658 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", 4659 maxlinelen=76) 4660 for l in h.encode(splitchars=' ').split('\n '): 4661 self.assertLessEqual(len(l), 76) 4662 4663 def test_multilingual(self): 4664 eq = self.ndiffAssertEqual 4665 g = Charset("iso-8859-1") 4666 cz = Charset("iso-8859-2") 4667 utf8 = Charset("utf-8") 4668 g_head = (b'Die Mieter treten hier ein werden mit einem ' 4669 b'Foerderband komfortabel den Korridor entlang, ' 4670 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, ' 4671 b'gegen die rotierenden Klingen bef\xf6rdert. ') 4672 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 4673 b'd\xf9vtipu.. ') 4674 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 4675 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 4676 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 4677 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 4678 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 4679 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 4680 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 4681 '\u3044\u307e\u3059\u3002') 4682 h = Header(g_head, g) 4683 h.append(cz_head, cz) 4684 h.append(utf8_head, utf8) 4685 enc = h.encode(maxlinelen=76) 4686 eq(enc, """\ 4687=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?= 4688 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?= 4689 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?= 4690 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= 4691 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 4692 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= 4693 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= 4694 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= 4695 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?= 4696 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?= 4697 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""") 4698 decoded = decode_header(enc) 4699 eq(len(decoded), 3) 4700 eq(decoded[0], (g_head, 'iso-8859-1')) 4701 eq(decoded[1], (cz_head, 'iso-8859-2')) 4702 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8')) 4703 ustr = str(h) 4704 eq(ustr, 4705 (b'Die Mieter treten hier ein werden mit einem Foerderband ' 4706 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' 4707 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' 4708 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' 4709 b'tlakem jejich d\xc5\xafvtipu.. 
\xe6\xad\xa3\xe7\xa2\xba\xe3\x81' 4710 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' 4711 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' 4712 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' 4713 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' 4714 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' 4715 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' 4716 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' 4717 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' 4718 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' 4719 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' 4720 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82' 4721 ).decode('utf-8')) 4722 # Test make_header() 4723 newh = make_header(decode_header(enc)) 4724 eq(newh, h) 4725 4726 def test_empty_header_encode(self): 4727 h = Header() 4728 self.assertEqual(h.encode(), '') 4729 4730 def test_header_ctor_default_args(self): 4731 eq = self.ndiffAssertEqual 4732 h = Header() 4733 eq(h, '') 4734 h.append('foo', Charset('iso-8859-1')) 4735 eq(h, 'foo') 4736 4737 def test_explicit_maxlinelen(self): 4738 eq = self.ndiffAssertEqual 4739 hstr = ('A very long line that must get split to something other ' 4740 'than at the 76th character boundary to test the non-default ' 4741 'behavior') 4742 h = Header(hstr) 4743 eq(h.encode(), '''\ 4744A very long line that must get split to something other than at the 76th 4745 character boundary to test the non-default behavior''') 4746 eq(str(h), hstr) 4747 h = Header(hstr, header_name='Subject') 4748 eq(h.encode(), '''\ 4749A very long line that must get split to something other than at the 4750 76th character boundary to test the non-default behavior''') 4751 eq(str(h), hstr) 4752 h = Header(hstr, maxlinelen=1024, header_name='Subject') 4753 eq(h.encode(), hstr) 4754 eq(str(h), hstr) 4755 4756 
def test_quopri_splittable(self): 4757 eq = self.ndiffAssertEqual 4758 h = Header(charset='iso-8859-1', maxlinelen=20) 4759 x = 'xxxx ' * 20 4760 h.append(x) 4761 s = h.encode() 4762 eq(s, """\ 4763=?iso-8859-1?q?xxx?= 4764 =?iso-8859-1?q?x_?= 4765 =?iso-8859-1?q?xx?= 4766 =?iso-8859-1?q?xx?= 4767 =?iso-8859-1?q?_x?= 4768 =?iso-8859-1?q?xx?= 4769 =?iso-8859-1?q?x_?= 4770 =?iso-8859-1?q?xx?= 4771 =?iso-8859-1?q?xx?= 4772 =?iso-8859-1?q?_x?= 4773 =?iso-8859-1?q?xx?= 4774 =?iso-8859-1?q?x_?= 4775 =?iso-8859-1?q?xx?= 4776 =?iso-8859-1?q?xx?= 4777 =?iso-8859-1?q?_x?= 4778 =?iso-8859-1?q?xx?= 4779 =?iso-8859-1?q?x_?= 4780 =?iso-8859-1?q?xx?= 4781 =?iso-8859-1?q?xx?= 4782 =?iso-8859-1?q?_x?= 4783 =?iso-8859-1?q?xx?= 4784 =?iso-8859-1?q?x_?= 4785 =?iso-8859-1?q?xx?= 4786 =?iso-8859-1?q?xx?= 4787 =?iso-8859-1?q?_x?= 4788 =?iso-8859-1?q?xx?= 4789 =?iso-8859-1?q?x_?= 4790 =?iso-8859-1?q?xx?= 4791 =?iso-8859-1?q?xx?= 4792 =?iso-8859-1?q?_x?= 4793 =?iso-8859-1?q?xx?= 4794 =?iso-8859-1?q?x_?= 4795 =?iso-8859-1?q?xx?= 4796 =?iso-8859-1?q?xx?= 4797 =?iso-8859-1?q?_x?= 4798 =?iso-8859-1?q?xx?= 4799 =?iso-8859-1?q?x_?= 4800 =?iso-8859-1?q?xx?= 4801 =?iso-8859-1?q?xx?= 4802 =?iso-8859-1?q?_x?= 4803 =?iso-8859-1?q?xx?= 4804 =?iso-8859-1?q?x_?= 4805 =?iso-8859-1?q?xx?= 4806 =?iso-8859-1?q?xx?= 4807 =?iso-8859-1?q?_x?= 4808 =?iso-8859-1?q?xx?= 4809 =?iso-8859-1?q?x_?= 4810 =?iso-8859-1?q?xx?= 4811 =?iso-8859-1?q?xx?= 4812 =?iso-8859-1?q?_?=""") 4813 eq(x, str(make_header(decode_header(s)))) 4814 h = Header(charset='iso-8859-1', maxlinelen=40) 4815 h.append('xxxx ' * 20) 4816 s = h.encode() 4817 eq(s, """\ 4818=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?= 4819 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?= 4820 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= 4821 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= 4822 =?iso-8859-1?q?_xxxx_xxxx_?=""") 4823 eq(x, str(make_header(decode_header(s)))) 4824 4825 def test_base64_splittable(self): 4826 eq = self.ndiffAssertEqual 4827 h = Header(charset='koi8-r', 
maxlinelen=20) 4828 x = 'xxxx ' * 20 4829 h.append(x) 4830 s = h.encode() 4831 eq(s, """\ 4832=?koi8-r?b?eHh4?= 4833 =?koi8-r?b?eCB4?= 4834 =?koi8-r?b?eHh4?= 4835 =?koi8-r?b?IHh4?= 4836 =?koi8-r?b?eHgg?= 4837 =?koi8-r?b?eHh4?= 4838 =?koi8-r?b?eCB4?= 4839 =?koi8-r?b?eHh4?= 4840 =?koi8-r?b?IHh4?= 4841 =?koi8-r?b?eHgg?= 4842 =?koi8-r?b?eHh4?= 4843 =?koi8-r?b?eCB4?= 4844 =?koi8-r?b?eHh4?= 4845 =?koi8-r?b?IHh4?= 4846 =?koi8-r?b?eHgg?= 4847 =?koi8-r?b?eHh4?= 4848 =?koi8-r?b?eCB4?= 4849 =?koi8-r?b?eHh4?= 4850 =?koi8-r?b?IHh4?= 4851 =?koi8-r?b?eHgg?= 4852 =?koi8-r?b?eHh4?= 4853 =?koi8-r?b?eCB4?= 4854 =?koi8-r?b?eHh4?= 4855 =?koi8-r?b?IHh4?= 4856 =?koi8-r?b?eHgg?= 4857 =?koi8-r?b?eHh4?= 4858 =?koi8-r?b?eCB4?= 4859 =?koi8-r?b?eHh4?= 4860 =?koi8-r?b?IHh4?= 4861 =?koi8-r?b?eHgg?= 4862 =?koi8-r?b?eHh4?= 4863 =?koi8-r?b?eCB4?= 4864 =?koi8-r?b?eHh4?= 4865 =?koi8-r?b?IA==?=""") 4866 eq(x, str(make_header(decode_header(s)))) 4867 h = Header(charset='koi8-r', maxlinelen=40) 4868 h.append(x) 4869 s = h.encode() 4870 eq(s, """\ 4871=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?= 4872 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?= 4873 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?= 4874 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?= 4875 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?= 4876 =?koi8-r?b?eHh4eCB4eHh4IA==?=""") 4877 eq(x, str(make_header(decode_header(s)))) 4878 4879 def test_us_ascii_header(self): 4880 eq = self.assertEqual 4881 s = 'hello' 4882 x = decode_header(s) 4883 eq(x, [('hello', None)]) 4884 h = make_header(x) 4885 eq(s, h.encode()) 4886 4887 def test_string_charset(self): 4888 eq = self.assertEqual 4889 h = Header() 4890 h.append('hello', 'iso-8859-1') 4891 eq(h, 'hello') 4892 4893## def test_unicode_error(self): 4894## raises = self.assertRaises 4895## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') 4896## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') 4897## h = Header() 4898## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') 4899## raises(UnicodeError, h.append, '[P\xf6stal]', 
'us-ascii') 4900## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') 4901 4902 def test_utf8_shortest(self): 4903 eq = self.assertEqual 4904 h = Header('p\xf6stal', 'utf-8') 4905 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') 4906 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8') 4907 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') 4908 4909 def test_bad_8bit_header(self): 4910 raises = self.assertRaises 4911 eq = self.assertEqual 4912 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4913 raises(UnicodeError, Header, x) 4914 h = Header() 4915 raises(UnicodeError, h.append, x) 4916 e = x.decode('utf-8', 'replace') 4917 eq(str(Header(x, errors='replace')), e) 4918 h.append(x, errors='replace') 4919 eq(str(h), e) 4920 4921 def test_escaped_8bit_header(self): 4922 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4923 e = x.decode('ascii', 'surrogateescape') 4924 h = Header(e, charset=email.charset.UNKNOWN8BIT) 4925 self.assertEqual(str(h), 4926 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4927 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4928 4929 def test_header_handles_binary_unknown8bit(self): 4930 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4931 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4932 self.assertEqual(str(h), 4933 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4934 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4935 4936 def test_make_header_handles_binary_unknown8bit(self): 4937 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4938 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4939 h2 = email.header.make_header(email.header.decode_header(h)) 4940 self.assertEqual(str(h2), 4941 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4942 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) 4943 4944 def test_modify_returned_list_does_not_change_header(self): 4945 h = Header('test') 4946 chunks = 
email.header.decode_header(h) 4947 chunks.append(('ascii', 'test2')) 4948 self.assertEqual(str(h), 'test') 4949 4950 def test_encoded_adjacent_nonencoded(self): 4951 eq = self.assertEqual 4952 h = Header() 4953 h.append('hello', 'iso-8859-1') 4954 h.append('world') 4955 s = h.encode() 4956 eq(s, '=?iso-8859-1?q?hello?= world') 4957 h = make_header(decode_header(s)) 4958 eq(h.encode(), s) 4959 4960 def test_whitespace_keeper(self): 4961 eq = self.assertEqual 4962 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' 4963 parts = decode_header(s) 4964 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)]) 4965 hdr = make_header(parts) 4966 eq(hdr.encode(), 4967 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 4968 4969 def test_broken_base64_header(self): 4970 raises = self.assertRaises 4971 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' 4972 raises(errors.HeaderParseError, decode_header, s) 4973 4974 def test_shift_jis_charset(self): 4975 h = Header('文', charset='shift_jis') 4976 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=') 4977 4978 def test_flatten_header_with_no_value(self): 4979 # Issue 11401 (regression from email 4.x) Note that the space after 4980 # the header doesn't reflect the input, but this is also the way 4981 # email 4.x behaved. At some point it would be nice to fix that. 
4982 msg = email.message_from_string("EmptyHeader:") 4983 self.assertEqual(str(msg), "EmptyHeader: \n\n") 4984 4985 def test_encode_preserves_leading_ws_on_value(self): 4986 msg = Message() 4987 msg['SomeHeader'] = ' value with leading ws' 4988 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n") 4989 4990 def test_whitespace_header(self): 4991 self.assertEqual(Header(' ').encode(), ' ') 4992 4993 4994 4995# Test RFC 2231 header parameters (en/de)coding 4996class TestRFC2231(TestEmailBase): 4997 4998 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 4999 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5000 def test_get_param(self): 5001 eq = self.assertEqual 5002 msg = self._msgobj('msg_29.txt') 5003 eq(msg.get_param('title'), 5004 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5005 eq(msg.get_param('title', unquote=False), 5006 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) 5007 5008 def test_set_param(self): 5009 eq = self.ndiffAssertEqual 5010 msg = Message() 5011 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5012 charset='us-ascii') 5013 eq(msg.get_param('title'), 5014 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) 5015 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5016 charset='us-ascii', language='en') 5017 eq(msg.get_param('title'), 5018 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5019 msg = self._msgobj('msg_01.txt') 5020 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5021 charset='us-ascii', language='en') 5022 eq(msg.as_string(maxheaderlen=78), """\ 5023Return-Path: <bbb@zzz.org> 5024Delivered-To: bbb@zzz.org 5025Received: by mail.zzz.org (Postfix, from userid 889) 5026\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5027MIME-Version: 1.0 5028Content-Transfer-Encoding: 7bit 5029Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5030From: bbb@ddd.com 
(John X. Doe) 5031To: bbb@zzz.org 5032Subject: This is a test message 5033Date: Fri, 4 May 2001 14:05:44 -0400 5034Content-Type: text/plain; charset=us-ascii; 5035 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5036 5037 5038Hi, 5039 5040Do you like this message? 5041 5042-Me 5043""") 5044 5045 def test_set_param_requote(self): 5046 msg = Message() 5047 msg.set_param('title', 'foo') 5048 self.assertEqual(msg['content-type'], 'text/plain; title="foo"') 5049 msg.set_param('title', 'bar', requote=False) 5050 self.assertEqual(msg['content-type'], 'text/plain; title=bar') 5051 # tspecial is still quoted. 5052 msg.set_param('title', "(bar)bell", requote=False) 5053 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"') 5054 5055 def test_del_param(self): 5056 eq = self.ndiffAssertEqual 5057 msg = self._msgobj('msg_01.txt') 5058 msg.set_param('foo', 'bar', charset='us-ascii', language='en') 5059 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5060 charset='us-ascii', language='en') 5061 msg.del_param('foo', header='Content-Type') 5062 eq(msg.as_string(maxheaderlen=78), """\ 5063Return-Path: <bbb@zzz.org> 5064Delivered-To: bbb@zzz.org 5065Received: by mail.zzz.org (Postfix, from userid 889) 5066\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5067MIME-Version: 1.0 5068Content-Transfer-Encoding: 7bit 5069Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5070From: bbb@ddd.com (John X. Doe) 5071To: bbb@zzz.org 5072Subject: This is a test message 5073Date: Fri, 4 May 2001 14:05:44 -0400 5074Content-Type: text/plain; charset="us-ascii"; 5075 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5076 5077 5078Hi, 5079 5080Do you like this message? 5081 5082-Me 5083""") 5084 5085 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset 5086 # I changed the charset name, though, because the one in the file isn't 5087 # a legal charset name. 
Should add a test for an illegal charset. 5088 def test_rfc2231_get_content_charset(self): 5089 eq = self.assertEqual 5090 msg = self._msgobj('msg_32.txt') 5091 eq(msg.get_content_charset(), 'us-ascii') 5092 5093 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes 5094 def test_rfc2231_parse_rfc_quoting(self): 5095 m = textwrap.dedent('''\ 5096 Content-Disposition: inline; 5097 \tfilename*0*=''This%20is%20even%20more%20; 5098 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; 5099 \tfilename*2="is it not.pdf" 5100 5101 ''') 5102 msg = email.message_from_string(m) 5103 self.assertEqual(msg.get_filename(), 5104 'This is even more ***fun*** is it not.pdf') 5105 self.assertEqual(m, msg.as_string()) 5106 5107 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5108 def test_rfc2231_parse_extra_quoting(self): 5109 m = textwrap.dedent('''\ 5110 Content-Disposition: inline; 5111 \tfilename*0*="''This%20is%20even%20more%20"; 5112 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5113 \tfilename*2="is it not.pdf" 5114 5115 ''') 5116 msg = email.message_from_string(m) 5117 self.assertEqual(msg.get_filename(), 5118 'This is even more ***fun*** is it not.pdf') 5119 self.assertEqual(m, msg.as_string()) 5120 5121 # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset 5122 # but new test uses *0* because otherwise lang/charset is not valid. 
5123 # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values 5124 def test_rfc2231_no_language_or_charset(self): 5125 m = '''\ 5126Content-Transfer-Encoding: 8bit 5127Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" 5128Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm 5129 5130''' 5131 msg = email.message_from_string(m) 5132 param = msg.get_param('NAME') 5133 self.assertNotIsInstance(param, tuple) 5134 self.assertEqual( 5135 param, 5136 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') 5137 5138 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset 5139 def test_rfc2231_no_language_or_charset_in_filename(self): 5140 m = '''\ 5141Content-Disposition: inline; 5142\tfilename*0*="''This%20is%20even%20more%20"; 5143\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5144\tfilename*2="is it not.pdf" 5145 5146''' 5147 msg = email.message_from_string(m) 5148 self.assertEqual(msg.get_filename(), 5149 'This is even more ***fun*** is it not.pdf') 5150 5151 # Duplicate of previous test? 5152 def test_rfc2231_no_language_or_charset_in_filename_encoded(self): 5153 m = '''\ 5154Content-Disposition: inline; 5155\tfilename*0*="''This%20is%20even%20more%20"; 5156\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5157\tfilename*2="is it not.pdf" 5158 5159''' 5160 msg = email.message_from_string(m) 5161 self.assertEqual(msg.get_filename(), 5162 'This is even more ***fun*** is it not.pdf') 5163 5164 # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded, 5165 # but the test below is wrong (the first part should be decoded). 
5166 def test_rfc2231_partly_encoded(self): 5167 m = '''\ 5168Content-Disposition: inline; 5169\tfilename*0="''This%20is%20even%20more%20"; 5170\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5171\tfilename*2="is it not.pdf" 5172 5173''' 5174 msg = email.message_from_string(m) 5175 self.assertEqual( 5176 msg.get_filename(), 5177 'This%20is%20even%20more%20***fun*** is it not.pdf') 5178 5179 def test_rfc2231_partly_nonencoded(self): 5180 m = '''\ 5181Content-Disposition: inline; 5182\tfilename*0="This%20is%20even%20more%20"; 5183\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; 5184\tfilename*2="is it not.pdf" 5185 5186''' 5187 msg = email.message_from_string(m) 5188 self.assertEqual( 5189 msg.get_filename(), 5190 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') 5191 5192 def test_rfc2231_no_language_or_charset_in_boundary(self): 5193 m = '''\ 5194Content-Type: multipart/alternative; 5195\tboundary*0*="''This%20is%20even%20more%20"; 5196\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5197\tboundary*2="is it not.pdf" 5198 5199''' 5200 msg = email.message_from_string(m) 5201 self.assertEqual(msg.get_boundary(), 5202 'This is even more ***fun*** is it not.pdf') 5203 5204 def test_rfc2231_no_language_or_charset_in_charset(self): 5205 # This is a nonsensical charset value, but tests the code anyway 5206 m = '''\ 5207Content-Type: text/plain; 5208\tcharset*0*="This%20is%20even%20more%20"; 5209\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5210\tcharset*2="is it not.pdf" 5211 5212''' 5213 msg = email.message_from_string(m) 5214 self.assertEqual(msg.get_content_charset(), 5215 'this is even more ***fun*** is it not.pdf') 5216 5217 # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii 5218 def test_rfc2231_bad_encoding_in_filename(self): 5219 m = '''\ 5220Content-Disposition: inline; 5221\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; 5222\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5223\tfilename*2="is it not.pdf" 5224 5225''' 5226 msg = 
email.message_from_string(m) 5227 self.assertEqual(msg.get_filename(), 5228 'This is even more ***fun*** is it not.pdf') 5229 5230 def test_rfc2231_bad_encoding_in_charset(self): 5231 m = """\ 5232Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D 5233 5234""" 5235 msg = email.message_from_string(m) 5236 # This should return None because non-ascii characters in the charset 5237 # are not allowed. 5238 self.assertEqual(msg.get_content_charset(), None) 5239 5240 def test_rfc2231_bad_character_in_charset(self): 5241 m = """\ 5242Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D 5243 5244""" 5245 msg = email.message_from_string(m) 5246 # This should return None because non-ascii characters in the charset 5247 # are not allowed. 5248 self.assertEqual(msg.get_content_charset(), None) 5249 5250 def test_rfc2231_bad_character_in_filename(self): 5251 m = '''\ 5252Content-Disposition: inline; 5253\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; 5254\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5255\tfilename*2*="is it not.pdf%E2" 5256 5257''' 5258 msg = email.message_from_string(m) 5259 self.assertEqual(msg.get_filename(), 5260 'This is even more ***fun*** is it not.pdf\ufffd') 5261 5262 def test_rfc2231_unknown_encoding(self): 5263 m = """\ 5264Content-Transfer-Encoding: 8bit 5265Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt 5266 5267""" 5268 msg = email.message_from_string(m) 5269 self.assertEqual(msg.get_filename(), 'myfile.txt') 5270 5271 def test_rfc2231_single_tick_in_filename_extended(self): 5272 eq = self.assertEqual 5273 m = """\ 5274Content-Type: application/x-foo; 5275\tname*0*=\"Frank's\"; name*1*=\" Document\" 5276 5277""" 5278 msg = email.message_from_string(m) 5279 charset, language, s = msg.get_param('name') 5280 eq(charset, None) 5281 eq(language, None) 5282 eq(s, "Frank's Document") 5283 5284 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5285 def test_rfc2231_single_tick_in_filename(self): 
5286 m = """\ 5287Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" 5288 5289""" 5290 msg = email.message_from_string(m) 5291 param = msg.get_param('name') 5292 self.assertNotIsInstance(param, tuple) 5293 self.assertEqual(param, "Frank's Document") 5294 5295 def test_rfc2231_missing_tick(self): 5296 m = '''\ 5297Content-Disposition: inline; 5298\tfilename*0*="'This%20is%20broken"; 5299''' 5300 msg = email.message_from_string(m) 5301 self.assertEqual( 5302 msg.get_filename(), 5303 "'This is broken") 5304 5305 def test_rfc2231_missing_tick_with_encoded_non_ascii(self): 5306 m = '''\ 5307Content-Disposition: inline; 5308\tfilename*0*="'This%20is%E2broken"; 5309''' 5310 msg = email.message_from_string(m) 5311 self.assertEqual( 5312 msg.get_filename(), 5313 "'This is\ufffdbroken") 5314 5315 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang 5316 def test_rfc2231_tick_attack_extended(self): 5317 eq = self.assertEqual 5318 m = """\ 5319Content-Type: application/x-foo; 5320\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" 5321 5322""" 5323 msg = email.message_from_string(m) 5324 charset, language, s = msg.get_param('name') 5325 eq(charset, 'us-ascii') 5326 eq(language, 'en-us') 5327 eq(s, "Frank's Document") 5328 5329 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value 5330 def test_rfc2231_tick_attack(self): 5331 m = """\ 5332Content-Type: application/x-foo; 5333\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" 5334 5335""" 5336 msg = email.message_from_string(m) 5337 param = msg.get_param('name') 5338 self.assertNotIsInstance(param, tuple) 5339 self.assertEqual(param, "us-ascii'en-us'Frank's Document") 5340 5341 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes 5342 def test_rfc2231_no_extended_values(self): 5343 eq = self.assertEqual 5344 m = """\ 5345Content-Type: application/x-foo; name=\"Frank's Document\" 5346 5347""" 
5348 msg = email.message_from_string(m) 5349 eq(msg.get_param('name'), "Frank's Document") 5350 5351 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments 5352 def test_rfc2231_encoded_then_unencoded_segments(self): 5353 eq = self.assertEqual 5354 m = """\ 5355Content-Type: application/x-foo; 5356\tname*0*=\"us-ascii'en-us'My\"; 5357\tname*1=\" Document\"; 5358\tname*2*=\" For You\" 5359 5360""" 5361 msg = email.message_from_string(m) 5362 charset, language, s = msg.get_param('name') 5363 eq(charset, 'us-ascii') 5364 eq(language, 'en-us') 5365 eq(s, 'My Document For You') 5366 5367 # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments 5368 # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments 5369 def test_rfc2231_unencoded_then_encoded_segments(self): 5370 eq = self.assertEqual 5371 m = """\ 5372Content-Type: application/x-foo; 5373\tname*0=\"us-ascii'en-us'My\"; 5374\tname*1*=\" Document\"; 5375\tname*2*=\" For You\" 5376 5377""" 5378 msg = email.message_from_string(m) 5379 charset, language, s = msg.get_param('name') 5380 eq(charset, 'us-ascii') 5381 eq(language, 'en-us') 5382 eq(s, 'My Document For You') 5383 5384 def test_should_not_hang_on_invalid_ew_messages(self): 5385 messages = ["""From: user@host.com 5386To: user@host.com 5387Bad-Header: 5388 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?= 5389 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?= 5390 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?= 5391 5392Hello! 
5393""", """From: ����� �������� <xxx@xxx> 5394To: "xxx" <xxx@xxx> 5395Subject: ��� ���������� ����� ����� � ��������� �� ���� 5396MIME-Version: 1.0 5397Content-Type: text/plain; charset="windows-1251"; 5398Content-Transfer-Encoding: 8bit 5399 5400�� ����� � ���� ������ ��� �������� 5401"""] 5402 for m in messages: 5403 with self.subTest(m=m): 5404 msg = email.message_from_string(m) 5405 5406 5407# Tests to ensure that signed parts of an email are completely preserved, as 5408# required by RFC1847 section 2.1. Note that these are incomplete, because the 5409# email package does not currently always preserve the body. See issue 1670765. 5410class TestSigned(TestEmailBase): 5411 5412 def _msg_and_obj(self, filename): 5413 with openfile(filename) as fp: 5414 original = fp.read() 5415 msg = email.message_from_string(original) 5416 return original, msg 5417 5418 def _signed_parts_eq(self, original, result): 5419 # Extract the first mime part of each message 5420 import re 5421 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) 5422 inpart = repart.search(original).group(2) 5423 outpart = repart.search(result).group(2) 5424 self.assertEqual(outpart, inpart) 5425 5426 def test_long_headers_as_string(self): 5427 original, msg = self._msg_and_obj('msg_45.txt') 5428 result = msg.as_string() 5429 self._signed_parts_eq(original, result) 5430 5431 def test_long_headers_as_string_maxheaderlen(self): 5432 original, msg = self._msg_and_obj('msg_45.txt') 5433 result = msg.as_string(maxheaderlen=60) 5434 self._signed_parts_eq(original, result) 5435 5436 def test_long_headers_flatten(self): 5437 original, msg = self._msg_and_obj('msg_45.txt') 5438 fp = StringIO() 5439 Generator(fp).flatten(msg) 5440 result = fp.getvalue() 5441 self._signed_parts_eq(original, result) 5442 5443 5444 5445if __name__ == '__main__': 5446 unittest.main() 5447