1# Copyright (C) 2001-2010 Python Software Foundation 2# Contact: email-sig@python.org 3# email package unit tests 4 5import re 6import time 7import base64 8import unittest 9import textwrap 10 11from io import StringIO, BytesIO 12from itertools import chain 13from random import choice 14from threading import Thread 15from unittest.mock import patch 16 17import email 18import email.policy 19 20from email.charset import Charset 21from email.header import Header, decode_header, make_header 22from email.parser import Parser, HeaderParser 23from email.generator import Generator, DecodedGenerator, BytesGenerator 24from email.message import Message 25from email.mime.application import MIMEApplication 26from email.mime.audio import MIMEAudio 27from email.mime.text import MIMEText 28from email.mime.image import MIMEImage 29from email.mime.base import MIMEBase 30from email.mime.message import MIMEMessage 31from email.mime.multipart import MIMEMultipart 32from email.mime.nonmultipart import MIMENonMultipart 33from email import utils 34from email import errors 35from email import encoders 36from email import iterators 37from email import base64mime 38from email import quoprimime 39 40from test.support import unlink, start_threads 41from test.test_email import openfile, TestEmailBase 42 43# These imports are documented to work, but we are testing them using a 44# different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # NOTE(review): self._msgobj(name) is not defined in this class; it is
    # expected to come from TestEmailBase and to return the parsed sample
    # message stored under that file name -- confirm against the base class.

    def test_get_all(self):
        # get_all() returns every value of a repeated header, or the supplied
        # default when the header is absent.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        # Round-trip a Charset through set_charset()/get_charset() and check
        # the MIME headers that are added, removed or left alone.
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # Passing a charset to set_payload() records it on the message.
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        # Bytes payload with a utf-8 charset is stored base64-encoded.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        # Same data supplied as str, with body encoding switched off.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        # Same data supplied as bytes, with body encoding switched off.
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # attach() must refuse to add a part once the payload is a string,
        # even though the declared content type is multipart.
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    def test_get_charsets(self):
        # get_charsets() yields one entry per part; the optional argument
        # replaces None for parts that declare no charset.
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Item assignment on a Message appends a header instead of
            # replacing it, and get_payload() only consults the first
            # Content-Transfer-Encoding header.  Drop the previous value so
            # every token is really exercised (deleting a missing header is
            # a no-op).
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        # Flattening msg_07 through DecodedGenerator must reproduce msg_17.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        # With unixfrom=True the first line is the envelope "From " line and
        # the remainder must equal the plain rendering.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        # as_string(policy=...) must agree with a Generator using that policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        # as_bytes(policy=...) must agree with a BytesGenerator using that
        # policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No header argument: this message carries only X-Header, so the
        # default lookup finds nothing.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # The quoted value has to contain real semicolons, otherwise nothing
        # checks that a ';' inside quotes is not taken as a parameter
        # separator.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a slash falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value without a slash falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_get_content_disposition(self):
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        # The returned value is lower-cased.
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        # A bytes payload must decode to the same bytes whatever charset the
        # Content-Type header declares.
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Build the failure message from one concatenated literal;
                # formatting str() of a tuple would leak the tuple's repr
                # (parentheses, quotes, comma) into the message.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        # A keyword whose value is None becomes a bare parameter name, with
        # the underscore turned into a dash.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
725 def test_embedded_header_via_Header_rejected(self): 726 msg = Message() 727 msg['Dummy'] = Header('dummy\nX-Injected-Header: test') 728 self.assertRaises(errors.HeaderParseError, msg.as_string) 729 730 def test_embedded_header_via_string_rejected(self): 731 msg = Message() 732 msg['Dummy'] = 'dummy\nX-Injected-Header: test' 733 self.assertRaises(errors.HeaderParseError, msg.as_string) 734 735 def test_unicode_header_defaults_to_utf8_encoding(self): 736 # Issue 14291 737 m = MIMEText('abc\n') 738 m['Subject'] = 'É test' 739 self.assertEqual(str(m),textwrap.dedent("""\ 740 Content-Type: text/plain; charset="us-ascii" 741 MIME-Version: 1.0 742 Content-Transfer-Encoding: 7bit 743 Subject: =?utf-8?q?=C3=89_test?= 744 745 abc 746 """)) 747 748 def test_unicode_body_defaults_to_utf8_encoding(self): 749 # Issue 14291 750 m = MIMEText('É testabc\n') 751 self.assertEqual(str(m),textwrap.dedent("""\ 752 Content-Type: text/plain; charset="utf-8" 753 MIME-Version: 1.0 754 Content-Transfer-Encoding: base64 755 756 w4kgdGVzdGFiYwo= 757 """)) 758 759 760# Test the email.encoders module 761class TestEncoders(unittest.TestCase): 762 763 def test_EncodersEncode_base64(self): 764 with openfile('PyBanner048.gif', 'rb') as fp: 765 bindata = fp.read() 766 mimed = email.mime.image.MIMEImage(bindata) 767 base64ed = mimed.get_payload() 768 # the transfer-encoded body lines should all be <=76 characters 769 lines = base64ed.split('\n') 770 self.assertLessEqual(max([ len(x) for x in lines ]), 76) 771 772 def test_encode_empty_payload(self): 773 eq = self.assertEqual 774 msg = Message() 775 msg.set_charset('us-ascii') 776 eq(msg['content-transfer-encoding'], '7bit') 777 778 def test_default_cte(self): 779 eq = self.assertEqual 780 # 7bit data and the default us-ascii _charset 781 msg = MIMEText('hello world') 782 eq(msg['content-transfer-encoding'], '7bit') 783 # Similar, but with 8bit data 784 msg = MIMEText('hello \xf8 world') 785 eq(msg['content-transfer-encoding'], 'base64') 786 # And 
now with a different charset 787 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') 788 eq(msg['content-transfer-encoding'], 'quoted-printable') 789 790 def test_encode7or8bit(self): 791 # Make sure a charset whose input character set is 8bit but 792 # whose output character set is 7bit gets a transfer-encoding 793 # of 7bit. 794 eq = self.assertEqual 795 msg = MIMEText('文\n', _charset='euc-jp') 796 eq(msg['content-transfer-encoding'], '7bit') 797 eq(msg.as_string(), textwrap.dedent("""\ 798 MIME-Version: 1.0 799 Content-Type: text/plain; charset="iso-2022-jp" 800 Content-Transfer-Encoding: 7bit 801 802 \x1b$BJ8\x1b(B 803 """)) 804 805 def test_qp_encode_latin1(self): 806 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') 807 self.assertEqual(str(msg), textwrap.dedent("""\ 808 MIME-Version: 1.0 809 Content-Type: text/text; charset="iso-8859-1" 810 Content-Transfer-Encoding: quoted-printable 811 812 =E1=F6 813 """)) 814 815 def test_qp_encode_non_latin1(self): 816 # Issue 16948 817 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') 818 self.assertEqual(str(msg), textwrap.dedent("""\ 819 MIME-Version: 1.0 820 Content-Type: text/text; charset="iso-8859-2" 821 Content-Transfer-Encoding: quoted-printable 822 823 =BF 824 """)) 825 826 827# Test long header wrapping 828class TestLongHeaders(TestEmailBase): 829 830 maxDiff = None 831 832 def test_split_long_continuation(self): 833 eq = self.ndiffAssertEqual 834 msg = email.message_from_string("""\ 835Subject: bug demonstration 836\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 837\tmore text 838 839test 840""") 841 sfp = StringIO() 842 g = Generator(sfp) 843 g.flatten(msg) 844 eq(sfp.getvalue(), """\ 845Subject: bug demonstration 846\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 847\tmore text 848 849test 850""") 851 852 def 
test_another_long_almost_unsplittable_header(self): 853 eq = self.ndiffAssertEqual 854 hstr = """\ 855bug demonstration 856\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 857\tmore text""" 858 h = Header(hstr, continuation_ws='\t') 859 eq(h.encode(), """\ 860bug demonstration 861\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 862\tmore text""") 863 h = Header(hstr.replace('\t', ' ')) 864 eq(h.encode(), """\ 865bug demonstration 866 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 867 more text""") 868 869 def test_long_nonstring(self): 870 eq = self.ndiffAssertEqual 871 g = Charset("iso-8859-1") 872 cz = Charset("iso-8859-2") 873 utf8 = Charset("utf-8") 874 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' 875 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 876 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 877 b'bef\xf6rdert. ') 878 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 879 b'd\xf9vtipu.. ') 880 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 881 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 882 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 883 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 884 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 885 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 886 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 887 '\u3044\u307e\u3059\u3002') 888 h = Header(g_head, g, header_name='Subject') 889 h.append(cz_head, cz) 890 h.append(utf8_head, utf8) 891 msg = Message() 892 msg['Subject'] = h 893 sfp = StringIO() 894 g = Generator(sfp) 895 g.flatten(msg) 896 eq(sfp.getvalue(), """\ 897Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= 898 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= 899 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= 900 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= 901 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 902 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= 903 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= 904 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= 905 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= 906 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= 907 =?utf-8?b?44CC?= 908 909""") 910 eq(h.encode(maxlinelen=76), """\ 911=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= 912 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= 913 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= 914 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= 915 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 916 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= 917 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= 918 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= 919 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= 920 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= 
921 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 922 923 def test_long_header_encode(self): 924 eq = self.ndiffAssertEqual 925 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 926 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 927 header_name='X-Foobar-Spoink-Defrobnit') 928 eq(h.encode(), '''\ 929wasnipoop; giraffes="very-long-necked-animals"; 930 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 931 932 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 933 eq = self.ndiffAssertEqual 934 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 935 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 936 header_name='X-Foobar-Spoink-Defrobnit', 937 continuation_ws='\t') 938 eq(h.encode(), '''\ 939wasnipoop; giraffes="very-long-necked-animals"; 940 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 941 942 def test_long_header_encode_with_tab_continuation(self): 943 eq = self.ndiffAssertEqual 944 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 945 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 946 header_name='X-Foobar-Spoink-Defrobnit', 947 continuation_ws='\t') 948 eq(h.encode(), '''\ 949wasnipoop; giraffes="very-long-necked-animals"; 950\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 951 952 def test_header_encode_with_different_output_charset(self): 953 h = Header('文', 'euc-jp') 954 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 955 956 def test_long_header_encode_with_different_output_charset(self): 957 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 958 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 959 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 960 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 961 res = """\ 962=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 963 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 964 
self.assertEqual(h.encode(), res) 965 966 def test_header_splitter(self): 967 eq = self.ndiffAssertEqual 968 msg = MIMEText('') 969 # It'd be great if we could use add_header() here, but that doesn't 970 # guarantee an order of the parameters. 971 msg['X-Foobar-Spoink-Defrobnit'] = ( 972 'wasnipoop; giraffes="very-long-necked-animals"; ' 973 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 974 sfp = StringIO() 975 g = Generator(sfp) 976 g.flatten(msg) 977 eq(sfp.getvalue(), '''\ 978Content-Type: text/plain; charset="us-ascii" 979MIME-Version: 1.0 980Content-Transfer-Encoding: 7bit 981X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 982 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 983 984''') 985 986 def test_no_semis_header_splitter(self): 987 eq = self.ndiffAssertEqual 988 msg = Message() 989 msg['From'] = 'test@dom.ain' 990 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10)) 991 msg.set_payload('Test') 992 sfp = StringIO() 993 g = Generator(sfp) 994 g.flatten(msg) 995 eq(sfp.getvalue(), """\ 996From: test@dom.ain 997References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> 998 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> 999 1000Test""") 1001 1002 def test_last_split_chunk_does_not_fit(self): 1003 eq = self.ndiffAssertEqual 1004 h = Header('Subject: the first part of this is short, but_the_second' 1005 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1006 '_all_by_itself') 1007 eq(h.encode(), """\ 1008Subject: the first part of this is short, 1009 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1010 1011 def test_splittable_leading_char_followed_by_overlong_unsplitable(self): 1012 eq = self.ndiffAssertEqual 1013 h = Header(', but_the_second' 1014 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1015 '_all_by_itself') 1016 eq(h.encode(), """\ 1017, 1018 
but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1019 1020 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self): 1021 eq = self.ndiffAssertEqual 1022 h = Header(', , but_the_second' 1023 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1024 '_all_by_itself') 1025 eq(h.encode(), """\ 1026, , 1027 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1028 1029 def test_trailing_splitable_on_overlong_unsplitable(self): 1030 eq = self.ndiffAssertEqual 1031 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1032 'be_on_a_line_all_by_itself;') 1033 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1034 "be_on_a_line_all_by_itself;") 1035 1036 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self): 1037 eq = self.ndiffAssertEqual 1038 h = Header('; ' 1039 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1040 'be_on_a_line_all_by_itself; ') 1041 eq(h.encode(), """\ 1042; 1043 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1044 1045 def test_long_header_with_multiple_sequential_split_chars(self): 1046 eq = self.ndiffAssertEqual 1047 h = Header('This is a long line that has two whitespaces in a row. ' 1048 'This used to cause truncation of the header when folded') 1049 eq(h.encode(), """\ 1050This is a long line that has two whitespaces in a row. 
This used to cause 1051 truncation of the header when folded""") 1052 1053 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1054 eq = self.ndiffAssertEqual 1055 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1056 'they;arenotlegal;fold,points') 1057 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1058 "arenotlegal;fold,points") 1059 1060 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1061 eq = self.ndiffAssertEqual 1062 h = Header('this is a test where we need to have more than one line ' 1063 'before; our final line that is just too big to fit;; ' 1064 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1065 'be_on_a_line_all_by_itself;') 1066 eq(h.encode(), """\ 1067this is a test where we need to have more than one line before; 1068 our final line that is just too big to fit;; 1069 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1070 1071 def test_overlong_last_part_followed_by_split_point(self): 1072 eq = self.ndiffAssertEqual 1073 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1074 'be_on_a_line_all_by_itself ') 1075 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1076 "should_be_on_a_line_all_by_itself ") 1077 1078 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1079 eq = self.ndiffAssertEqual 1080 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1081 'before_our_final_line_; ; ' 1082 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1083 'be_on_a_line_all_by_itself; ') 1084 eq(h.encode(), """\ 1085this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1086 ; 1087 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1088 1089 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1090 eq = self.ndiffAssertEqual 1091 h = Header('this is a 
test where we need to have more than one line ' 1092 'before our final line; ; ' 1093 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1094 'be_on_a_line_all_by_itself; ') 1095 eq(h.encode(), """\ 1096this is a test where we need to have more than one line before our final line; 1097 ; 1098 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1099 1100 def test_long_header_with_whitespace_runs(self): 1101 eq = self.ndiffAssertEqual 1102 msg = Message() 1103 msg['From'] = 'test@dom.ain' 1104 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10) 1105 msg.set_payload('Test') 1106 sfp = StringIO() 1107 g = Generator(sfp) 1108 g.flatten(msg) 1109 eq(sfp.getvalue(), """\ 1110From: test@dom.ain 1111References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1112 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1113 <foo@dom.ain> <foo@dom.ain>\x20\x20 1114 1115Test""") 1116 1117 def test_long_run_with_semi_header_splitter(self): 1118 eq = self.ndiffAssertEqual 1119 msg = Message() 1120 msg['From'] = 'test@dom.ain' 1121 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc' 1122 msg.set_payload('Test') 1123 sfp = StringIO() 1124 g = Generator(sfp) 1125 g.flatten(msg) 1126 eq(sfp.getvalue(), """\ 1127From: test@dom.ain 1128References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1129 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1130 <foo@dom.ain>; abc 1131 1132Test""") 1133 1134 def test_splitter_split_on_punctuation_only_if_fws(self): 1135 eq = self.ndiffAssertEqual 1136 msg = Message() 1137 msg['From'] = 'test@dom.ain' 1138 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1139 'they;arenotlegal;fold,points') 1140 msg.set_payload('Test') 1141 sfp = StringIO() 1142 g = Generator(sfp) 1143 g.flatten(msg) 1144 # XXX the space after the header should not be there. 
1145 eq(sfp.getvalue(), """\ 1146From: test@dom.ain 1147References:\x20 1148 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1149 1150Test""") 1151 1152 def test_no_split_long_header(self): 1153 eq = self.ndiffAssertEqual 1154 hstr = 'References: ' + 'x' * 80 1155 h = Header(hstr) 1156 # These come on two lines because Headers are really field value 1157 # classes and don't really know about their field names. 1158 eq(h.encode(), """\ 1159References: 1160 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1161 h = Header('x' * 80) 1162 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1163 1164 def test_splitting_multiple_long_lines(self): 1165 eq = self.ndiffAssertEqual 1166 hstr = """\ 1167from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1168\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1169\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1170""" 1171 h = Header(hstr, continuation_ws='\t') 1172 eq(h.encode(), """\ 1173from babylon.socal-raves.org (localhost [127.0.0.1]); 1174 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1175 for <mailman-admin@babylon.socal-raves.org>; 1176 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1177\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1178 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1179 for <mailman-admin@babylon.socal-raves.org>; 1180 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1181\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 
1182 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1183 for <mailman-admin@babylon.socal-raves.org>; 1184 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1185 1186 def test_splitting_first_line_only_is_long(self): 1187 eq = self.ndiffAssertEqual 1188 hstr = """\ 1189from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1190\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1191\tid 17k4h5-00034i-00 1192\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" 1193 h = Header(hstr, maxlinelen=78, header_name='Received', 1194 continuation_ws='\t') 1195 eq(h.encode(), """\ 1196from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1197 helo=cthulhu.gerg.ca) 1198\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1199\tid 17k4h5-00034i-00 1200\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") 1201 1202 def test_long_8bit_header(self): 1203 eq = self.ndiffAssertEqual 1204 msg = Message() 1205 h = Header('Britische Regierung gibt', 'iso-8859-1', 1206 header_name='Subject') 1207 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1208 eq(h.encode(maxlinelen=76), """\ 1209=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1210 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1211 msg['Subject'] = h 1212 eq(msg.as_string(maxheaderlen=76), """\ 1213Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1214 =?iso-8859-1?q?hore-Windkraftprojekte?= 1215 1216""") 1217 eq(msg.as_string(maxheaderlen=0), """\ 1218Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1219 1220""") 1221 1222 def test_long_8bit_header_no_charset(self): 1223 eq = self.ndiffAssertEqual 1224 msg = Message() 1225 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1226 'f\xfcr Offshore-Windkraftprojekte ' 1227 '<a-very-long-address@example.com>') 1228 msg['Reply-To'] = header_string 1229 eq(msg.as_string(maxheaderlen=78), """\ 
1230Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1231 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1232 1233""") 1234 msg = Message() 1235 msg['Reply-To'] = Header(header_string, 1236 header_name='Reply-To') 1237 eq(msg.as_string(maxheaderlen=78), """\ 1238Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1239 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1240 1241""") 1242 1243 def test_long_to_header(self): 1244 eq = self.ndiffAssertEqual 1245 to = ('"Someone Test #A" <someone@eecs.umich.edu>,' 1246 '<someone@eecs.umich.edu>, ' 1247 '"Someone Test #B" <someone@umich.edu>, ' 1248 '"Someone Test #C" <someone@eecs.umich.edu>, ' 1249 '"Someone Test #D" <someone@eecs.umich.edu>') 1250 msg = Message() 1251 msg['To'] = to 1252 eq(msg.as_string(maxheaderlen=78), '''\ 1253To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>, 1254 "Someone Test #B" <someone@umich.edu>, 1255 "Someone Test #C" <someone@eecs.umich.edu>, 1256 "Someone Test #D" <someone@eecs.umich.edu> 1257 1258''') 1259 1260 def test_long_line_after_append(self): 1261 eq = self.ndiffAssertEqual 1262 s = 'This is an example of string which has almost the limit of header length.' 1263 h = Header(s) 1264 h.append('Add another line.') 1265 eq(h.encode(maxlinelen=76), """\ 1266This is an example of string which has almost the limit of header length. 1267 Add another line.""") 1268 1269 def test_shorter_line_with_append(self): 1270 eq = self.ndiffAssertEqual 1271 s = 'This is a shorter line.' 1272 h = Header(s) 1273 h.append('Add another sentence. (Surprise?)') 1274 eq(h.encode(), 1275 'This is a shorter line. Add another sentence. 
(Surprise?)') 1276 1277 def test_long_field_name(self): 1278 eq = self.ndiffAssertEqual 1279 fn = 'X-Very-Very-Very-Long-Header-Name' 1280 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1281 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1282 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1283 'bef\xf6rdert. ') 1284 h = Header(gs, 'iso-8859-1', header_name=fn) 1285 # BAW: this seems broken because the first line is too long 1286 eq(h.encode(maxlinelen=76), """\ 1287=?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1288 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1289 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1290 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1291 1292 def test_long_received_header(self): 1293 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1294 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1295 'Wed, 05 Mar 2003 18:10:18 -0700') 1296 msg = Message() 1297 msg['Received-1'] = Header(h, continuation_ws='\t') 1298 msg['Received-2'] = h 1299 # This should be splitting on spaces not semicolons. 1300 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1301Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1302 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1303 Wed, 05 Mar 2003 18:10:18 -0700 1304Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1305 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1306 Wed, 05 Mar 2003 18:10:18 -0700 1307 1308""") 1309 1310 def test_string_headerinst_eq(self): 1311 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.' 1312 'tu-muenchen.de> (David Bremner\'s message of ' 1313 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1314 msg = Message() 1315 msg['Received-1'] = Header(h, header_name='Received-1', 1316 continuation_ws='\t') 1317 msg['Received-2'] = h 1318 # XXX The space after the ':' should not be there. 
    def test_long_unbreakable_lines_with_continuation(self):
        # Pin how a two-line value made of unbreakable base64-like runs is
        # emitted when given as a plain string, as a Header instance, and
        # as a string with a leading space.
        eq = self.ndiffAssertEqual
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  Either the first value line should
        # be snug against the field name, or the space after the field name
        # should not be there.  The expectation below records the current
        # output, not the desired one.
        eq(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")
# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Body lines that begin with "From " may be escaped as ">From " by the
    # generators; these tests pin when that escaping happens.

    def setUp(self):
        # Fixture: one header plus a body whose first line starts "From ".
        fixture = Message()
        fixture['From'] = 'aaa@bbb.org'
        fixture.set_payload('From the desk of A.A.A.:\nBlah blah blah\n')
        self.msg = fixture

    def test_mangled_from(self):
        # With mangling switched on the body line gains a ">" prefix; the
        # From: header itself is untouched.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    '>From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangling switched off the body is written verbatim.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    'From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # "From " lines in the multipart preamble and epilogue are mangled
        # too: exactly two ">From " lines are expected in the output.
        raw_lines = [
            'From: foo@bar.com',
            'Mime-Version: 1.0',
            'Content-Type: multipart/mixed; boundary=XXX',
            '',
            'From somewhere unknown',
            '',
            '--XXX',
            'Content-Type: text/plain',
            '',
            'foo',
            '',
            '--XXX--',
            '',
            'From somewhere unknowable',
            '',
        ]
        parsed = email.message_from_string('\n'.join(raw_lines))
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(parsed)
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # Mangling must also work in the bytes generator when the "From "
        # line carries non-ASCII bytes.
        header_lines = [
            'Content-Type: text/plain; charset="utf-8"',
            'MIME-Version: 1.0',
            'Content-Transfer-Encoding: 8bit',
            'From: aaa@bbb.org',
            '',
            '',
        ]
        source = '\n'.join(header_lines).encode('utf-8')
        parsed = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')

    def test_mutltipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # Smoke test: there is nothing to compare, parsing a multipart whose
        # Content-Transfer-Encoding holds non-ASCII bytes just must not raise.
        header_lines = [
            'From: aperson@example.com',
            'Content-Type: multipart/mixed; boundary="1"',
            'Content-Transfer-Encoding: \xc8',
            '',
        ]
        email.message_from_bytes('\n'.join(header_lines).encode('utf-8'))
self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1492 self._audiodata) 1493 1494 def test_checkSetMinor(self): 1495 au = MIMEAudio(self._audiodata, 'fish') 1496 self.assertEqual(au.get_content_type(), 'audio/fish') 1497 1498 def test_add_header(self): 1499 eq = self.assertEqual 1500 self._au.add_header('Content-Disposition', 'attachment', 1501 filename='audiotest.au') 1502 eq(self._au['content-disposition'], 1503 'attachment; filename="audiotest.au"') 1504 eq(self._au.get_params(header='content-disposition'), 1505 [('attachment', ''), ('filename', 'audiotest.au')]) 1506 eq(self._au.get_param('filename', header='content-disposition'), 1507 'audiotest.au') 1508 missing = [] 1509 eq(self._au.get_param('attachment', header='content-disposition'), '') 1510 self.assertIs(self._au.get_param('foo', failobj=missing, 1511 header='content-disposition'), missing) 1512 # Try some missing stuff 1513 self.assertIs(self._au.get_param('foobar', missing), missing) 1514 self.assertIs(self._au.get_param('attachment', missing, 1515 header='foobar'), missing) 1516 1517 1518 1519# Test the basic MIMEImage class 1520class TestMIMEImage(unittest.TestCase): 1521 def setUp(self): 1522 with openfile('PyBanner048.gif', 'rb') as fp: 1523 self._imgdata = fp.read() 1524 self._im = MIMEImage(self._imgdata) 1525 1526 def test_guess_minor_type(self): 1527 self.assertEqual(self._im.get_content_type(), 'image/gif') 1528 1529 def test_encoding(self): 1530 payload = self._im.get_payload() 1531 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1532 self._imgdata) 1533 1534 def test_checkSetMinor(self): 1535 im = MIMEImage(self._imgdata, 'fish') 1536 self.assertEqual(im.get_content_type(), 'image/fish') 1537 1538 def test_add_header(self): 1539 eq = self.assertEqual 1540 self._im.add_header('Content-Disposition', 'attachment', 1541 filename='dingusfish.gif') 1542 eq(self._im['content-disposition'], 1543 'attachment; filename="dingusfish.gif"') 1544 
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Each test builds a MIMEApplication part from raw bytes with one body
    # encoder and checks the string view, the decoded view and, where
    # relevant, the result of a bytes round trip.

    def _roundtrip(self, msg):
        # Flatten msg to wire bytes and return the message parsed back
        # from them.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        # Treated as a string, this will be invalid code points.
        as_text = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Flattening must leave the original untouched ...
        self.assertEqual(msg.get_payload(), as_text)
        # ... and the re-parsed copy must present the same views.
        self.assertEqual(msg2.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        # Treated as a string, this will be invalid code points.
        as_text = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original unchanged by flattening; re-parsed copy matches it.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(), as_text)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Original unchanged by flattening; re-parsed copy matches it.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        encoded = '+vv8/f7/\n'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Original unchanged by flattening; re-parsed copy matches it.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
eq(msg.get_charset().input_charset, 'us-ascii') 1673 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1674 # Also accept a Charset instance 1675 charset = Charset('utf-8') 1676 charset.body_encoding = None 1677 msg = MIMEText('hello there', _charset=charset) 1678 eq(msg.get_charset().input_charset, 'utf-8') 1679 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1680 eq(msg.get_payload(), 'hello there') 1681 1682 def test_7bit_input(self): 1683 eq = self.assertEqual 1684 msg = MIMEText('hello there', _charset='us-ascii') 1685 eq(msg.get_charset().input_charset, 'us-ascii') 1686 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1687 1688 def test_7bit_input_no_charset(self): 1689 eq = self.assertEqual 1690 msg = MIMEText('hello there') 1691 eq(msg.get_charset(), 'us-ascii') 1692 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1693 self.assertIn('hello there', msg.as_string()) 1694 1695 def test_utf8_input(self): 1696 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1697 eq = self.assertEqual 1698 msg = MIMEText(teststr, _charset='utf-8') 1699 eq(msg.get_charset().output_charset, 'utf-8') 1700 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1701 eq(msg.get_payload(decode=True), teststr.encode('utf-8')) 1702 1703 @unittest.skip("can't fix because of backward compat in email5, " 1704 "will fix in email6") 1705 def test_utf8_input_no_charset(self): 1706 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1707 self.assertRaises(UnicodeEncodeError, MIMEText, teststr) 1708 1709 1710 1711# Test complicated multipart/* messages 1712class TestMultipart(TestEmailBase): 1713 def setUp(self): 1714 with openfile('PyBanner048.gif', 'rb') as fp: 1715 data = fp.read() 1716 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') 1717 image = MIMEImage(data, name='dingusfish.gif') 1718 image.add_header('content-disposition', 'attachment', 1719 filename='dingusfish.gif') 1720 intro = MIMEText('''\ 1721Hi there, 1722 
1723This is the dingus fish. 1724''') 1725 container.attach(intro) 1726 container.attach(image) 1727 container['From'] = 'Barry <barry@digicool.com>' 1728 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>' 1729 container['Subject'] = 'Here is your dingus fish' 1730 1731 now = 987809702.54848599 1732 timetuple = time.localtime(now) 1733 if timetuple[-1] == 0: 1734 tzsecs = time.timezone 1735 else: 1736 tzsecs = time.altzone 1737 if tzsecs > 0: 1738 sign = '-' 1739 else: 1740 sign = '+' 1741 tzoffset = ' %s%04d' % (sign, tzsecs / 36) 1742 container['Date'] = time.strftime( 1743 '%a, %d %b %Y %H:%M:%S', 1744 time.localtime(now)) + tzoffset 1745 self._msg = container 1746 self._im = image 1747 self._txt = intro 1748 1749 def test_hierarchy(self): 1750 # convenience 1751 eq = self.assertEqual 1752 raises = self.assertRaises 1753 # tests 1754 m = self._msg 1755 self.assertTrue(m.is_multipart()) 1756 eq(m.get_content_type(), 'multipart/mixed') 1757 eq(len(m.get_payload()), 2) 1758 raises(IndexError, m.get_payload, 2) 1759 m0 = m.get_payload(0) 1760 m1 = m.get_payload(1) 1761 self.assertIs(m0, self._txt) 1762 self.assertIs(m1, self._im) 1763 eq(m.get_payload(), [m0, m1]) 1764 self.assertFalse(m0.is_multipart()) 1765 self.assertFalse(m1.is_multipart()) 1766 1767 def test_empty_multipart_idempotent(self): 1768 text = """\ 1769Content-Type: multipart/mixed; boundary="BOUNDARY" 1770MIME-Version: 1.0 1771Subject: A subject 1772To: aperson@dom.ain 1773From: bperson@dom.ain 1774 1775 1776--BOUNDARY 1777 1778 1779--BOUNDARY-- 1780""" 1781 msg = Parser().parsestr(text) 1782 self.ndiffAssertEqual(text, msg.as_string()) 1783 1784 def test_no_parts_in_a_multipart_with_none_epilogue(self): 1785 outer = MIMEBase('multipart', 'mixed') 1786 outer['Subject'] = 'A subject' 1787 outer['To'] = 'aperson@dom.ain' 1788 outer['From'] = 'bperson@dom.ain' 1789 outer.set_boundary('BOUNDARY') 1790 self.ndiffAssertEqual(outer.as_string(), '''\ 1791Content-Type: multipart/mixed; 
boundary="BOUNDARY" 1792MIME-Version: 1.0 1793Subject: A subject 1794To: aperson@dom.ain 1795From: bperson@dom.ain 1796 1797--BOUNDARY 1798 1799--BOUNDARY-- 1800''') 1801 1802 def test_no_parts_in_a_multipart_with_empty_epilogue(self): 1803 outer = MIMEBase('multipart', 'mixed') 1804 outer['Subject'] = 'A subject' 1805 outer['To'] = 'aperson@dom.ain' 1806 outer['From'] = 'bperson@dom.ain' 1807 outer.preamble = '' 1808 outer.epilogue = '' 1809 outer.set_boundary('BOUNDARY') 1810 self.ndiffAssertEqual(outer.as_string(), '''\ 1811Content-Type: multipart/mixed; boundary="BOUNDARY" 1812MIME-Version: 1.0 1813Subject: A subject 1814To: aperson@dom.ain 1815From: bperson@dom.ain 1816 1817 1818--BOUNDARY 1819 1820--BOUNDARY-- 1821''') 1822 1823 def test_one_part_in_a_multipart(self): 1824 eq = self.ndiffAssertEqual 1825 outer = MIMEBase('multipart', 'mixed') 1826 outer['Subject'] = 'A subject' 1827 outer['To'] = 'aperson@dom.ain' 1828 outer['From'] = 'bperson@dom.ain' 1829 outer.set_boundary('BOUNDARY') 1830 msg = MIMEText('hello world') 1831 outer.attach(msg) 1832 eq(outer.as_string(), '''\ 1833Content-Type: multipart/mixed; boundary="BOUNDARY" 1834MIME-Version: 1.0 1835Subject: A subject 1836To: aperson@dom.ain 1837From: bperson@dom.ain 1838 1839--BOUNDARY 1840Content-Type: text/plain; charset="us-ascii" 1841MIME-Version: 1.0 1842Content-Transfer-Encoding: 7bit 1843 1844hello world 1845--BOUNDARY-- 1846''') 1847 1848 def test_seq_parts_in_a_multipart_with_empty_preamble(self): 1849 eq = self.ndiffAssertEqual 1850 outer = MIMEBase('multipart', 'mixed') 1851 outer['Subject'] = 'A subject' 1852 outer['To'] = 'aperson@dom.ain' 1853 outer['From'] = 'bperson@dom.ain' 1854 outer.preamble = '' 1855 msg = MIMEText('hello world') 1856 outer.attach(msg) 1857 outer.set_boundary('BOUNDARY') 1858 eq(outer.as_string(), '''\ 1859Content-Type: multipart/mixed; boundary="BOUNDARY" 1860MIME-Version: 1.0 1861Subject: A subject 1862To: aperson@dom.ain 1863From: bperson@dom.ain 1864 1865 
1866--BOUNDARY 1867Content-Type: text/plain; charset="us-ascii" 1868MIME-Version: 1.0 1869Content-Transfer-Encoding: 7bit 1870 1871hello world 1872--BOUNDARY-- 1873''') 1874 1875 1876 def test_seq_parts_in_a_multipart_with_none_preamble(self): 1877 eq = self.ndiffAssertEqual 1878 outer = MIMEBase('multipart', 'mixed') 1879 outer['Subject'] = 'A subject' 1880 outer['To'] = 'aperson@dom.ain' 1881 outer['From'] = 'bperson@dom.ain' 1882 outer.preamble = None 1883 msg = MIMEText('hello world') 1884 outer.attach(msg) 1885 outer.set_boundary('BOUNDARY') 1886 eq(outer.as_string(), '''\ 1887Content-Type: multipart/mixed; boundary="BOUNDARY" 1888MIME-Version: 1.0 1889Subject: A subject 1890To: aperson@dom.ain 1891From: bperson@dom.ain 1892 1893--BOUNDARY 1894Content-Type: text/plain; charset="us-ascii" 1895MIME-Version: 1.0 1896Content-Transfer-Encoding: 7bit 1897 1898hello world 1899--BOUNDARY-- 1900''') 1901 1902 1903 def test_seq_parts_in_a_multipart_with_none_epilogue(self): 1904 eq = self.ndiffAssertEqual 1905 outer = MIMEBase('multipart', 'mixed') 1906 outer['Subject'] = 'A subject' 1907 outer['To'] = 'aperson@dom.ain' 1908 outer['From'] = 'bperson@dom.ain' 1909 outer.epilogue = None 1910 msg = MIMEText('hello world') 1911 outer.attach(msg) 1912 outer.set_boundary('BOUNDARY') 1913 eq(outer.as_string(), '''\ 1914Content-Type: multipart/mixed; boundary="BOUNDARY" 1915MIME-Version: 1.0 1916Subject: A subject 1917To: aperson@dom.ain 1918From: bperson@dom.ain 1919 1920--BOUNDARY 1921Content-Type: text/plain; charset="us-ascii" 1922MIME-Version: 1.0 1923Content-Transfer-Encoding: 7bit 1924 1925hello world 1926--BOUNDARY-- 1927''') 1928 1929 1930 def test_seq_parts_in_a_multipart_with_empty_epilogue(self): 1931 eq = self.ndiffAssertEqual 1932 outer = MIMEBase('multipart', 'mixed') 1933 outer['Subject'] = 'A subject' 1934 outer['To'] = 'aperson@dom.ain' 1935 outer['From'] = 'bperson@dom.ain' 1936 outer.epilogue = '' 1937 msg = MIMEText('hello world') 1938 outer.attach(msg) 
1939 outer.set_boundary('BOUNDARY') 1940 eq(outer.as_string(), '''\ 1941Content-Type: multipart/mixed; boundary="BOUNDARY" 1942MIME-Version: 1.0 1943Subject: A subject 1944To: aperson@dom.ain 1945From: bperson@dom.ain 1946 1947--BOUNDARY 1948Content-Type: text/plain; charset="us-ascii" 1949MIME-Version: 1.0 1950Content-Transfer-Encoding: 7bit 1951 1952hello world 1953--BOUNDARY-- 1954''') 1955 1956 1957 def test_seq_parts_in_a_multipart_with_nl_epilogue(self): 1958 eq = self.ndiffAssertEqual 1959 outer = MIMEBase('multipart', 'mixed') 1960 outer['Subject'] = 'A subject' 1961 outer['To'] = 'aperson@dom.ain' 1962 outer['From'] = 'bperson@dom.ain' 1963 outer.epilogue = '\n' 1964 msg = MIMEText('hello world') 1965 outer.attach(msg) 1966 outer.set_boundary('BOUNDARY') 1967 eq(outer.as_string(), '''\ 1968Content-Type: multipart/mixed; boundary="BOUNDARY" 1969MIME-Version: 1.0 1970Subject: A subject 1971To: aperson@dom.ain 1972From: bperson@dom.ain 1973 1974--BOUNDARY 1975Content-Type: text/plain; charset="us-ascii" 1976MIME-Version: 1.0 1977Content-Transfer-Encoding: 7bit 1978 1979hello world 1980--BOUNDARY-- 1981 1982''') 1983 1984 def test_message_external_body(self): 1985 eq = self.assertEqual 1986 msg = self._msgobj('msg_36.txt') 1987 eq(len(msg.get_payload()), 2) 1988 msg1 = msg.get_payload(1) 1989 eq(msg1.get_content_type(), 'multipart/alternative') 1990 eq(len(msg1.get_payload()), 2) 1991 for subpart in msg1.get_payload(): 1992 eq(subpart.get_content_type(), 'message/external-body') 1993 eq(len(subpart.get_payload()), 1) 1994 subsubpart = subpart.get_payload(0) 1995 eq(subsubpart.get_content_type(), 'text/plain') 1996 1997 def test_double_boundary(self): 1998 # msg_37.txt is a multipart that contains two dash-boundary's in a 1999 # row. Our interpretation of RFC 2046 calls for ignoring the second 2000 # and subsequent boundaries. 
2001 msg = self._msgobj('msg_37.txt') 2002 self.assertEqual(len(msg.get_payload()), 3) 2003 2004 def test_nested_inner_contains_outer_boundary(self): 2005 eq = self.ndiffAssertEqual 2006 # msg_38.txt has an inner part that contains outer boundaries. My 2007 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say 2008 # these are illegal and should be interpreted as unterminated inner 2009 # parts. 2010 msg = self._msgobj('msg_38.txt') 2011 sfp = StringIO() 2012 iterators._structure(msg, sfp) 2013 eq(sfp.getvalue(), """\ 2014multipart/mixed 2015 multipart/mixed 2016 multipart/alternative 2017 text/plain 2018 text/plain 2019 text/plain 2020 text/plain 2021""") 2022 2023 def test_nested_with_same_boundary(self): 2024 eq = self.ndiffAssertEqual 2025 # msg 39.txt is similarly evil in that it's got inner parts that use 2026 # the same boundary as outer parts. Again, I believe the way this is 2027 # parsed is closest to the spirit of RFC 2046 2028 msg = self._msgobj('msg_39.txt') 2029 sfp = StringIO() 2030 iterators._structure(msg, sfp) 2031 eq(sfp.getvalue(), """\ 2032multipart/mixed 2033 multipart/mixed 2034 multipart/alternative 2035 application/octet-stream 2036 application/octet-stream 2037 text/plain 2038""") 2039 2040 def test_boundary_in_non_multipart(self): 2041 msg = self._msgobj('msg_40.txt') 2042 self.assertEqual(msg.as_string(), '''\ 2043MIME-Version: 1.0 2044Content-Type: text/html; boundary="--961284236552522269" 2045 2046----961284236552522269 2047Content-Type: text/html; 2048Content-Transfer-Encoding: 7Bit 2049 2050<html></html> 2051 2052----961284236552522269-- 2053''') 2054 2055 def test_boundary_with_leading_space(self): 2056 eq = self.assertEqual 2057 msg = email.message_from_string('''\ 2058MIME-Version: 1.0 2059Content-Type: multipart/mixed; boundary=" XXXX" 2060 2061-- XXXX 2062Content-Type: text/plain 2063 2064 2065-- XXXX 2066Content-Type: text/plain 2067 2068-- XXXX-- 2069''') 2070 self.assertTrue(msg.is_multipart()) 2071 
eq(msg.get_boundary(), ' XXXX') 2072 eq(len(msg.get_payload()), 2) 2073 2074 def test_boundary_without_trailing_newline(self): 2075 m = Parser().parsestr("""\ 2076Content-Type: multipart/mixed; boundary="===============0012394164==" 2077MIME-Version: 1.0 2078 2079--===============0012394164== 2080Content-Type: image/file1.jpg 2081MIME-Version: 1.0 2082Content-Transfer-Encoding: base64 2083 2084YXNkZg== 2085--===============0012394164==--""") 2086 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') 2087 2088 def test_mimebase_default_policy(self): 2089 m = MIMEBase('multipart', 'mixed') 2090 self.assertIs(m.policy, email.policy.compat32) 2091 2092 def test_mimebase_custom_policy(self): 2093 m = MIMEBase('multipart', 'mixed', policy=email.policy.default) 2094 self.assertIs(m.policy, email.policy.default) 2095 2096# Test some badly formatted messages 2097class TestNonConformant(TestEmailBase): 2098 2099 def test_parse_missing_minor_type(self): 2100 eq = self.assertEqual 2101 msg = self._msgobj('msg_14.txt') 2102 eq(msg.get_content_type(), 'text/plain') 2103 eq(msg.get_content_maintype(), 'text') 2104 eq(msg.get_content_subtype(), 'plain') 2105 2106 # test_defect_handling 2107 def test_same_boundary_inner_outer(self): 2108 msg = self._msgobj('msg_15.txt') 2109 # XXX We can probably eventually do better 2110 inner = msg.get_payload(0) 2111 self.assertTrue(hasattr(inner, 'defects')) 2112 self.assertEqual(len(inner.defects), 1) 2113 self.assertIsInstance(inner.defects[0], 2114 errors.StartBoundaryNotFoundDefect) 2115 2116 # test_defect_handling 2117 def test_multipart_no_boundary(self): 2118 msg = self._msgobj('msg_25.txt') 2119 self.assertIsInstance(msg.get_payload(), str) 2120 self.assertEqual(len(msg.defects), 2) 2121 self.assertIsInstance(msg.defects[0], 2122 errors.NoBoundaryInMultipartDefect) 2123 self.assertIsInstance(msg.defects[1], 2124 errors.MultipartInvariantViolationDefect) 2125 2126 multipart_msg = textwrap.dedent("""\ 2127 Date: Wed, 14 Nov 2007 
12:56:23 GMT 2128 From: foo@bar.invalid 2129 To: foo@bar.invalid 2130 Subject: Content-Transfer-Encoding: base64 and multipart 2131 MIME-Version: 1.0 2132 Content-Type: multipart/mixed; 2133 boundary="===============3344438784458119861=="{} 2134 2135 --===============3344438784458119861== 2136 Content-Type: text/plain 2137 2138 Test message 2139 2140 --===============3344438784458119861== 2141 Content-Type: application/octet-stream 2142 Content-Transfer-Encoding: base64 2143 2144 YWJj 2145 2146 --===============3344438784458119861==-- 2147 """) 2148 2149 # test_defect_handling 2150 def test_multipart_invalid_cte(self): 2151 msg = self._str_msg( 2152 self.multipart_msg.format("\nContent-Transfer-Encoding: base64")) 2153 self.assertEqual(len(msg.defects), 1) 2154 self.assertIsInstance(msg.defects[0], 2155 errors.InvalidMultipartContentTransferEncodingDefect) 2156 2157 # test_defect_handling 2158 def test_multipart_no_cte_no_defect(self): 2159 msg = self._str_msg(self.multipart_msg.format('')) 2160 self.assertEqual(len(msg.defects), 0) 2161 2162 # test_defect_handling 2163 def test_multipart_valid_cte_no_defect(self): 2164 for cte in ('7bit', '8bit', 'BINary'): 2165 msg = self._str_msg( 2166 self.multipart_msg.format( 2167 "\nContent-Transfer-Encoding: {}".format(cte))) 2168 self.assertEqual(len(msg.defects), 0) 2169 2170 # test_headerregistry.TestContentTyopeHeader invalid_1 and invalid_2. 
2171 def test_invalid_content_type(self): 2172 eq = self.assertEqual 2173 neq = self.ndiffAssertEqual 2174 msg = Message() 2175 # RFC 2045, $5.2 says invalid yields text/plain 2176 msg['Content-Type'] = 'text' 2177 eq(msg.get_content_maintype(), 'text') 2178 eq(msg.get_content_subtype(), 'plain') 2179 eq(msg.get_content_type(), 'text/plain') 2180 # Clear the old value and try something /really/ invalid 2181 del msg['content-type'] 2182 msg['Content-Type'] = 'foo' 2183 eq(msg.get_content_maintype(), 'text') 2184 eq(msg.get_content_subtype(), 'plain') 2185 eq(msg.get_content_type(), 'text/plain') 2186 # Still, make sure that the message is idempotently generated 2187 s = StringIO() 2188 g = Generator(s) 2189 g.flatten(msg) 2190 neq(s.getvalue(), 'Content-Type: foo\n\n') 2191 2192 def test_no_start_boundary(self): 2193 eq = self.ndiffAssertEqual 2194 msg = self._msgobj('msg_31.txt') 2195 eq(msg.get_payload(), """\ 2196--BOUNDARY 2197Content-Type: text/plain 2198 2199message 1 2200 2201--BOUNDARY 2202Content-Type: text/plain 2203 2204message 2 2205 2206--BOUNDARY-- 2207""") 2208 2209 def test_no_separating_blank_line(self): 2210 eq = self.ndiffAssertEqual 2211 msg = self._msgobj('msg_35.txt') 2212 eq(msg.as_string(), """\ 2213From: aperson@dom.ain 2214To: bperson@dom.ain 2215Subject: here's something interesting 2216 2217counter to RFC 2822, there's no separating newline here 2218""") 2219 2220 # test_defect_handling 2221 def test_lying_multipart(self): 2222 msg = self._msgobj('msg_41.txt') 2223 self.assertTrue(hasattr(msg, 'defects')) 2224 self.assertEqual(len(msg.defects), 2) 2225 self.assertIsInstance(msg.defects[0], 2226 errors.NoBoundaryInMultipartDefect) 2227 self.assertIsInstance(msg.defects[1], 2228 errors.MultipartInvariantViolationDefect) 2229 2230 # test_defect_handling 2231 def test_missing_start_boundary(self): 2232 outer = self._msgobj('msg_42.txt') 2233 # The message structure is: 2234 # 2235 # multipart/mixed 2236 # text/plain 2237 # message/rfc822 2238 
# multipart/mixed [*] 2239 # 2240 # [*] This message is missing its start boundary 2241 bad = outer.get_payload(1).get_payload(0) 2242 self.assertEqual(len(bad.defects), 1) 2243 self.assertIsInstance(bad.defects[0], 2244 errors.StartBoundaryNotFoundDefect) 2245 2246 # test_defect_handling 2247 def test_first_line_is_continuation_header(self): 2248 eq = self.assertEqual 2249 m = ' Line 1\nSubject: test\n\nbody' 2250 msg = email.message_from_string(m) 2251 eq(msg.keys(), ['Subject']) 2252 eq(msg.get_payload(), 'body') 2253 eq(len(msg.defects), 1) 2254 self.assertDefectsEqual(msg.defects, 2255 [errors.FirstHeaderLineIsContinuationDefect]) 2256 eq(msg.defects[0].line, ' Line 1\n') 2257 2258 # test_defect_handling 2259 def test_missing_header_body_separator(self): 2260 # Our heuristic if we see a line that doesn't look like a header (no 2261 # leading whitespace but no ':') is to assume that the blank line that 2262 # separates the header from the body is missing, and to stop parsing 2263 # headers and start parsing the body. 
2264 msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') 2265 self.assertEqual(msg.keys(), ['Subject']) 2266 self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') 2267 self.assertDefectsEqual(msg.defects, 2268 [errors.MissingHeaderBodySeparatorDefect]) 2269 2270 2271# Test RFC 2047 header encoding and decoding 2272class TestRFC2047(TestEmailBase): 2273 def test_rfc2047_multiline(self): 2274 eq = self.assertEqual 2275 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz 2276 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" 2277 dh = decode_header(s) 2278 eq(dh, [ 2279 (b'Re: ', None), 2280 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), 2281 (b' baz foo bar ', None), 2282 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) 2283 header = make_header(dh) 2284 eq(str(header), 2285 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') 2286 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ 2287Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= 2288 =?mac-iceland?q?=9Arg=8Cs?=""") 2289 2290 def test_whitespace_keeper_unicode(self): 2291 eq = self.assertEqual 2292 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>' 2293 dh = decode_header(s) 2294 eq(dh, [(b'Andr\xe9', 'iso-8859-1'), 2295 (b' Pirard <pirard@dom.ain>', None)]) 2296 header = str(make_header(dh)) 2297 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>') 2298 2299 def test_whitespace_keeper_unicode_2(self): 2300 eq = self.assertEqual 2301 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' 2302 dh = decode_header(s) 2303 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'), 2304 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')]) 2305 hu = str(make_header(dh)) 2306 eq(hu, 'The quick brown fox jumped over the lazy dog') 2307 2308 def test_rfc2047_missing_whitespace(self): 2309 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' 2310 dh = decode_header(s) 2311 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 
'iso-8859-1'), 2312 (b'rg', None), (b'\xe5', 'iso-8859-1'), 2313 (b'sbord', None)]) 2314 2315 def test_rfc2047_with_whitespace(self): 2316 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' 2317 dh = decode_header(s) 2318 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'), 2319 (b' rg ', None), (b'\xe5', 'iso-8859-1'), 2320 (b' sbord', None)]) 2321 2322 def test_rfc2047_B_bad_padding(self): 2323 s = '=?iso-8859-1?B?%s?=' 2324 data = [ # only test complete bytes 2325 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'), 2326 ('dmk=', b'vi'), ('dmk', b'vi') 2327 ] 2328 for q, a in data: 2329 dh = decode_header(s % q) 2330 self.assertEqual(dh, [(a, 'iso-8859-1')]) 2331 2332 def test_rfc2047_Q_invalid_digits(self): 2333 # issue 10004. 2334 s = '=?iso-8859-1?Q?andr=e9=zz?=' 2335 self.assertEqual(decode_header(s), 2336 [(b'andr\xe9=zz', 'iso-8859-1')]) 2337 2338 def test_rfc2047_rfc2047_1(self): 2339 # 1st testcase at end of rfc2047 2340 s = '(=?ISO-8859-1?Q?a?=)' 2341 self.assertEqual(decode_header(s), 2342 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) 2343 2344 def test_rfc2047_rfc2047_2(self): 2345 # 2nd testcase at end of rfc2047 2346 s = '(=?ISO-8859-1?Q?a?= b)' 2347 self.assertEqual(decode_header(s), 2348 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) 2349 2350 def test_rfc2047_rfc2047_3(self): 2351 # 3rd testcase at end of rfc2047 2352 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2353 self.assertEqual(decode_header(s), 2354 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2355 2356 def test_rfc2047_rfc2047_4(self): 2357 # 4th testcase at end of rfc2047 2358 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2359 self.assertEqual(decode_header(s), 2360 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2361 2362 def test_rfc2047_rfc2047_5a(self): 2363 # 5th testcase at end of rfc2047 newline is \r\n 2364 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' 2365 self.assertEqual(decode_header(s), 2366 [(b'(', None), (b'ab', 'iso-8859-1'), 
(b')', None)]) 2367 2368 def test_rfc2047_rfc2047_5b(self): 2369 # 5th testcase at end of rfc2047 newline is \n 2370 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' 2371 self.assertEqual(decode_header(s), 2372 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2373 2374 def test_rfc2047_rfc2047_6(self): 2375 # 6th testcase at end of rfc2047 2376 s = '(=?ISO-8859-1?Q?a_b?=)' 2377 self.assertEqual(decode_header(s), 2378 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) 2379 2380 def test_rfc2047_rfc2047_7(self): 2381 # 7th testcase at end of rfc2047 2382 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' 2383 self.assertEqual(decode_header(s), 2384 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), 2385 (b')', None)]) 2386 self.assertEqual(make_header(decode_header(s)).encode(), s.lower()) 2387 self.assertEqual(str(make_header(decode_header(s))), '(a b)') 2388 2389 def test_multiline_header(self): 2390 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>' 2391 self.assertEqual(decode_header(s), 2392 [(b'"M\xfcller T"', 'windows-1252'), 2393 (b'<T.Mueller@xxx.com>', None)]) 2394 self.assertEqual(make_header(decode_header(s)).encode(), 2395 ''.join(s.splitlines())) 2396 self.assertEqual(str(make_header(decode_header(s))), 2397 '"Müller T" <T.Mueller@xxx.com>') 2398 2399 2400# Test the MIMEMessage class 2401class TestMIMEMessage(TestEmailBase): 2402 def setUp(self): 2403 with openfile('msg_11.txt') as fp: 2404 self._text = fp.read() 2405 2406 def test_type_error(self): 2407 self.assertRaises(TypeError, MIMEMessage, 'a plain string') 2408 2409 def test_valid_argument(self): 2410 eq = self.assertEqual 2411 subject = 'A sub-message' 2412 m = Message() 2413 m['Subject'] = subject 2414 r = MIMEMessage(m) 2415 eq(r.get_content_type(), 'message/rfc822') 2416 payload = r.get_payload() 2417 self.assertIsInstance(payload, list) 2418 eq(len(payload), 1) 2419 subpart = payload[0] 2420 self.assertIs(subpart, m) 2421 eq(subpart['subject'], subject) 2422 2423 
def test_bad_multipart(self): 2424 msg1 = Message() 2425 msg1['Subject'] = 'subpart 1' 2426 msg2 = Message() 2427 msg2['Subject'] = 'subpart 2' 2428 r = MIMEMessage(msg1) 2429 self.assertRaises(errors.MultipartConversionError, r.attach, msg2) 2430 2431 def test_generate(self): 2432 # First craft the message to be encapsulated 2433 m = Message() 2434 m['Subject'] = 'An enclosed message' 2435 m.set_payload('Here is the body of the message.\n') 2436 r = MIMEMessage(m) 2437 r['Subject'] = 'The enclosing message' 2438 s = StringIO() 2439 g = Generator(s) 2440 g.flatten(r) 2441 self.assertEqual(s.getvalue(), """\ 2442Content-Type: message/rfc822 2443MIME-Version: 1.0 2444Subject: The enclosing message 2445 2446Subject: An enclosed message 2447 2448Here is the body of the message. 2449""") 2450 2451 def test_parse_message_rfc822(self): 2452 eq = self.assertEqual 2453 msg = self._msgobj('msg_11.txt') 2454 eq(msg.get_content_type(), 'message/rfc822') 2455 payload = msg.get_payload() 2456 self.assertIsInstance(payload, list) 2457 eq(len(payload), 1) 2458 submsg = payload[0] 2459 self.assertIsInstance(submsg, Message) 2460 eq(submsg['subject'], 'An enclosed message') 2461 eq(submsg.get_payload(), 'Here is the body of the message.\n') 2462 2463 def test_dsn(self): 2464 eq = self.assertEqual 2465 # msg 16 is a Delivery Status Notification, see RFC 1894 2466 msg = self._msgobj('msg_16.txt') 2467 eq(msg.get_content_type(), 'multipart/report') 2468 self.assertTrue(msg.is_multipart()) 2469 eq(len(msg.get_payload()), 3) 2470 # Subpart 1 is a text/plain, human readable section 2471 subpart = msg.get_payload(0) 2472 eq(subpart.get_content_type(), 'text/plain') 2473 eq(subpart.get_payload(), """\ 2474This report relates to a message you sent with the following header fields: 2475 2476 Message-id: <002001c144a6$8752e060$56104586@oxy.edu> 2477 Date: Sun, 23 Sep 2001 20:10:55 -0700 2478 From: "Ian T. 
Henry" <henryi@oxy.edu> 2479 To: SoCal Raves <scr@socal-raves.org> 2480 Subject: [scr] yeah for Ians!! 2481 2482Your message cannot be delivered to the following recipients: 2483 2484 Recipient address: jangel1@cougar.noc.ucla.edu 2485 Reason: recipient reached disk quota 2486 2487""") 2488 # Subpart 2 contains the machine parsable DSN information. It 2489 # consists of two blocks of headers, represented by two nested Message 2490 # objects. 2491 subpart = msg.get_payload(1) 2492 eq(subpart.get_content_type(), 'message/delivery-status') 2493 eq(len(subpart.get_payload()), 2) 2494 # message/delivery-status should treat each block as a bunch of 2495 # headers, i.e. a bunch of Message objects. 2496 dsn1 = subpart.get_payload(0) 2497 self.assertIsInstance(dsn1, Message) 2498 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu') 2499 eq(dsn1.get_param('dns', header='reporting-mta'), '') 2500 # Try a missing one <wink> 2501 eq(dsn1.get_param('nsd', header='reporting-mta'), None) 2502 dsn2 = subpart.get_payload(1) 2503 self.assertIsInstance(dsn2, Message) 2504 eq(dsn2['action'], 'failed') 2505 eq(dsn2.get_params(header='original-recipient'), 2506 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')]) 2507 eq(dsn2.get_param('rfc822', header='final-recipient'), '') 2508 # Subpart 3 is the original message 2509 subpart = msg.get_payload(2) 2510 eq(subpart.get_content_type(), 'message/rfc822') 2511 payload = subpart.get_payload() 2512 self.assertIsInstance(payload, list) 2513 eq(len(payload), 1) 2514 subsubpart = payload[0] 2515 self.assertIsInstance(subsubpart, Message) 2516 eq(subsubpart.get_content_type(), 'text/plain') 2517 eq(subsubpart['message-id'], 2518 '<002001c144a6$8752e060$56104586@oxy.edu>') 2519 2520 def test_epilogue(self): 2521 eq = self.ndiffAssertEqual 2522 with openfile('msg_21.txt') as fp: 2523 text = fp.read() 2524 msg = Message() 2525 msg['From'] = 'aperson@dom.ain' 2526 msg['To'] = 'bperson@dom.ain' 2527 msg['Subject'] = 'Test' 2528 
msg.preamble = 'MIME message' 2529 msg.epilogue = 'End of MIME message\n' 2530 msg1 = MIMEText('One') 2531 msg2 = MIMEText('Two') 2532 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2533 msg.attach(msg1) 2534 msg.attach(msg2) 2535 sfp = StringIO() 2536 g = Generator(sfp) 2537 g.flatten(msg) 2538 eq(sfp.getvalue(), text) 2539 2540 def test_no_nl_preamble(self): 2541 eq = self.ndiffAssertEqual 2542 msg = Message() 2543 msg['From'] = 'aperson@dom.ain' 2544 msg['To'] = 'bperson@dom.ain' 2545 msg['Subject'] = 'Test' 2546 msg.preamble = 'MIME message' 2547 msg.epilogue = '' 2548 msg1 = MIMEText('One') 2549 msg2 = MIMEText('Two') 2550 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2551 msg.attach(msg1) 2552 msg.attach(msg2) 2553 eq(msg.as_string(), """\ 2554From: aperson@dom.ain 2555To: bperson@dom.ain 2556Subject: Test 2557Content-Type: multipart/mixed; boundary="BOUNDARY" 2558 2559MIME message 2560--BOUNDARY 2561Content-Type: text/plain; charset="us-ascii" 2562MIME-Version: 1.0 2563Content-Transfer-Encoding: 7bit 2564 2565One 2566--BOUNDARY 2567Content-Type: text/plain; charset="us-ascii" 2568MIME-Version: 1.0 2569Content-Transfer-Encoding: 7bit 2570 2571Two 2572--BOUNDARY-- 2573""") 2574 2575 def test_default_type(self): 2576 eq = self.assertEqual 2577 with openfile('msg_30.txt') as fp: 2578 msg = email.message_from_file(fp) 2579 container1 = msg.get_payload(0) 2580 eq(container1.get_default_type(), 'message/rfc822') 2581 eq(container1.get_content_type(), 'message/rfc822') 2582 container2 = msg.get_payload(1) 2583 eq(container2.get_default_type(), 'message/rfc822') 2584 eq(container2.get_content_type(), 'message/rfc822') 2585 container1a = container1.get_payload(0) 2586 eq(container1a.get_default_type(), 'text/plain') 2587 eq(container1a.get_content_type(), 'text/plain') 2588 container2a = container2.get_payload(0) 2589 eq(container2a.get_default_type(), 'text/plain') 2590 eq(container2a.get_content_type(), 'text/plain') 
2591 2592 def test_default_type_with_explicit_container_type(self): 2593 eq = self.assertEqual 2594 with openfile('msg_28.txt') as fp: 2595 msg = email.message_from_file(fp) 2596 container1 = msg.get_payload(0) 2597 eq(container1.get_default_type(), 'message/rfc822') 2598 eq(container1.get_content_type(), 'message/rfc822') 2599 container2 = msg.get_payload(1) 2600 eq(container2.get_default_type(), 'message/rfc822') 2601 eq(container2.get_content_type(), 'message/rfc822') 2602 container1a = container1.get_payload(0) 2603 eq(container1a.get_default_type(), 'text/plain') 2604 eq(container1a.get_content_type(), 'text/plain') 2605 container2a = container2.get_payload(0) 2606 eq(container2a.get_default_type(), 'text/plain') 2607 eq(container2a.get_content_type(), 'text/plain') 2608 2609 def test_default_type_non_parsed(self): 2610 eq = self.assertEqual 2611 neq = self.ndiffAssertEqual 2612 # Set up container 2613 container = MIMEMultipart('digest', 'BOUNDARY') 2614 container.epilogue = '' 2615 # Set up subparts 2616 subpart1a = MIMEText('message 1\n') 2617 subpart2a = MIMEText('message 2\n') 2618 subpart1 = MIMEMessage(subpart1a) 2619 subpart2 = MIMEMessage(subpart2a) 2620 container.attach(subpart1) 2621 container.attach(subpart2) 2622 eq(subpart1.get_content_type(), 'message/rfc822') 2623 eq(subpart1.get_default_type(), 'message/rfc822') 2624 eq(subpart2.get_content_type(), 'message/rfc822') 2625 eq(subpart2.get_default_type(), 'message/rfc822') 2626 neq(container.as_string(0), '''\ 2627Content-Type: multipart/digest; boundary="BOUNDARY" 2628MIME-Version: 1.0 2629 2630--BOUNDARY 2631Content-Type: message/rfc822 2632MIME-Version: 1.0 2633 2634Content-Type: text/plain; charset="us-ascii" 2635MIME-Version: 1.0 2636Content-Transfer-Encoding: 7bit 2637 2638message 1 2639 2640--BOUNDARY 2641Content-Type: message/rfc822 2642MIME-Version: 1.0 2643 2644Content-Type: text/plain; charset="us-ascii" 2645MIME-Version: 1.0 2646Content-Transfer-Encoding: 7bit 2647 2648message 2 2649 
2650--BOUNDARY-- 2651''') 2652 del subpart1['content-type'] 2653 del subpart1['mime-version'] 2654 del subpart2['content-type'] 2655 del subpart2['mime-version'] 2656 eq(subpart1.get_content_type(), 'message/rfc822') 2657 eq(subpart1.get_default_type(), 'message/rfc822') 2658 eq(subpart2.get_content_type(), 'message/rfc822') 2659 eq(subpart2.get_default_type(), 'message/rfc822') 2660 neq(container.as_string(0), '''\ 2661Content-Type: multipart/digest; boundary="BOUNDARY" 2662MIME-Version: 1.0 2663 2664--BOUNDARY 2665 2666Content-Type: text/plain; charset="us-ascii" 2667MIME-Version: 1.0 2668Content-Transfer-Encoding: 7bit 2669 2670message 1 2671 2672--BOUNDARY 2673 2674Content-Type: text/plain; charset="us-ascii" 2675MIME-Version: 1.0 2676Content-Transfer-Encoding: 7bit 2677 2678message 2 2679 2680--BOUNDARY-- 2681''') 2682 2683 def test_mime_attachments_in_constructor(self): 2684 eq = self.assertEqual 2685 text1 = MIMEText('') 2686 text2 = MIMEText('') 2687 msg = MIMEMultipart(_subparts=(text1, text2)) 2688 eq(len(msg.get_payload()), 2) 2689 eq(msg.get_payload(0), text1) 2690 eq(msg.get_payload(1), text2) 2691 2692 def test_default_multipart_constructor(self): 2693 msg = MIMEMultipart() 2694 self.assertTrue(msg.is_multipart()) 2695 2696 def test_multipart_default_policy(self): 2697 msg = MIMEMultipart() 2698 msg['To'] = 'a@b.com' 2699 msg['To'] = 'c@d.com' 2700 self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com']) 2701 2702 def test_multipart_custom_policy(self): 2703 msg = MIMEMultipart(policy=email.policy.default) 2704 msg['To'] = 'a@b.com' 2705 with self.assertRaises(ValueError) as cm: 2706 msg['To'] = 'c@d.com' 2707 self.assertEqual(str(cm.exception), 2708 'There may be at most 1 To headers in a message') 2709 2710# A general test of parser->model->generator idempotency. IOW, read a message 2711# in, parse it into a message object tree, then without touching the tree, 2712# regenerate the plain text. 
The original text and the transformed text 2713# should be identical. Note: that we ignore the Unix-From since that may 2714# contain a changed date. 2715class TestIdempotent(TestEmailBase): 2716 2717 linesep = '\n' 2718 2719 def _msgobj(self, filename): 2720 with openfile(filename) as fp: 2721 data = fp.read() 2722 msg = email.message_from_string(data) 2723 return msg, data 2724 2725 def _idempotent(self, msg, text, unixfrom=False): 2726 eq = self.ndiffAssertEqual 2727 s = StringIO() 2728 g = Generator(s, maxheaderlen=0) 2729 g.flatten(msg, unixfrom=unixfrom) 2730 eq(text, s.getvalue()) 2731 2732 def test_parse_text_message(self): 2733 eq = self.assertEqual 2734 msg, text = self._msgobj('msg_01.txt') 2735 eq(msg.get_content_type(), 'text/plain') 2736 eq(msg.get_content_maintype(), 'text') 2737 eq(msg.get_content_subtype(), 'plain') 2738 eq(msg.get_params()[1], ('charset', 'us-ascii')) 2739 eq(msg.get_param('charset'), 'us-ascii') 2740 eq(msg.preamble, None) 2741 eq(msg.epilogue, None) 2742 self._idempotent(msg, text) 2743 2744 def test_parse_untyped_message(self): 2745 eq = self.assertEqual 2746 msg, text = self._msgobj('msg_03.txt') 2747 eq(msg.get_content_type(), 'text/plain') 2748 eq(msg.get_params(), None) 2749 eq(msg.get_param('charset'), None) 2750 self._idempotent(msg, text) 2751 2752 def test_simple_multipart(self): 2753 msg, text = self._msgobj('msg_04.txt') 2754 self._idempotent(msg, text) 2755 2756 def test_MIME_digest(self): 2757 msg, text = self._msgobj('msg_02.txt') 2758 self._idempotent(msg, text) 2759 2760 def test_long_header(self): 2761 msg, text = self._msgobj('msg_27.txt') 2762 self._idempotent(msg, text) 2763 2764 def test_MIME_digest_with_part_headers(self): 2765 msg, text = self._msgobj('msg_28.txt') 2766 self._idempotent(msg, text) 2767 2768 def test_mixed_with_image(self): 2769 msg, text = self._msgobj('msg_06.txt') 2770 self._idempotent(msg, text) 2771 2772 def test_multipart_report(self): 2773 msg, text = self._msgobj('msg_05.txt') 2774 
self._idempotent(msg, text) 2775 2776 def test_dsn(self): 2777 msg, text = self._msgobj('msg_16.txt') 2778 self._idempotent(msg, text) 2779 2780 def test_preamble_epilogue(self): 2781 msg, text = self._msgobj('msg_21.txt') 2782 self._idempotent(msg, text) 2783 2784 def test_multipart_one_part(self): 2785 msg, text = self._msgobj('msg_23.txt') 2786 self._idempotent(msg, text) 2787 2788 def test_multipart_no_parts(self): 2789 msg, text = self._msgobj('msg_24.txt') 2790 self._idempotent(msg, text) 2791 2792 def test_no_start_boundary(self): 2793 msg, text = self._msgobj('msg_31.txt') 2794 self._idempotent(msg, text) 2795 2796 def test_rfc2231_charset(self): 2797 msg, text = self._msgobj('msg_32.txt') 2798 self._idempotent(msg, text) 2799 2800 def test_more_rfc2231_parameters(self): 2801 msg, text = self._msgobj('msg_33.txt') 2802 self._idempotent(msg, text) 2803 2804 def test_text_plain_in_a_multipart_digest(self): 2805 msg, text = self._msgobj('msg_34.txt') 2806 self._idempotent(msg, text) 2807 2808 def test_nested_multipart_mixeds(self): 2809 msg, text = self._msgobj('msg_12a.txt') 2810 self._idempotent(msg, text) 2811 2812 def test_message_external_body_idempotent(self): 2813 msg, text = self._msgobj('msg_36.txt') 2814 self._idempotent(msg, text) 2815 2816 def test_message_delivery_status(self): 2817 msg, text = self._msgobj('msg_43.txt') 2818 self._idempotent(msg, text, unixfrom=True) 2819 2820 def test_message_signed_idempotent(self): 2821 msg, text = self._msgobj('msg_45.txt') 2822 self._idempotent(msg, text) 2823 2824 def test_content_type(self): 2825 eq = self.assertEqual 2826 # Get a message object and reset the seek pointer for other tests 2827 msg, text = self._msgobj('msg_05.txt') 2828 eq(msg.get_content_type(), 'multipart/report') 2829 # Test the Content-Type: parameters 2830 params = {} 2831 for pk, pv in msg.get_params(): 2832 params[pk] = pv 2833 eq(params['report-type'], 'delivery-status') 2834 eq(params['boundary'], 
'D1690A7AC1.996856090/mail.example.com') 2835 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) 2836 eq(msg.epilogue, self.linesep) 2837 eq(len(msg.get_payload()), 3) 2838 # Make sure the subparts are what we expect 2839 msg1 = msg.get_payload(0) 2840 eq(msg1.get_content_type(), 'text/plain') 2841 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) 2842 msg2 = msg.get_payload(1) 2843 eq(msg2.get_content_type(), 'text/plain') 2844 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) 2845 msg3 = msg.get_payload(2) 2846 eq(msg3.get_content_type(), 'message/rfc822') 2847 self.assertIsInstance(msg3, Message) 2848 payload = msg3.get_payload() 2849 self.assertIsInstance(payload, list) 2850 eq(len(payload), 1) 2851 msg4 = payload[0] 2852 self.assertIsInstance(msg4, Message) 2853 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) 2854 2855 def test_parser(self): 2856 eq = self.assertEqual 2857 msg, text = self._msgobj('msg_06.txt') 2858 # Check some of the outer headers 2859 eq(msg.get_content_type(), 'message/rfc822') 2860 # Make sure the payload is a list of exactly one sub-Message, and that 2861 # that submessage has a type of text/plain 2862 payload = msg.get_payload() 2863 self.assertIsInstance(payload, list) 2864 eq(len(payload), 1) 2865 msg1 = payload[0] 2866 self.assertIsInstance(msg1, Message) 2867 eq(msg1.get_content_type(), 'text/plain') 2868 self.assertIsInstance(msg1.get_payload(), str) 2869 eq(msg1.get_payload(), self.linesep) 2870 2871 2872 2873# Test various other bits of the package's functionality 2874class TestMiscellaneous(TestEmailBase): 2875 def test_message_from_string(self): 2876 with openfile('msg_01.txt') as fp: 2877 text = fp.read() 2878 msg = email.message_from_string(text) 2879 s = StringIO() 2880 # Don't wrap/continue long headers since we're trying to test 2881 # idempotency. 
2882 g = Generator(s, maxheaderlen=0) 2883 g.flatten(msg) 2884 self.assertEqual(text, s.getvalue()) 2885 2886 def test_message_from_file(self): 2887 with openfile('msg_01.txt') as fp: 2888 text = fp.read() 2889 fp.seek(0) 2890 msg = email.message_from_file(fp) 2891 s = StringIO() 2892 # Don't wrap/continue long headers since we're trying to test 2893 # idempotency. 2894 g = Generator(s, maxheaderlen=0) 2895 g.flatten(msg) 2896 self.assertEqual(text, s.getvalue()) 2897 2898 def test_message_from_string_with_class(self): 2899 with openfile('msg_01.txt') as fp: 2900 text = fp.read() 2901 2902 # Create a subclass 2903 class MyMessage(Message): 2904 pass 2905 2906 msg = email.message_from_string(text, MyMessage) 2907 self.assertIsInstance(msg, MyMessage) 2908 # Try something more complicated 2909 with openfile('msg_02.txt') as fp: 2910 text = fp.read() 2911 msg = email.message_from_string(text, MyMessage) 2912 for subpart in msg.walk(): 2913 self.assertIsInstance(subpart, MyMessage) 2914 2915 def test_message_from_file_with_class(self): 2916 # Create a subclass 2917 class MyMessage(Message): 2918 pass 2919 2920 with openfile('msg_01.txt') as fp: 2921 msg = email.message_from_file(fp, MyMessage) 2922 self.assertIsInstance(msg, MyMessage) 2923 # Try something more complicated 2924 with openfile('msg_02.txt') as fp: 2925 msg = email.message_from_file(fp, MyMessage) 2926 for subpart in msg.walk(): 2927 self.assertIsInstance(subpart, MyMessage) 2928 2929 def test_custom_message_does_not_require_arguments(self): 2930 class MyMessage(Message): 2931 def __init__(self): 2932 super().__init__() 2933 msg = self._str_msg("Subject: test\n\ntest", MyMessage) 2934 self.assertIsInstance(msg, MyMessage) 2935 2936 def test__all__(self): 2937 module = __import__('email') 2938 self.assertEqual(sorted(module.__all__), [ 2939 'base64mime', 'charset', 'encoders', 'errors', 'feedparser', 2940 'generator', 'header', 'iterators', 'message', 2941 'message_from_binary_file', 'message_from_bytes', 
2942 'message_from_file', 'message_from_string', 'mime', 'parser', 2943 'quoprimime', 'utils', 2944 ]) 2945 2946 def test_formatdate(self): 2947 now = time.time() 2948 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], 2949 time.gmtime(now)[:6]) 2950 2951 def test_formatdate_localtime(self): 2952 now = time.time() 2953 self.assertEqual( 2954 utils.parsedate(utils.formatdate(now, localtime=True))[:6], 2955 time.localtime(now)[:6]) 2956 2957 def test_formatdate_usegmt(self): 2958 now = time.time() 2959 self.assertEqual( 2960 utils.formatdate(now, localtime=False), 2961 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) 2962 self.assertEqual( 2963 utils.formatdate(now, localtime=False, usegmt=True), 2964 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) 2965 2966 # parsedate and parsedate_tz will become deprecated interfaces someday 2967 def test_parsedate_returns_None_for_invalid_strings(self): 2968 self.assertIsNone(utils.parsedate('')) 2969 self.assertIsNone(utils.parsedate_tz('')) 2970 self.assertIsNone(utils.parsedate('0')) 2971 self.assertIsNone(utils.parsedate_tz('0')) 2972 self.assertIsNone(utils.parsedate('A Complete Waste of Time')) 2973 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time')) 2974 # Not a part of the spec but, but this has historically worked: 2975 self.assertIsNone(utils.parsedate(None)) 2976 self.assertIsNone(utils.parsedate_tz(None)) 2977 2978 def test_parsedate_compact(self): 2979 # The FWS after the comma is optional 2980 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), 2981 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) 2982 2983 def test_parsedate_no_dayofweek(self): 2984 eq = self.assertEqual 2985 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), 2986 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) 2987 2988 def test_parsedate_compact_no_dayofweek(self): 2989 eq = self.assertEqual 2990 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), 2991 (2003, 2, 5, 13, 47, 26, 
0, 1, -1, -28800)) 2992 2993 def test_parsedate_no_space_before_positive_offset(self): 2994 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), 2995 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 2996 2997 def test_parsedate_no_space_before_negative_offset(self): 2998 # Issue 1155362: we already handled '+' for this case. 2999 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), 3000 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) 3001 3002 3003 def test_parsedate_accepts_time_with_dots(self): 3004 eq = self.assertEqual 3005 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'), 3006 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3007 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'), 3008 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)) 3009 3010 def test_parsedate_acceptable_to_time_functions(self): 3011 eq = self.assertEqual 3012 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') 3013 t = int(time.mktime(timetup)) 3014 eq(time.localtime(t)[:6], timetup[:6]) 3015 eq(int(time.strftime('%Y', timetup)), 2003) 3016 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') 3017 t = int(time.mktime(timetup[:9])) 3018 eq(time.localtime(t)[:6], timetup[:6]) 3019 eq(int(time.strftime('%Y', timetup[:9])), 2003) 3020 3021 def test_mktime_tz(self): 3022 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3023 -1, -1, -1, 0)), 0) 3024 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3025 -1, -1, -1, 1234)), -1234) 3026 3027 def test_parsedate_y2k(self): 3028 """Test for parsing a date with a two-digit year. 3029 3030 Parsing a date with a two-digit year should return the correct 3031 four-digit year. RFC822 allows two-digit years, but RFC2822 (which 3032 obsoletes RFC822) requires four-digit years. 
3033 3034 """ 3035 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), 3036 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) 3037 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), 3038 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) 3039 3040 def test_parseaddr_empty(self): 3041 self.assertEqual(utils.parseaddr('<>'), ('', '')) 3042 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') 3043 3044 def test_parseaddr_multiple_domains(self): 3045 self.assertEqual( 3046 utils.parseaddr('a@b@c'), 3047 ('', '') 3048 ) 3049 self.assertEqual( 3050 utils.parseaddr('a@b.c@c'), 3051 ('', '') 3052 ) 3053 self.assertEqual( 3054 utils.parseaddr('a@172.17.0.1@c'), 3055 ('', '') 3056 ) 3057 3058 def test_noquote_dump(self): 3059 self.assertEqual( 3060 utils.formataddr(('A Silly Person', 'person@dom.ain')), 3061 'A Silly Person <person@dom.ain>') 3062 3063 def test_escape_dump(self): 3064 self.assertEqual( 3065 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), 3066 r'"A (Very) Silly Person" <person@dom.ain>') 3067 self.assertEqual( 3068 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'), 3069 ('A (Very) Silly Person', 'person@dom.ain')) 3070 a = r'A \(Special\) Person' 3071 b = 'person@dom.ain' 3072 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3073 3074 def test_escape_backslashes(self): 3075 self.assertEqual( 3076 utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')), 3077 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>') 3078 a = r'Arthur \Backslash\ Foobar' 3079 b = 'person@dom.ain' 3080 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3081 3082 def test_quotes_unicode_names(self): 3083 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 
3084 name = "H\u00e4ns W\u00fcrst" 3085 addr = 'person@dom.ain' 3086 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3087 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>" 3088 self.assertEqual(utils.formataddr((name, addr)), utf8_base64) 3089 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), 3090 latin1_quopri) 3091 3092 def test_accepts_any_charset_like_object(self): 3093 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3094 name = "H\u00e4ns W\u00fcrst" 3095 addr = 'person@dom.ain' 3096 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3097 foobar = "FOOBAR" 3098 class CharsetMock: 3099 def header_encode(self, string): 3100 return foobar 3101 mock = CharsetMock() 3102 mock_expected = "%s <%s>" % (foobar, addr) 3103 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) 3104 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), 3105 utf8_base64) 3106 3107 def test_invalid_charset_like_object_raises_error(self): 3108 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3109 name = "H\u00e4ns W\u00fcrst" 3110 addr = 'person@dom.ain' 3111 # An object without a header_encode method: 3112 bad_charset = object() 3113 self.assertRaises(AttributeError, utils.formataddr, (name, addr), 3114 bad_charset) 3115 3116 def test_unicode_address_raises_error(self): 3117 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3118 addr = 'pers\u00f6n@dom.in' 3119 self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) 3120 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) 3121 3122 def test_name_with_dot(self): 3123 x = 'John X. Doe <jxd@example.com>' 3124 y = '"John X. Doe" <jxd@example.com>' 3125 a, b = ('John X. 
Doe', 'jxd@example.com') 3126 self.assertEqual(utils.parseaddr(x), (a, b)) 3127 self.assertEqual(utils.parseaddr(y), (a, b)) 3128 # formataddr() quotes the name if there's a dot in it 3129 self.assertEqual(utils.formataddr((a, b)), y) 3130 3131 def test_parseaddr_preserves_quoted_pairs_in_addresses(self): 3132 # issue 10005. Note that in the third test the second pair of 3133 # backslashes is not actually a quoted pair because it is not inside a 3134 # comment or quoted string: the address being parsed has a quoted 3135 # string containing a quoted backslash, followed by 'example' and two 3136 # backslashes, followed by another quoted string containing a space and 3137 # the word 'example'. parseaddr copies those two backslashes 3138 # literally. Per rfc5322 this is not technically correct since a \ may 3139 # not appear in an address outside of a quoted string. It is probably 3140 # a sensible Postel interpretation, though. 3141 eq = self.assertEqual 3142 eq(utils.parseaddr('""example" example"@example.com'), 3143 ('', '""example" example"@example.com')) 3144 eq(utils.parseaddr('"\\"example\\" example"@example.com'), 3145 ('', '"\\"example\\" example"@example.com')) 3146 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), 3147 ('', '"\\\\"example\\\\" example"@example.com')) 3148 3149 def test_parseaddr_preserves_spaces_in_local_part(self): 3150 # issue 9286. A normal RFC5322 local part should not contain any 3151 # folding white space, but legacy local parts can (they are a sequence 3152 # of atoms, not dotatoms). On the other hand we strip whitespace from 3153 # before the @ and around dots, on the assumption that the whitespace 3154 # around the punctuation is a mistake in what would otherwise be 3155 # an RFC5322 local part. Leading whitespace is, usual, stripped as well. 
3156 self.assertEqual(('', "merwok wok@xample.com"), 3157 utils.parseaddr("merwok wok@xample.com")) 3158 self.assertEqual(('', "merwok wok@xample.com"), 3159 utils.parseaddr("merwok wok@xample.com")) 3160 self.assertEqual(('', "merwok wok@xample.com"), 3161 utils.parseaddr(" merwok wok @xample.com")) 3162 self.assertEqual(('', 'merwok"wok" wok@xample.com'), 3163 utils.parseaddr('merwok"wok" wok@xample.com')) 3164 self.assertEqual(('', 'merwok.wok.wok@xample.com'), 3165 utils.parseaddr('merwok. wok . wok@xample.com')) 3166 3167 def test_formataddr_does_not_quote_parens_in_quoted_string(self): 3168 addr = ("'foo@example.com' (foo@example.com)", 3169 'foo@example.com') 3170 addrstr = ('"\'foo@example.com\' ' 3171 '(foo@example.com)" <foo@example.com>') 3172 self.assertEqual(utils.parseaddr(addrstr), addr) 3173 self.assertEqual(utils.formataddr(addr), addrstr) 3174 3175 3176 def test_multiline_from_comment(self): 3177 x = """\ 3178Foo 3179\tBar <foo@example.com>""" 3180 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) 3181 3182 def test_quote_dump(self): 3183 self.assertEqual( 3184 utils.formataddr(('A Silly; Person', 'person@dom.ain')), 3185 r'"A Silly; Person" <person@dom.ain>') 3186 3187 def test_charset_richcomparisons(self): 3188 eq = self.assertEqual 3189 ne = self.assertNotEqual 3190 cset1 = Charset() 3191 cset2 = Charset() 3192 eq(cset1, 'us-ascii') 3193 eq(cset1, 'US-ASCII') 3194 eq(cset1, 'Us-AsCiI') 3195 eq('us-ascii', cset1) 3196 eq('US-ASCII', cset1) 3197 eq('Us-AsCiI', cset1) 3198 ne(cset1, 'usascii') 3199 ne(cset1, 'USASCII') 3200 ne(cset1, 'UsAsCiI') 3201 ne('usascii', cset1) 3202 ne('USASCII', cset1) 3203 ne('UsAsCiI', cset1) 3204 eq(cset1, cset2) 3205 eq(cset2, cset1) 3206 3207 def test_getaddresses(self): 3208 eq = self.assertEqual 3209 eq(utils.getaddresses(['aperson@dom.ain (Al Person)', 3210 'Bud Person <bperson@dom.ain>']), 3211 [('Al Person', 'aperson@dom.ain'), 3212 ('Bud Person', 'bperson@dom.ain')]) 3213 3214 def 
test_getaddresses_nasty(self): 3215 eq = self.assertEqual 3216 eq(utils.getaddresses(['foo: ;']), [('', '')]) 3217 eq(utils.getaddresses( 3218 ['[]*-- =~$']), 3219 [('', ''), ('', ''), ('', '*--')]) 3220 eq(utils.getaddresses( 3221 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), 3222 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) 3223 3224 def test_getaddresses_embedded_comment(self): 3225 """Test proper handling of a nested comment""" 3226 eq = self.assertEqual 3227 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>']) 3228 eq(addrs[0][1], 'foo@bar.com') 3229 3230 def test_make_msgid_collisions(self): 3231 # Test make_msgid uniqueness, even with multiple threads 3232 class MsgidsThread(Thread): 3233 def run(self): 3234 # generate msgids for 3 seconds 3235 self.msgids = [] 3236 append = self.msgids.append 3237 make_msgid = utils.make_msgid 3238 clock = time.monotonic 3239 tfin = clock() + 3.0 3240 while clock() < tfin: 3241 append(make_msgid(domain='testdomain-string')) 3242 3243 threads = [MsgidsThread() for i in range(5)] 3244 with start_threads(threads): 3245 pass 3246 all_ids = sum([t.msgids for t in threads], []) 3247 self.assertEqual(len(set(all_ids)), len(all_ids)) 3248 3249 def test_utils_quote_unquote(self): 3250 eq = self.assertEqual 3251 msg = Message() 3252 msg.add_header('content-disposition', 'attachment', 3253 filename='foo\\wacky"name') 3254 eq(msg.get_filename(), 'foo\\wacky"name') 3255 3256 def test_get_body_encoding_with_bogus_charset(self): 3257 charset = Charset('not a charset') 3258 self.assertEqual(charset.get_body_encoding(), 'base64') 3259 3260 def test_get_body_encoding_with_uppercase_charset(self): 3261 eq = self.assertEqual 3262 msg = Message() 3263 msg['Content-Type'] = 'text/plain; charset=UTF-8' 3264 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3265 charsets = msg.get_charsets() 3266 eq(len(charsets), 1) 3267 eq(charsets[0], 'utf-8') 3268 charset = Charset(charsets[0]) 3269 
eq(charset.get_body_encoding(), 'base64') 3270 msg.set_payload(b'hello world', charset=charset) 3271 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3272 eq(msg.get_payload(decode=True), b'hello world') 3273 eq(msg['content-transfer-encoding'], 'base64') 3274 # Try another one 3275 msg = Message() 3276 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3277 charsets = msg.get_charsets() 3278 eq(len(charsets), 1) 3279 eq(charsets[0], 'us-ascii') 3280 charset = Charset(charsets[0]) 3281 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3282 msg.set_payload('hello world', charset=charset) 3283 eq(msg.get_payload(), 'hello world') 3284 eq(msg['content-transfer-encoding'], '7bit') 3285 3286 def test_charsets_case_insensitive(self): 3287 lc = Charset('us-ascii') 3288 uc = Charset('US-ASCII') 3289 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3290 3291 def test_partial_falls_inside_message_delivery_status(self): 3292 eq = self.ndiffAssertEqual 3293 # The Parser interface provides chunks of data to FeedParser in 8192 3294 # byte gulps. SF bug #1076485 found one of those chunks inside 3295 # message/delivery-status header block, which triggered an 3296 # unreadline() of NeedMoreData. 
3297 msg = self._msgobj('msg_43.txt') 3298 sfp = StringIO() 3299 iterators._structure(msg, sfp) 3300 eq(sfp.getvalue(), """\ 3301multipart/report 3302 text/plain 3303 message/delivery-status 3304 text/plain 3305 text/plain 3306 text/plain 3307 text/plain 3308 text/plain 3309 text/plain 3310 text/plain 3311 text/plain 3312 text/plain 3313 text/plain 3314 text/plain 3315 text/plain 3316 text/plain 3317 text/plain 3318 text/plain 3319 text/plain 3320 text/plain 3321 text/plain 3322 text/plain 3323 text/plain 3324 text/plain 3325 text/plain 3326 text/plain 3327 text/plain 3328 text/plain 3329 text/plain 3330 text/rfc822-headers 3331""") 3332 3333 def test_make_msgid_domain(self): 3334 self.assertEqual( 3335 email.utils.make_msgid(domain='testdomain-string')[-19:], 3336 '@testdomain-string>') 3337 3338 def test_make_msgid_idstring(self): 3339 self.assertEqual( 3340 email.utils.make_msgid(idstring='test-idstring', 3341 domain='testdomain-string')[-33:], 3342 '.test-idstring@testdomain-string>') 3343 3344 def test_make_msgid_default_domain(self): 3345 with patch('socket.getfqdn') as mock_getfqdn: 3346 mock_getfqdn.return_value = domain = 'pythontest.example.com' 3347 self.assertTrue( 3348 email.utils.make_msgid().endswith( 3349 '@' + domain + '>')) 3350 3351 def test_Generator_linend(self): 3352 # Issue 14645. 3353 with openfile('msg_26.txt', newline='\n') as f: 3354 msgtxt = f.read() 3355 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3356 msg = email.message_from_string(msgtxt) 3357 s = StringIO() 3358 g = email.generator.Generator(s) 3359 g.flatten(msg) 3360 self.assertEqual(s.getvalue(), msgtxt_nl) 3361 3362 def test_BytesGenerator_linend(self): 3363 # Issue 14645. 
3364 with openfile('msg_26.txt', newline='\n') as f: 3365 msgtxt = f.read() 3366 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3367 msg = email.message_from_string(msgtxt_nl) 3368 s = BytesIO() 3369 g = email.generator.BytesGenerator(s) 3370 g.flatten(msg, linesep='\r\n') 3371 self.assertEqual(s.getvalue().decode('ascii'), msgtxt) 3372 3373 def test_BytesGenerator_linend_with_non_ascii(self): 3374 # Issue 14645. 3375 with openfile('msg_26.txt', 'rb') as f: 3376 msgtxt = f.read() 3377 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6') 3378 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n') 3379 msg = email.message_from_bytes(msgtxt_nl) 3380 s = BytesIO() 3381 g = email.generator.BytesGenerator(s) 3382 g.flatten(msg, linesep='\r\n') 3383 self.assertEqual(s.getvalue(), msgtxt) 3384 3385 def test_mime_classes_policy_argument(self): 3386 with openfile('audiotest.au', 'rb') as fp: 3387 audiodata = fp.read() 3388 with openfile('PyBanner048.gif', 'rb') as fp: 3389 bindata = fp.read() 3390 classes = [ 3391 (MIMEApplication, ('',)), 3392 (MIMEAudio, (audiodata,)), 3393 (MIMEImage, (bindata,)), 3394 (MIMEMessage, (Message(),)), 3395 (MIMENonMultipart, ('multipart', 'mixed')), 3396 (MIMEText, ('',)), 3397 ] 3398 for cls, constructor in classes: 3399 with self.subTest(cls=cls.__name__, policy='compat32'): 3400 m = cls(*constructor) 3401 self.assertIs(m.policy, email.policy.compat32) 3402 with self.subTest(cls=cls.__name__, policy='default'): 3403 m = cls(*constructor, policy=email.policy.default) 3404 self.assertIs(m.policy, email.policy.default) 3405 3406 3407# Test the iterator/generators 3408class TestIterators(TestEmailBase): 3409 def test_body_line_iterator(self): 3410 eq = self.assertEqual 3411 neq = self.ndiffAssertEqual 3412 # First a simple non-multipart message 3413 msg = self._msgobj('msg_01.txt') 3414 it = iterators.body_line_iterator(msg) 3415 lines = list(it) 3416 eq(len(lines), 6) 3417 neq(EMPTYSTRING.join(lines), msg.get_payload()) 3418 # Now a more complicated multipart 
3419 msg = self._msgobj('msg_02.txt') 3420 it = iterators.body_line_iterator(msg) 3421 lines = list(it) 3422 eq(len(lines), 43) 3423 with openfile('msg_19.txt') as fp: 3424 neq(EMPTYSTRING.join(lines), fp.read()) 3425 3426 def test_typed_subpart_iterator(self): 3427 eq = self.assertEqual 3428 msg = self._msgobj('msg_04.txt') 3429 it = iterators.typed_subpart_iterator(msg, 'text') 3430 lines = [] 3431 subparts = 0 3432 for subpart in it: 3433 subparts += 1 3434 lines.append(subpart.get_payload()) 3435 eq(subparts, 2) 3436 eq(EMPTYSTRING.join(lines), """\ 3437a simple kind of mirror 3438to reflect upon our own 3439a simple kind of mirror 3440to reflect upon our own 3441""") 3442 3443 def test_typed_subpart_iterator_default_type(self): 3444 eq = self.assertEqual 3445 msg = self._msgobj('msg_03.txt') 3446 it = iterators.typed_subpart_iterator(msg, 'text', 'plain') 3447 lines = [] 3448 subparts = 0 3449 for subpart in it: 3450 subparts += 1 3451 lines.append(subpart.get_payload()) 3452 eq(subparts, 1) 3453 eq(EMPTYSTRING.join(lines), """\ 3454 3455Hi, 3456 3457Do you like this message? 3458 3459-Me 3460""") 3461 3462 def test_pushCR_LF(self): 3463 '''FeedParser BufferedSubFile.push() assumed it received complete 3464 line endings. A CR ending one push() followed by a LF starting 3465 the next push() added an empty line. 
'''
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        # Each entry is (pushed text, number of complete lines expected to
        # become readable right after that push).
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            for ol in iter(bsf.readline, NeedMoreData):
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))

    def test_push_random(self):
        # Push a random mix of text and line-ending characters in fixed-size
        # chunks; the lines read back must equal str.splitlines(True).
        from email.feedparser import BufferedSubFile, NeedMoreData

        n = 10000
        chunksize = 5
        chars = 'abcd \t\r\n'

        s = ''.join(choice(chars) for i in range(n)) + '\n'
        target = s.splitlines(True)

        bsf = BufferedSubFile()
        lines = []
        for i in range(0, len(s), chunksize):
            chunk = s[i:i+chunksize]
            bsf.push(chunk)
            lines.extend(iter(bsf.readline, NeedMoreData))
        self.assertEqual(lines, target)


class TestFeedParsers(TestEmailBase):

    def parse(self, chunks):
        # Feed every chunk to a fresh FeedParser and return the message
        # produced by close().
        feedparser = FeedParser()
        for chunk in chunks:
            feedparser.feed(chunk)
        return feedparser.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        # A plain str is passed here, so parse() feeds it one character
        # at a time.
        msg = self.parse("First: val\n: bad\nSecond: val")
        self.assertEqual(msg['First'], 'val')
        self.assertEqual(msg['Second'], 'val')

    def test_newlines(self):
        m = self.parse(['a:\nb:\rc:\r\nd:\n'])
        self.assertEqual(m.keys(), ['a', 'b', 'c', 'd'])
        m = self.parse(['a:\nb:\rc:\r\nd:'])
        self.assertEqual(m.keys(), ['a', 'b', 'c', 'd'])
        m = self.parse(['a:\rb', 'c:\n'])
        self.assertEqual(m.keys(), ['a', 'bc'])
        m = self.parse(['a:\r', 'b:\n'])
        self.assertEqual(m.keys(), ['a', 'b'])
        m = self.parse(['a:\r', '\nb:\n'])
        self.assertEqual(m.keys(), ['a', 'b'])

        # Only CR and LF should break
        # header fields
        m = self.parse(['a:\x85b:\u2028c:\n'])
        self.assertEqual(m.items(), [('a', '\x85b:\u2028c:')])
        m = self.parse(['a:\r', 'b:\x85', 'c:\n'])
        self.assertEqual(m.items(), [('a', ''), ('b', '\x85c:')])

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        m = self.parse(['a:b\n\n'] + ['x'*M] * N)
        self.assertEqual(m.items(), [('a', 'b')])
        self.assertEqual(m.get_payload(), 'x'*M*N)
        m = self.parse(['a:b\r\r'] + ['x'*M] * N)
        self.assertEqual(m.items(), [('a', 'b')])
        self.assertEqual(m.get_payload(), 'x'*M*N)
        m = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N)
        self.assertEqual(m.items(), [('a', 'b')])
        self.assertEqual(m.get_payload(), ('x'*M+'\x85')*N)
        m = self.parse(['a:\r', 'b: '] + ['x'*M] * N)
        self.assertEqual(m.items(), [('a', ''), ('b', 'x'*M*N)])


class TestParsers(TestEmailBase):

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_bytes_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'rb') as fp:
            email.parser.BytesParser().parse(fp)
            self.assertFalse(fp.closed)

    def test_bytes_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'rb') as fp:
            bytesParser = email.parser.BytesParser
            self.assertRaises(email.errors.StartBoundaryNotFoundDefect,
                              bytesParser(policy=email.policy.strict).parse,
                              fp)
            self.assertFalse(fp.closed)

    def test_parser_does_not_close_file(self):
        with openfile('msg_02.txt', 'r') as fp:
            email.parser.Parser().parse(fp)
            self.assertFalse(fp.closed)

    def test_parser_on_exception_does_not_close_file(self):
        with openfile('msg_15.txt', 'r') as fp:
            parser = email.parser.Parser
            self.assertRaises(email.errors.StartBoundaryNotFoundDefect,
                              parser(policy=email.policy.strict).parse, fp)
            self.assertFalse(fp.closed)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        headers = msg.keys()
        headers.sort()
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))


class Test8BitBytesHandling(TestEmailBase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Message template; the tests below fill in charset, cte and bodyline
    # with str.format() before encoding to bytes.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_8bit_in_base64_body(self):
        # If we get 8bit bytes in a base64 body, we can just ignore them
        # as being outside the base64 alphabet and decode anyway.  But
        # we register a defect.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstal'.encode('utf-8'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))


    # Pairs of (raw header source line, (name, expected encoded value)).
    # The source lines build headertest_msg; the expected values are used
    # by test_print_8bit_headers.
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                        ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """).format(*[expected[1] for (_, expected) in
                                        self.headertest_headers]))

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.values()],
                              ['foo@bar.com',
                               'b\uFFFD\uFFFDz',
                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                   'coll\uFFFD\uFFFDgue, le pouf '
                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                   '\tJean de Baddie',
                               "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
                              [('From', 'foo@bar.com'),
                               ('To', 'b\uFFFD\uFFFDz'),
                               ('Subject', 'Maintenant je vous '
                                  'pr\uFFFD\uFFFDsente '
                                  'mon coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie'),
                               ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.get_all('from')],
                              ['foo@bar.com',
                               'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1'))
        self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(msg.get_content_maintype(), "text")
        self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
    def test_get_params_with_8bit(self):
        msg = email.message_from_bytes(
            'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
        self.assertEqual(msg.get_params(header='x-header'),
           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
    def test_get_rfc2231_params_with_8bit(self):
        # NOTE(review): the title*= line must start with whitespace to be a
        # header continuation; the exact indent was not recoverable from this
        # copy -- confirm against upstream.  Same for the two tests below.
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        self.assertEqual(msg.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.set_param('title', 'test')
        self.assertEqual(msg.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        msg.del_param('title')
        self.assertEqual(msg.get_param('title'), None)
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        msg = email.message_from_bytes(textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1'))
        self.assertEqual(msg.get_payload(), 'payload\n')
        self.assertEqual(msg.get_payload(decode=True), b'payload\n')

    # UTF-8 encoded message with non-ASCII headers and an 8bit body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        #Issue 11019
        msg = email.message.Message()
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), b"\n")

    # Text that test_generator_handles_8bit expects Generator to emit for
    # non_latin_bin_msg.
    # NOTE(review): the continuation lines need leading whitespace; one
    # space is assumed here -- confirm against upstream.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        BytesGenerator(out).flatten(msg)
        orig_value = out.getvalue()
        Generator(StringIO()).flatten(msg) # Should not mutate msg!
        out = BytesIO()
        BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), orig_value)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Same as the wrapped form, except that the first two Subject lines
    # (list items 2 and 3) are replaced by a single unfolded line.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        #DecodedHeader output contains an extra blank line compared
        #to the input message.  RDM: not sure if this is a bug or not,
        #but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
            self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        bfp = email.feedparser.BytesFeedParser()
        # Feed the message in 10-byte slices.
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_8bit_multipart(self):
        # Issue 11605
        # NOTE(review): whitespace inside this literal (the indent of the
        # boundary= continuation line, the <title> and <p> lines, and any
        # run of spaces in "Guten Tag, ,") was not recoverable from this
        # copy -- confirm against upstream.  The test only compares the
        # flattened output with this same source.
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
            "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        msg = email.message_from_bytes(source)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in ByteGenerator, test Generator for completeness.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = StringIO()
        g = email.generator.Generator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    maxDiff = None


# Mixin with bytes-based _msgobj/_idempotent helpers.  It reads
# self.linesep, self.blinesep and self.normalize_linesep_regex, which the
# concrete subclasses below define.
class BaseTestBytesGeneratorIdempotent:

    maxDiff = None

    def _msgobj(self, filename):
        with openfile(filename, 'rb') as fp:
            data = fp.read()
        # Normalize line endings to this class's separator before parsing.
        data = self.normalize_linesep_regex.sub(self.blinesep, data)
        msg = email.message_from_bytes(data)
        return msg, data

    def _idempotent(self, msg, data, unixfrom=False):
        b = BytesIO()
        g = email.generator.BytesGenerator(b, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, b.getvalue())


class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    linesep = '\n'
    blinesep = b'\n'
    # Rewrites every CRLF as blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')


class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    linesep = '\r\n'
    blinesep = b'\r\n'
    # Rewrites every LF not already preceded by CR as blinesep.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')


class TestBase64(unittest.TestCase):
    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            if   size == 0 : bsize = 0
            elif size <= 3 : bsize = 4
            elif size <= 6 : bsize = 8
            elif size <= 9 : bsize = 12
            elif size <= 12: bsize = 16
            else           : bsize = 20
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')



class TestQuopri(unittest.TestCase):
    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            (c for c in b'!*+-/')))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: encode `header`, passing `charset` only when one is given,
        # and compare with the expected encoded form.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Helper: decode and compare with the expected plain text.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: decode, passing `eol` only when one is given, and compare.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    # NOTE(review): runs of spaces inside the literals of the two
    # trailing-space tests below may have been collapsed in this copy --
    # confirm against upstream.
    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: call body_encode with only the keyword arguments that were
        # given, compare the result, and (for known line separators) check
        # that no output line exceeds maxlinelen.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_line_eol_after_non_ascii(self):
        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
                          'hello=CF=85\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    # NOTE(review): runs of spaces inside the literals of the trailing
    # space/tab tests below may have been collapsed in this copy -- confirm
    # against upstream before relying on them.
    def test_encode_one_word_trailing_spaces(self):
        self._test_encode('hello ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # Test the binary flag
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test
the maxlinelen arg 4561 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\ 4562xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= 4563 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= 4564x xxxx xxxx xxxx xxxx=20""") 4565 # Test the eol argument 4566 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4567 """\ 4568xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r 4569 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r 4570x xxxx xxxx xxxx xxxx=20""") 4571 eq(quoprimime.body_encode("""\ 4572one line 4573 4574two line"""), """\ 4575one line 4576 4577two line""") 4578 4579 4580 4581# Test the Charset class 4582class TestCharset(unittest.TestCase): 4583 def tearDown(self): 4584 from email import charset as CharsetModule 4585 try: 4586 del CharsetModule.CHARSETS['fake'] 4587 except KeyError: 4588 pass 4589 4590 def test_codec_encodeable(self): 4591 eq = self.assertEqual 4592 # Make sure us-ascii = no Unicode conversion 4593 c = Charset('us-ascii') 4594 eq(c.header_encode('Hello World!'), 'Hello World!') 4595 # Test 8-bit idempotency with us-ascii 4596 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' 4597 self.assertRaises(UnicodeError, c.header_encode, s) 4598 c = Charset('utf-8') 4599 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=') 4600 4601 def test_body_encode(self): 4602 eq = self.assertEqual 4603 # Try a charset with QP body encoding 4604 c = Charset('iso-8859-1') 4605 eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) 4606 # Try a charset with Base64 body encoding 4607 c = Charset('utf-8') 4608 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world')) 4609 # Try a charset with None body encoding 4610 c = Charset('us-ascii') 4611 eq('hello world', c.body_encode('hello world')) 4612 # Try the convert argument, where input codec != output codec 4613 c = Charset('euc-jp') 4614 # With apologies to Tokio Kikuchi ;) 4615 # XXX FIXME 4616## try: 4617## eq('\x1b$B5FCO;~IW\x1b(B', 4618## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) 4619## 
eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', 4620## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) 4621## except LookupError: 4622## # We probably don't have the Japanese codecs installed 4623## pass 4624 # Testing SF bug #625509, which we have to fake, since there are no 4625 # built-in encodings where the header encoding is QP but the body 4626 # encoding is not. 4627 from email import charset as CharsetModule 4628 CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8') 4629 c = Charset('fake') 4630 eq('hello world', c.body_encode('hello world')) 4631 4632 def test_unicode_charset_name(self): 4633 charset = Charset('us-ascii') 4634 self.assertEqual(str(charset), 'us-ascii') 4635 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') 4636 4637 4638 4639# Test multilingual MIME headers. 4640class TestHeader(TestEmailBase): 4641 def test_simple(self): 4642 eq = self.ndiffAssertEqual 4643 h = Header('Hello World!') 4644 eq(h.encode(), 'Hello World!') 4645 h.append(' Goodbye World!') 4646 eq(h.encode(), 'Hello World! Goodbye World!') 4647 4648 def test_simple_surprise(self): 4649 eq = self.ndiffAssertEqual 4650 h = Header('Hello World!') 4651 eq(h.encode(), 'Hello World!') 4652 h.append('Goodbye World!') 4653 eq(h.encode(), 'Hello World! 
Goodbye World!') 4654 4655 def test_header_needs_no_decoding(self): 4656 h = 'no decoding needed' 4657 self.assertEqual(decode_header(h), [(h, None)]) 4658 4659 def test_long(self): 4660 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", 4661 maxlinelen=76) 4662 for l in h.encode(splitchars=' ').split('\n '): 4663 self.assertLessEqual(len(l), 76) 4664 4665 def test_multilingual(self): 4666 eq = self.ndiffAssertEqual 4667 g = Charset("iso-8859-1") 4668 cz = Charset("iso-8859-2") 4669 utf8 = Charset("utf-8") 4670 g_head = (b'Die Mieter treten hier ein werden mit einem ' 4671 b'Foerderband komfortabel den Korridor entlang, ' 4672 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, ' 4673 b'gegen die rotierenden Klingen bef\xf6rdert. ') 4674 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 4675 b'd\xf9vtipu.. ') 4676 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 4677 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 4678 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 4679 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 4680 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 4681 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 4682 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 4683 '\u3044\u307e\u3059\u3002') 4684 h = Header(g_head, g) 4685 h.append(cz_head, cz) 4686 h.append(utf8_head, utf8) 4687 enc = h.encode(maxlinelen=76) 4688 eq(enc, """\ 4689=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?= 4690 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?= 4691 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?= 4692 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= 4693 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 4694 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= 4695 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= 4696 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= 4697 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?= 4698 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?= 4699 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""") 4700 decoded = decode_header(enc) 4701 eq(len(decoded), 3) 4702 eq(decoded[0], (g_head, 'iso-8859-1')) 4703 eq(decoded[1], (cz_head, 'iso-8859-2')) 4704 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8')) 4705 ustr = str(h) 4706 eq(ustr, 4707 (b'Die Mieter treten hier ein werden mit einem Foerderband ' 4708 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' 4709 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' 4710 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' 4711 b'tlakem jejich d\xc5\xafvtipu.. 
\xe6\xad\xa3\xe7\xa2\xba\xe3\x81' 4712 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' 4713 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' 4714 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' 4715 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' 4716 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' 4717 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' 4718 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' 4719 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' 4720 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' 4721 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' 4722 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82' 4723 ).decode('utf-8')) 4724 # Test make_header() 4725 newh = make_header(decode_header(enc)) 4726 eq(newh, h) 4727 4728 def test_empty_header_encode(self): 4729 h = Header() 4730 self.assertEqual(h.encode(), '') 4731 4732 def test_header_ctor_default_args(self): 4733 eq = self.ndiffAssertEqual 4734 h = Header() 4735 eq(h, '') 4736 h.append('foo', Charset('iso-8859-1')) 4737 eq(h, 'foo') 4738 4739 def test_explicit_maxlinelen(self): 4740 eq = self.ndiffAssertEqual 4741 hstr = ('A very long line that must get split to something other ' 4742 'than at the 76th character boundary to test the non-default ' 4743 'behavior') 4744 h = Header(hstr) 4745 eq(h.encode(), '''\ 4746A very long line that must get split to something other than at the 76th 4747 character boundary to test the non-default behavior''') 4748 eq(str(h), hstr) 4749 h = Header(hstr, header_name='Subject') 4750 eq(h.encode(), '''\ 4751A very long line that must get split to something other than at the 4752 76th character boundary to test the non-default behavior''') 4753 eq(str(h), hstr) 4754 h = Header(hstr, maxlinelen=1024, header_name='Subject') 4755 eq(h.encode(), hstr) 4756 eq(str(h), hstr) 4757 4758 
def test_quopri_splittable(self): 4759 eq = self.ndiffAssertEqual 4760 h = Header(charset='iso-8859-1', maxlinelen=20) 4761 x = 'xxxx ' * 20 4762 h.append(x) 4763 s = h.encode() 4764 eq(s, """\ 4765=?iso-8859-1?q?xxx?= 4766 =?iso-8859-1?q?x_?= 4767 =?iso-8859-1?q?xx?= 4768 =?iso-8859-1?q?xx?= 4769 =?iso-8859-1?q?_x?= 4770 =?iso-8859-1?q?xx?= 4771 =?iso-8859-1?q?x_?= 4772 =?iso-8859-1?q?xx?= 4773 =?iso-8859-1?q?xx?= 4774 =?iso-8859-1?q?_x?= 4775 =?iso-8859-1?q?xx?= 4776 =?iso-8859-1?q?x_?= 4777 =?iso-8859-1?q?xx?= 4778 =?iso-8859-1?q?xx?= 4779 =?iso-8859-1?q?_x?= 4780 =?iso-8859-1?q?xx?= 4781 =?iso-8859-1?q?x_?= 4782 =?iso-8859-1?q?xx?= 4783 =?iso-8859-1?q?xx?= 4784 =?iso-8859-1?q?_x?= 4785 =?iso-8859-1?q?xx?= 4786 =?iso-8859-1?q?x_?= 4787 =?iso-8859-1?q?xx?= 4788 =?iso-8859-1?q?xx?= 4789 =?iso-8859-1?q?_x?= 4790 =?iso-8859-1?q?xx?= 4791 =?iso-8859-1?q?x_?= 4792 =?iso-8859-1?q?xx?= 4793 =?iso-8859-1?q?xx?= 4794 =?iso-8859-1?q?_x?= 4795 =?iso-8859-1?q?xx?= 4796 =?iso-8859-1?q?x_?= 4797 =?iso-8859-1?q?xx?= 4798 =?iso-8859-1?q?xx?= 4799 =?iso-8859-1?q?_x?= 4800 =?iso-8859-1?q?xx?= 4801 =?iso-8859-1?q?x_?= 4802 =?iso-8859-1?q?xx?= 4803 =?iso-8859-1?q?xx?= 4804 =?iso-8859-1?q?_x?= 4805 =?iso-8859-1?q?xx?= 4806 =?iso-8859-1?q?x_?= 4807 =?iso-8859-1?q?xx?= 4808 =?iso-8859-1?q?xx?= 4809 =?iso-8859-1?q?_x?= 4810 =?iso-8859-1?q?xx?= 4811 =?iso-8859-1?q?x_?= 4812 =?iso-8859-1?q?xx?= 4813 =?iso-8859-1?q?xx?= 4814 =?iso-8859-1?q?_?=""") 4815 eq(x, str(make_header(decode_header(s)))) 4816 h = Header(charset='iso-8859-1', maxlinelen=40) 4817 h.append('xxxx ' * 20) 4818 s = h.encode() 4819 eq(s, """\ 4820=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?= 4821 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?= 4822 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= 4823 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= 4824 =?iso-8859-1?q?_xxxx_xxxx_?=""") 4825 eq(x, str(make_header(decode_header(s)))) 4826 4827 def test_base64_splittable(self): 4828 eq = self.ndiffAssertEqual 4829 h = Header(charset='koi8-r', 
maxlinelen=20) 4830 x = 'xxxx ' * 20 4831 h.append(x) 4832 s = h.encode() 4833 eq(s, """\ 4834=?koi8-r?b?eHh4?= 4835 =?koi8-r?b?eCB4?= 4836 =?koi8-r?b?eHh4?= 4837 =?koi8-r?b?IHh4?= 4838 =?koi8-r?b?eHgg?= 4839 =?koi8-r?b?eHh4?= 4840 =?koi8-r?b?eCB4?= 4841 =?koi8-r?b?eHh4?= 4842 =?koi8-r?b?IHh4?= 4843 =?koi8-r?b?eHgg?= 4844 =?koi8-r?b?eHh4?= 4845 =?koi8-r?b?eCB4?= 4846 =?koi8-r?b?eHh4?= 4847 =?koi8-r?b?IHh4?= 4848 =?koi8-r?b?eHgg?= 4849 =?koi8-r?b?eHh4?= 4850 =?koi8-r?b?eCB4?= 4851 =?koi8-r?b?eHh4?= 4852 =?koi8-r?b?IHh4?= 4853 =?koi8-r?b?eHgg?= 4854 =?koi8-r?b?eHh4?= 4855 =?koi8-r?b?eCB4?= 4856 =?koi8-r?b?eHh4?= 4857 =?koi8-r?b?IHh4?= 4858 =?koi8-r?b?eHgg?= 4859 =?koi8-r?b?eHh4?= 4860 =?koi8-r?b?eCB4?= 4861 =?koi8-r?b?eHh4?= 4862 =?koi8-r?b?IHh4?= 4863 =?koi8-r?b?eHgg?= 4864 =?koi8-r?b?eHh4?= 4865 =?koi8-r?b?eCB4?= 4866 =?koi8-r?b?eHh4?= 4867 =?koi8-r?b?IA==?=""") 4868 eq(x, str(make_header(decode_header(s)))) 4869 h = Header(charset='koi8-r', maxlinelen=40) 4870 h.append(x) 4871 s = h.encode() 4872 eq(s, """\ 4873=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?= 4874 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?= 4875 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?= 4876 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?= 4877 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?= 4878 =?koi8-r?b?eHh4eCB4eHh4IA==?=""") 4879 eq(x, str(make_header(decode_header(s)))) 4880 4881 def test_us_ascii_header(self): 4882 eq = self.assertEqual 4883 s = 'hello' 4884 x = decode_header(s) 4885 eq(x, [('hello', None)]) 4886 h = make_header(x) 4887 eq(s, h.encode()) 4888 4889 def test_string_charset(self): 4890 eq = self.assertEqual 4891 h = Header() 4892 h.append('hello', 'iso-8859-1') 4893 eq(h, 'hello') 4894 4895## def test_unicode_error(self): 4896## raises = self.assertRaises 4897## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') 4898## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') 4899## h = Header() 4900## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') 4901## raises(UnicodeError, h.append, '[P\xf6stal]', 
'us-ascii') 4902## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') 4903 4904 def test_utf8_shortest(self): 4905 eq = self.assertEqual 4906 h = Header('p\xf6stal', 'utf-8') 4907 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') 4908 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8') 4909 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') 4910 4911 def test_bad_8bit_header(self): 4912 raises = self.assertRaises 4913 eq = self.assertEqual 4914 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4915 raises(UnicodeError, Header, x) 4916 h = Header() 4917 raises(UnicodeError, h.append, x) 4918 e = x.decode('utf-8', 'replace') 4919 eq(str(Header(x, errors='replace')), e) 4920 h.append(x, errors='replace') 4921 eq(str(h), e) 4922 4923 def test_escaped_8bit_header(self): 4924 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4925 e = x.decode('ascii', 'surrogateescape') 4926 h = Header(e, charset=email.charset.UNKNOWN8BIT) 4927 self.assertEqual(str(h), 4928 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4929 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4930 4931 def test_header_handles_binary_unknown8bit(self): 4932 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4933 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4934 self.assertEqual(str(h), 4935 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4936 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4937 4938 def test_make_header_handles_binary_unknown8bit(self): 4939 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4940 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4941 h2 = email.header.make_header(email.header.decode_header(h)) 4942 self.assertEqual(str(h2), 4943 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4944 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) 4945 4946 def test_modify_returned_list_does_not_change_header(self): 4947 h = Header('test') 4948 chunks = 
email.header.decode_header(h) 4949 chunks.append(('ascii', 'test2')) 4950 self.assertEqual(str(h), 'test') 4951 4952 def test_encoded_adjacent_nonencoded(self): 4953 eq = self.assertEqual 4954 h = Header() 4955 h.append('hello', 'iso-8859-1') 4956 h.append('world') 4957 s = h.encode() 4958 eq(s, '=?iso-8859-1?q?hello?= world') 4959 h = make_header(decode_header(s)) 4960 eq(h.encode(), s) 4961 4962 def test_whitespace_keeper(self): 4963 eq = self.assertEqual 4964 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' 4965 parts = decode_header(s) 4966 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)]) 4967 hdr = make_header(parts) 4968 eq(hdr.encode(), 4969 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 4970 4971 def test_broken_base64_header(self): 4972 raises = self.assertRaises 4973 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' 4974 raises(errors.HeaderParseError, decode_header, s) 4975 4976 def test_shift_jis_charset(self): 4977 h = Header('文', charset='shift_jis') 4978 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=') 4979 4980 def test_flatten_header_with_no_value(self): 4981 # Issue 11401 (regression from email 4.x) Note that the space after 4982 # the header doesn't reflect the input, but this is also the way 4983 # email 4.x behaved. At some point it would be nice to fix that. 
4984 msg = email.message_from_string("EmptyHeader:") 4985 self.assertEqual(str(msg), "EmptyHeader: \n\n") 4986 4987 def test_encode_preserves_leading_ws_on_value(self): 4988 msg = Message() 4989 msg['SomeHeader'] = ' value with leading ws' 4990 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n") 4991 4992 def test_whitespace_header(self): 4993 self.assertEqual(Header(' ').encode(), ' ') 4994 4995 4996 4997# Test RFC 2231 header parameters (en/de)coding 4998class TestRFC2231(TestEmailBase): 4999 5000 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5001 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5002 def test_get_param(self): 5003 eq = self.assertEqual 5004 msg = self._msgobj('msg_29.txt') 5005 eq(msg.get_param('title'), 5006 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5007 eq(msg.get_param('title', unquote=False), 5008 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) 5009 5010 def test_set_param(self): 5011 eq = self.ndiffAssertEqual 5012 msg = Message() 5013 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5014 charset='us-ascii') 5015 eq(msg.get_param('title'), 5016 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) 5017 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5018 charset='us-ascii', language='en') 5019 eq(msg.get_param('title'), 5020 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5021 msg = self._msgobj('msg_01.txt') 5022 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5023 charset='us-ascii', language='en') 5024 eq(msg.as_string(maxheaderlen=78), """\ 5025Return-Path: <bbb@zzz.org> 5026Delivered-To: bbb@zzz.org 5027Received: by mail.zzz.org (Postfix, from userid 889) 5028\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5029MIME-Version: 1.0 5030Content-Transfer-Encoding: 7bit 5031Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5032From: bbb@ddd.com 
(John X. Doe) 5033To: bbb@zzz.org 5034Subject: This is a test message 5035Date: Fri, 4 May 2001 14:05:44 -0400 5036Content-Type: text/plain; charset=us-ascii; 5037 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5038 5039 5040Hi, 5041 5042Do you like this message? 5043 5044-Me 5045""") 5046 5047 def test_set_param_requote(self): 5048 msg = Message() 5049 msg.set_param('title', 'foo') 5050 self.assertEqual(msg['content-type'], 'text/plain; title="foo"') 5051 msg.set_param('title', 'bar', requote=False) 5052 self.assertEqual(msg['content-type'], 'text/plain; title=bar') 5053 # tspecial is still quoted. 5054 msg.set_param('title', "(bar)bell", requote=False) 5055 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"') 5056 5057 def test_del_param(self): 5058 eq = self.ndiffAssertEqual 5059 msg = self._msgobj('msg_01.txt') 5060 msg.set_param('foo', 'bar', charset='us-ascii', language='en') 5061 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5062 charset='us-ascii', language='en') 5063 msg.del_param('foo', header='Content-Type') 5064 eq(msg.as_string(maxheaderlen=78), """\ 5065Return-Path: <bbb@zzz.org> 5066Delivered-To: bbb@zzz.org 5067Received: by mail.zzz.org (Postfix, from userid 889) 5068\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5069MIME-Version: 1.0 5070Content-Transfer-Encoding: 7bit 5071Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5072From: bbb@ddd.com (John X. Doe) 5073To: bbb@zzz.org 5074Subject: This is a test message 5075Date: Fri, 4 May 2001 14:05:44 -0400 5076Content-Type: text/plain; charset="us-ascii"; 5077 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5078 5079 5080Hi, 5081 5082Do you like this message? 5083 5084-Me 5085""") 5086 5087 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset 5088 # I changed the charset name, though, because the one in the file isn't 5089 # a legal charset name. 
Should add a test for an illegal charset. 5090 def test_rfc2231_get_content_charset(self): 5091 eq = self.assertEqual 5092 msg = self._msgobj('msg_32.txt') 5093 eq(msg.get_content_charset(), 'us-ascii') 5094 5095 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes 5096 def test_rfc2231_parse_rfc_quoting(self): 5097 m = textwrap.dedent('''\ 5098 Content-Disposition: inline; 5099 \tfilename*0*=''This%20is%20even%20more%20; 5100 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; 5101 \tfilename*2="is it not.pdf" 5102 5103 ''') 5104 msg = email.message_from_string(m) 5105 self.assertEqual(msg.get_filename(), 5106 'This is even more ***fun*** is it not.pdf') 5107 self.assertEqual(m, msg.as_string()) 5108 5109 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5110 def test_rfc2231_parse_extra_quoting(self): 5111 m = textwrap.dedent('''\ 5112 Content-Disposition: inline; 5113 \tfilename*0*="''This%20is%20even%20more%20"; 5114 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5115 \tfilename*2="is it not.pdf" 5116 5117 ''') 5118 msg = email.message_from_string(m) 5119 self.assertEqual(msg.get_filename(), 5120 'This is even more ***fun*** is it not.pdf') 5121 self.assertEqual(m, msg.as_string()) 5122 5123 # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset 5124 # but new test uses *0* because otherwise lang/charset is not valid. 
5125 # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values 5126 def test_rfc2231_no_language_or_charset(self): 5127 m = '''\ 5128Content-Transfer-Encoding: 8bit 5129Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" 5130Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm 5131 5132''' 5133 msg = email.message_from_string(m) 5134 param = msg.get_param('NAME') 5135 self.assertNotIsInstance(param, tuple) 5136 self.assertEqual( 5137 param, 5138 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') 5139 5140 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset 5141 def test_rfc2231_no_language_or_charset_in_filename(self): 5142 m = '''\ 5143Content-Disposition: inline; 5144\tfilename*0*="''This%20is%20even%20more%20"; 5145\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5146\tfilename*2="is it not.pdf" 5147 5148''' 5149 msg = email.message_from_string(m) 5150 self.assertEqual(msg.get_filename(), 5151 'This is even more ***fun*** is it not.pdf') 5152 5153 # Duplicate of previous test? 5154 def test_rfc2231_no_language_or_charset_in_filename_encoded(self): 5155 m = '''\ 5156Content-Disposition: inline; 5157\tfilename*0*="''This%20is%20even%20more%20"; 5158\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5159\tfilename*2="is it not.pdf" 5160 5161''' 5162 msg = email.message_from_string(m) 5163 self.assertEqual(msg.get_filename(), 5164 'This is even more ***fun*** is it not.pdf') 5165 5166 # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded, 5167 # but the test below is wrong (the first part should be decoded). 
5168 def test_rfc2231_partly_encoded(self): 5169 m = '''\ 5170Content-Disposition: inline; 5171\tfilename*0="''This%20is%20even%20more%20"; 5172\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5173\tfilename*2="is it not.pdf" 5174 5175''' 5176 msg = email.message_from_string(m) 5177 self.assertEqual( 5178 msg.get_filename(), 5179 'This%20is%20even%20more%20***fun*** is it not.pdf') 5180 5181 def test_rfc2231_partly_nonencoded(self): 5182 m = '''\ 5183Content-Disposition: inline; 5184\tfilename*0="This%20is%20even%20more%20"; 5185\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; 5186\tfilename*2="is it not.pdf" 5187 5188''' 5189 msg = email.message_from_string(m) 5190 self.assertEqual( 5191 msg.get_filename(), 5192 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') 5193 5194 def test_rfc2231_no_language_or_charset_in_boundary(self): 5195 m = '''\ 5196Content-Type: multipart/alternative; 5197\tboundary*0*="''This%20is%20even%20more%20"; 5198\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5199\tboundary*2="is it not.pdf" 5200 5201''' 5202 msg = email.message_from_string(m) 5203 self.assertEqual(msg.get_boundary(), 5204 'This is even more ***fun*** is it not.pdf') 5205 5206 def test_rfc2231_no_language_or_charset_in_charset(self): 5207 # This is a nonsensical charset value, but tests the code anyway 5208 m = '''\ 5209Content-Type: text/plain; 5210\tcharset*0*="This%20is%20even%20more%20"; 5211\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5212\tcharset*2="is it not.pdf" 5213 5214''' 5215 msg = email.message_from_string(m) 5216 self.assertEqual(msg.get_content_charset(), 5217 'this is even more ***fun*** is it not.pdf') 5218 5219 # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii 5220 def test_rfc2231_bad_encoding_in_filename(self): 5221 m = '''\ 5222Content-Disposition: inline; 5223\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; 5224\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5225\tfilename*2="is it not.pdf" 5226 5227''' 5228 msg = 
email.message_from_string(m) 5229 self.assertEqual(msg.get_filename(), 5230 'This is even more ***fun*** is it not.pdf') 5231 5232 def test_rfc2231_bad_encoding_in_charset(self): 5233 m = """\ 5234Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D 5235 5236""" 5237 msg = email.message_from_string(m) 5238 # This should return None because non-ascii characters in the charset 5239 # are not allowed. 5240 self.assertEqual(msg.get_content_charset(), None) 5241 5242 def test_rfc2231_bad_character_in_charset(self): 5243 m = """\ 5244Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D 5245 5246""" 5247 msg = email.message_from_string(m) 5248 # This should return None because non-ascii characters in the charset 5249 # are not allowed. 5250 self.assertEqual(msg.get_content_charset(), None) 5251 5252 def test_rfc2231_bad_character_in_filename(self): 5253 m = '''\ 5254Content-Disposition: inline; 5255\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; 5256\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5257\tfilename*2*="is it not.pdf%E2" 5258 5259''' 5260 msg = email.message_from_string(m) 5261 self.assertEqual(msg.get_filename(), 5262 'This is even more ***fun*** is it not.pdf\ufffd') 5263 5264 def test_rfc2231_unknown_encoding(self): 5265 m = """\ 5266Content-Transfer-Encoding: 8bit 5267Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt 5268 5269""" 5270 msg = email.message_from_string(m) 5271 self.assertEqual(msg.get_filename(), 'myfile.txt') 5272 5273 def test_rfc2231_single_tick_in_filename_extended(self): 5274 eq = self.assertEqual 5275 m = """\ 5276Content-Type: application/x-foo; 5277\tname*0*=\"Frank's\"; name*1*=\" Document\" 5278 5279""" 5280 msg = email.message_from_string(m) 5281 charset, language, s = msg.get_param('name') 5282 eq(charset, None) 5283 eq(language, None) 5284 eq(s, "Frank's Document") 5285 5286 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5287 def test_rfc2231_single_tick_in_filename(self): 
5288 m = """\ 5289Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" 5290 5291""" 5292 msg = email.message_from_string(m) 5293 param = msg.get_param('name') 5294 self.assertNotIsInstance(param, tuple) 5295 self.assertEqual(param, "Frank's Document") 5296 5297 def test_rfc2231_missing_tick(self): 5298 m = '''\ 5299Content-Disposition: inline; 5300\tfilename*0*="'This%20is%20broken"; 5301''' 5302 msg = email.message_from_string(m) 5303 self.assertEqual( 5304 msg.get_filename(), 5305 "'This is broken") 5306 5307 def test_rfc2231_missing_tick_with_encoded_non_ascii(self): 5308 m = '''\ 5309Content-Disposition: inline; 5310\tfilename*0*="'This%20is%E2broken"; 5311''' 5312 msg = email.message_from_string(m) 5313 self.assertEqual( 5314 msg.get_filename(), 5315 "'This is\ufffdbroken") 5316 5317 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang 5318 def test_rfc2231_tick_attack_extended(self): 5319 eq = self.assertEqual 5320 m = """\ 5321Content-Type: application/x-foo; 5322\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" 5323 5324""" 5325 msg = email.message_from_string(m) 5326 charset, language, s = msg.get_param('name') 5327 eq(charset, 'us-ascii') 5328 eq(language, 'en-us') 5329 eq(s, "Frank's Document") 5330 5331 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value 5332 def test_rfc2231_tick_attack(self): 5333 m = """\ 5334Content-Type: application/x-foo; 5335\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" 5336 5337""" 5338 msg = email.message_from_string(m) 5339 param = msg.get_param('name') 5340 self.assertNotIsInstance(param, tuple) 5341 self.assertEqual(param, "us-ascii'en-us'Frank's Document") 5342 5343 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes 5344 def test_rfc2231_no_extended_values(self): 5345 eq = self.assertEqual 5346 m = """\ 5347Content-Type: application/x-foo; name=\"Frank's Document\" 5348 5349""" 
5350 msg = email.message_from_string(m) 5351 eq(msg.get_param('name'), "Frank's Document") 5352 5353 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments 5354 def test_rfc2231_encoded_then_unencoded_segments(self): 5355 eq = self.assertEqual 5356 m = """\ 5357Content-Type: application/x-foo; 5358\tname*0*=\"us-ascii'en-us'My\"; 5359\tname*1=\" Document\"; 5360\tname*2*=\" For You\" 5361 5362""" 5363 msg = email.message_from_string(m) 5364 charset, language, s = msg.get_param('name') 5365 eq(charset, 'us-ascii') 5366 eq(language, 'en-us') 5367 eq(s, 'My Document For You') 5368 5369 # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments 5370 # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments 5371 def test_rfc2231_unencoded_then_encoded_segments(self): 5372 eq = self.assertEqual 5373 m = """\ 5374Content-Type: application/x-foo; 5375\tname*0=\"us-ascii'en-us'My\"; 5376\tname*1*=\" Document\"; 5377\tname*2*=\" For You\" 5378 5379""" 5380 msg = email.message_from_string(m) 5381 charset, language, s = msg.get_param('name') 5382 eq(charset, 'us-ascii') 5383 eq(language, 'en-us') 5384 eq(s, 'My Document For You') 5385 5386 def test_should_not_hang_on_invalid_ew_messages(self): 5387 messages = ["""From: user@host.com 5388To: user@host.com 5389Bad-Header: 5390 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?= 5391 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?= 5392 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?= 5393 5394Hello! 
5395""", """From: ����� �������� <xxx@xxx> 5396To: "xxx" <xxx@xxx> 5397Subject: ��� ���������� ����� ����� � ��������� �� ���� 5398MIME-Version: 1.0 5399Content-Type: text/plain; charset="windows-1251"; 5400Content-Transfer-Encoding: 8bit 5401 5402�� ����� � ���� ������ ��� �������� 5403"""] 5404 for m in messages: 5405 with self.subTest(m=m): 5406 msg = email.message_from_string(m) 5407 5408 5409# Tests to ensure that signed parts of an email are completely preserved, as 5410# required by RFC1847 section 2.1. Note that these are incomplete, because the 5411# email package does not currently always preserve the body. See issue 1670765. 5412class TestSigned(TestEmailBase): 5413 5414 def _msg_and_obj(self, filename): 5415 with openfile(filename) as fp: 5416 original = fp.read() 5417 msg = email.message_from_string(original) 5418 return original, msg 5419 5420 def _signed_parts_eq(self, original, result): 5421 # Extract the first mime part of each message 5422 import re 5423 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) 5424 inpart = repart.search(original).group(2) 5425 outpart = repart.search(result).group(2) 5426 self.assertEqual(outpart, inpart) 5427 5428 def test_long_headers_as_string(self): 5429 original, msg = self._msg_and_obj('msg_45.txt') 5430 result = msg.as_string() 5431 self._signed_parts_eq(original, result) 5432 5433 def test_long_headers_as_string_maxheaderlen(self): 5434 original, msg = self._msg_and_obj('msg_45.txt') 5435 result = msg.as_string(maxheaderlen=60) 5436 self._signed_parts_eq(original, result) 5437 5438 def test_long_headers_flatten(self): 5439 original, msg = self._msg_and_obj('msg_45.txt') 5440 fp = StringIO() 5441 Generator(fp).flatten(msg) 5442 result = fp.getvalue() 5443 self._signed_parts_eq(original, result) 5444 5445 5446 5447if __name__ == '__main__': 5448 unittest.main() 5449