1# Copyright (C) 2001-2010 Python Software Foundation 2# Contact: email-sig@python.org 3# email package unit tests 4 5import re 6import time 7import base64 8import unittest 9import textwrap 10import warnings 11 12from io import StringIO, BytesIO 13from itertools import chain 14from random import choice 15from threading import Thread 16from unittest.mock import patch 17 18import email 19import email.policy 20import email.utils 21 22from email.charset import Charset 23from email.generator import Generator, DecodedGenerator, BytesGenerator 24from email.header import Header, decode_header, make_header 25from email.headerregistry import HeaderRegistry 26from email.message import Message 27from email.mime.application import MIMEApplication 28from email.mime.audio import MIMEAudio 29from email.mime.base import MIMEBase 30from email.mime.image import MIMEImage 31from email.mime.message import MIMEMessage 32from email.mime.multipart import MIMEMultipart 33from email.mime.nonmultipart import MIMENonMultipart 34from email.mime.text import MIMEText 35from email.parser import Parser, HeaderParser 36from email import base64mime 37from email import encoders 38from email import errors 39from email import iterators 40from email import quoprimime 41from email import utils 42 43from test.support import threading_helper 44from test.support.os_helper import unlink 45from test.test_email import openfile, TestEmailBase 46 47# These imports are documented to work, but we are testing them using a 48# different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header returns the supplied failobj unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt', encoding="utf-8") as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', encoding="utf-8") as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Message.__setitem__ appends rather than replaces, and header
            # lookup returns the first match.  Drop the previous value so
            # that each spelling of the encoding is really exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt', encoding="utf-8") as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the envelope line precedes the original text.
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_nonascii_as_string_without_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            F=F6=F6 b=E4r
            """)
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    def test_nonascii_as_string_without_content_type_and_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0

            Test if non-ascii messages with no Content-Type nor
            Content-Transfer-Encoding set can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        expected = source.decode('ascii', 'replace')
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # The quoted value must contain literal semicolons (here as part of
        # "&amp;") so that the parameter splitter is shown not to break the
        # value apart at a ';' that sits inside quotes.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header name only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_get_content_disposition(self):
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Build the failure message from adjacent literals; calling
                # str() on a tuple of the two halves would format the
                # tuple's repr instead of a readable sentence.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.')
                    .format(charset, encoding))

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
764 def test_embedded_header_via_Header_rejected(self): 765 msg = Message() 766 msg['Dummy'] = Header('dummy\nX-Injected-Header: test') 767 self.assertRaises(errors.HeaderParseError, msg.as_string) 768 769 def test_embedded_header_via_string_rejected(self): 770 msg = Message() 771 msg['Dummy'] = 'dummy\nX-Injected-Header: test' 772 self.assertRaises(errors.HeaderParseError, msg.as_string) 773 774 def test_unicode_header_defaults_to_utf8_encoding(self): 775 # Issue 14291 776 m = MIMEText('abc\n') 777 m['Subject'] = 'É test' 778 self.assertEqual(str(m),textwrap.dedent("""\ 779 Content-Type: text/plain; charset="us-ascii" 780 MIME-Version: 1.0 781 Content-Transfer-Encoding: 7bit 782 Subject: =?utf-8?q?=C3=89_test?= 783 784 abc 785 """)) 786 787 def test_unicode_body_defaults_to_utf8_encoding(self): 788 # Issue 14291 789 m = MIMEText('É testabc\n') 790 self.assertEqual(str(m),textwrap.dedent("""\ 791 Content-Type: text/plain; charset="utf-8" 792 MIME-Version: 1.0 793 Content-Transfer-Encoding: base64 794 795 w4kgdGVzdGFiYwo= 796 """)) 797 798 799# Test the email.encoders module 800class TestEncoders(unittest.TestCase): 801 802 def test_EncodersEncode_base64(self): 803 with openfile('python.gif', 'rb') as fp: 804 bindata = fp.read() 805 mimed = email.mime.image.MIMEImage(bindata) 806 base64ed = mimed.get_payload() 807 # the transfer-encoded body lines should all be <=76 characters 808 lines = base64ed.split('\n') 809 self.assertLessEqual(max([ len(x) for x in lines ]), 76) 810 811 def test_encode_empty_payload(self): 812 eq = self.assertEqual 813 msg = Message() 814 msg.set_charset('us-ascii') 815 eq(msg['content-transfer-encoding'], '7bit') 816 817 def test_default_cte(self): 818 eq = self.assertEqual 819 # 7bit data and the default us-ascii _charset 820 msg = MIMEText('hello world') 821 eq(msg['content-transfer-encoding'], '7bit') 822 # Similar, but with 8bit data 823 msg = MIMEText('hello \xf8 world') 824 eq(msg['content-transfer-encoding'], 'base64') 825 # And now 
with a different charset 826 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') 827 eq(msg['content-transfer-encoding'], 'quoted-printable') 828 829 def test_encode7or8bit(self): 830 # Make sure a charset whose input character set is 8bit but 831 # whose output character set is 7bit gets a transfer-encoding 832 # of 7bit. 833 eq = self.assertEqual 834 msg = MIMEText('文\n', _charset='euc-jp') 835 eq(msg['content-transfer-encoding'], '7bit') 836 eq(msg.as_string(), textwrap.dedent("""\ 837 MIME-Version: 1.0 838 Content-Type: text/plain; charset="iso-2022-jp" 839 Content-Transfer-Encoding: 7bit 840 841 \x1b$BJ8\x1b(B 842 """)) 843 844 def test_qp_encode_latin1(self): 845 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') 846 self.assertEqual(str(msg), textwrap.dedent("""\ 847 MIME-Version: 1.0 848 Content-Type: text/text; charset="iso-8859-1" 849 Content-Transfer-Encoding: quoted-printable 850 851 =E1=F6 852 """)) 853 854 def test_qp_encode_non_latin1(self): 855 # Issue 16948 856 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') 857 self.assertEqual(str(msg), textwrap.dedent("""\ 858 MIME-Version: 1.0 859 Content-Type: text/text; charset="iso-8859-2" 860 Content-Transfer-Encoding: quoted-printable 861 862 =BF 863 """)) 864 865 866# Test long header wrapping 867class TestLongHeaders(TestEmailBase): 868 869 maxDiff = None 870 871 def test_split_long_continuation(self): 872 eq = self.ndiffAssertEqual 873 msg = email.message_from_string("""\ 874Subject: bug demonstration 875\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 876\tmore text 877 878test 879""") 880 sfp = StringIO() 881 g = Generator(sfp) 882 g.flatten(msg) 883 eq(sfp.getvalue(), """\ 884Subject: bug demonstration 885\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 886\tmore text 887 888test 889""") 890 891 def 
test_another_long_almost_unsplittable_header(self): 892 eq = self.ndiffAssertEqual 893 hstr = """\ 894bug demonstration 895\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 896\tmore text""" 897 h = Header(hstr, continuation_ws='\t') 898 eq(h.encode(), """\ 899bug demonstration 900\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 901\tmore text""") 902 h = Header(hstr.replace('\t', ' ')) 903 eq(h.encode(), """\ 904bug demonstration 905 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 906 more text""") 907 908 def test_long_nonstring(self): 909 eq = self.ndiffAssertEqual 910 g = Charset("iso-8859-1") 911 cz = Charset("iso-8859-2") 912 utf8 = Charset("utf-8") 913 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' 914 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 915 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 916 b'bef\xf6rdert. ') 917 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 918 b'd\xf9vtipu.. ') 919 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 920 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 921 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 922 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 923 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 924 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 925 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 926 '\u3044\u307e\u3059\u3002') 927 h = Header(g_head, g, header_name='Subject') 928 h.append(cz_head, cz) 929 h.append(utf8_head, utf8) 930 msg = Message() 931 msg['Subject'] = h 932 sfp = StringIO() 933 g = Generator(sfp) 934 g.flatten(msg) 935 eq(sfp.getvalue(), """\ 936Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= 937 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= 938 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= 939 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= 940 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 941 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= 942 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= 943 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= 944 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= 945 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= 946 =?utf-8?b?44CC?= 947 948""") 949 eq(h.encode(maxlinelen=76), """\ 950=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= 951 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= 952 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= 953 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= 954 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 955 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= 956 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= 957 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= 958 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= 959 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= 
960 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 961 962 def test_long_header_encode(self): 963 eq = self.ndiffAssertEqual 964 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 965 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 966 header_name='X-Foobar-Spoink-Defrobnit') 967 eq(h.encode(), '''\ 968wasnipoop; giraffes="very-long-necked-animals"; 969 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 970 971 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 972 eq = self.ndiffAssertEqual 973 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 974 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 975 header_name='X-Foobar-Spoink-Defrobnit', 976 continuation_ws='\t') 977 eq(h.encode(), '''\ 978wasnipoop; giraffes="very-long-necked-animals"; 979 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 980 981 def test_long_header_encode_with_tab_continuation(self): 982 eq = self.ndiffAssertEqual 983 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 984 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 985 header_name='X-Foobar-Spoink-Defrobnit', 986 continuation_ws='\t') 987 eq(h.encode(), '''\ 988wasnipoop; giraffes="very-long-necked-animals"; 989\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 990 991 def test_header_encode_with_different_output_charset(self): 992 h = Header('文', 'euc-jp') 993 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 994 995 def test_long_header_encode_with_different_output_charset(self): 996 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 997 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 998 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 999 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 1000 res = """\ 1001=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 1002 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 
1003 self.assertEqual(h.encode(), res) 1004 1005 def test_header_splitter(self): 1006 eq = self.ndiffAssertEqual 1007 msg = MIMEText('') 1008 # It'd be great if we could use add_header() here, but that doesn't 1009 # guarantee an order of the parameters. 1010 msg['X-Foobar-Spoink-Defrobnit'] = ( 1011 'wasnipoop; giraffes="very-long-necked-animals"; ' 1012 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 1013 sfp = StringIO() 1014 g = Generator(sfp) 1015 g.flatten(msg) 1016 eq(sfp.getvalue(), '''\ 1017Content-Type: text/plain; charset="us-ascii" 1018MIME-Version: 1.0 1019Content-Transfer-Encoding: 7bit 1020X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 1021 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 1022 1023''') 1024 1025 def test_no_semis_header_splitter(self): 1026 eq = self.ndiffAssertEqual 1027 msg = Message() 1028 msg['From'] = 'test@dom.ain' 1029 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10)) 1030 msg.set_payload('Test') 1031 sfp = StringIO() 1032 g = Generator(sfp) 1033 g.flatten(msg) 1034 eq(sfp.getvalue(), """\ 1035From: test@dom.ain 1036References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> 1037 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> 1038 1039Test""") 1040 1041 def test_last_split_chunk_does_not_fit(self): 1042 eq = self.ndiffAssertEqual 1043 h = Header('Subject: the first part of this is short, but_the_second' 1044 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1045 '_all_by_itself') 1046 eq(h.encode(), """\ 1047Subject: the first part of this is short, 1048 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1049 1050 def test_splittable_leading_char_followed_by_overlong_unsplittable(self): 1051 eq = self.ndiffAssertEqual 1052 h = Header(', but_the_second' 1053 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1054 '_all_by_itself') 1055 eq(h.encode(), 
"""\ 1056, 1057 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1058 1059 def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self): 1060 eq = self.ndiffAssertEqual 1061 h = Header(', , but_the_second' 1062 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1063 '_all_by_itself') 1064 eq(h.encode(), """\ 1065, , 1066 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1067 1068 def test_trailing_splittable_on_overlong_unsplittable(self): 1069 eq = self.ndiffAssertEqual 1070 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1071 'be_on_a_line_all_by_itself;') 1072 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1073 "be_on_a_line_all_by_itself;") 1074 1075 def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self): 1076 eq = self.ndiffAssertEqual 1077 h = Header('; ' 1078 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1079 'be_on_a_line_all_by_itself; ') 1080 eq(h.encode(), """\ 1081; 1082 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1083 1084 def test_long_header_with_multiple_sequential_split_chars(self): 1085 eq = self.ndiffAssertEqual 1086 h = Header('This is a long line that has two whitespaces in a row. ' 1087 'This used to cause truncation of the header when folded') 1088 eq(h.encode(), """\ 1089This is a long line that has two whitespaces in a row. 
This used to cause 1090 truncation of the header when folded""") 1091 1092 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1093 eq = self.ndiffAssertEqual 1094 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1095 'they;arenotlegal;fold,points') 1096 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1097 "arenotlegal;fold,points") 1098 1099 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1100 eq = self.ndiffAssertEqual 1101 h = Header('this is a test where we need to have more than one line ' 1102 'before; our final line that is just too big to fit;; ' 1103 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1104 'be_on_a_line_all_by_itself;') 1105 eq(h.encode(), """\ 1106this is a test where we need to have more than one line before; 1107 our final line that is just too big to fit;; 1108 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1109 1110 def test_overlong_last_part_followed_by_split_point(self): 1111 eq = self.ndiffAssertEqual 1112 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1113 'be_on_a_line_all_by_itself ') 1114 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1115 "should_be_on_a_line_all_by_itself ") 1116 1117 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1118 eq = self.ndiffAssertEqual 1119 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1120 'before_our_final_line_; ; ' 1121 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1122 'be_on_a_line_all_by_itself; ') 1123 eq(h.encode(), """\ 1124this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1125 ; 1126 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1127 1128 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1129 eq = self.ndiffAssertEqual 1130 h = Header('this is a 
test where we need to have more than one line ' 1131 'before our final line; ; ' 1132 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1133 'be_on_a_line_all_by_itself; ') 1134 eq(h.encode(), """\ 1135this is a test where we need to have more than one line before our final line; 1136 ; 1137 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1138 1139 def test_long_header_with_whitespace_runs(self): 1140 eq = self.ndiffAssertEqual 1141 msg = Message() 1142 msg['From'] = 'test@dom.ain' 1143 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10) 1144 msg.set_payload('Test') 1145 sfp = StringIO() 1146 g = Generator(sfp) 1147 g.flatten(msg) 1148 eq(sfp.getvalue(), """\ 1149From: test@dom.ain 1150References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1151 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1152 <foo@dom.ain> <foo@dom.ain>\x20\x20 1153 1154Test""") 1155 1156 def test_long_run_with_semi_header_splitter(self): 1157 eq = self.ndiffAssertEqual 1158 msg = Message() 1159 msg['From'] = 'test@dom.ain' 1160 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc' 1161 msg.set_payload('Test') 1162 sfp = StringIO() 1163 g = Generator(sfp) 1164 g.flatten(msg) 1165 eq(sfp.getvalue(), """\ 1166From: test@dom.ain 1167References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1168 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1169 <foo@dom.ain>; abc 1170 1171Test""") 1172 1173 def test_splitter_split_on_punctuation_only_if_fws(self): 1174 eq = self.ndiffAssertEqual 1175 msg = Message() 1176 msg['From'] = 'test@dom.ain' 1177 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1178 'they;arenotlegal;fold,points') 1179 msg.set_payload('Test') 1180 sfp = StringIO() 1181 g = Generator(sfp) 1182 g.flatten(msg) 1183 # XXX the space after the header should not be there. 
1184 eq(sfp.getvalue(), """\ 1185From: test@dom.ain 1186References:\x20 1187 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1188 1189Test""") 1190 1191 def test_no_split_long_header(self): 1192 eq = self.ndiffAssertEqual 1193 hstr = 'References: ' + 'x' * 80 1194 h = Header(hstr) 1195 # These come on two lines because Headers are really field value 1196 # classes and don't really know about their field names. 1197 eq(h.encode(), """\ 1198References: 1199 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1200 h = Header('x' * 80) 1201 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1202 1203 def test_splitting_multiple_long_lines(self): 1204 eq = self.ndiffAssertEqual 1205 hstr = """\ 1206from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1207\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1208\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1209""" 1210 h = Header(hstr, continuation_ws='\t') 1211 eq(h.encode(), """\ 1212from babylon.socal-raves.org (localhost [127.0.0.1]); 1213 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1214 for <mailman-admin@babylon.socal-raves.org>; 1215 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1216\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1217 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1218 for <mailman-admin@babylon.socal-raves.org>; 1219 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1220\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 
1221 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1222 for <mailman-admin@babylon.socal-raves.org>; 1223 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1224 1225 def test_splitting_first_line_only_is_long(self): 1226 eq = self.ndiffAssertEqual 1227 hstr = """\ 1228from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1229\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1230\tid 17k4h5-00034i-00 1231\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" 1232 h = Header(hstr, maxlinelen=78, header_name='Received', 1233 continuation_ws='\t') 1234 eq(h.encode(), """\ 1235from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1236 helo=cthulhu.gerg.ca) 1237\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1238\tid 17k4h5-00034i-00 1239\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") 1240 1241 def test_long_8bit_header(self): 1242 eq = self.ndiffAssertEqual 1243 msg = Message() 1244 h = Header('Britische Regierung gibt', 'iso-8859-1', 1245 header_name='Subject') 1246 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1247 eq(h.encode(maxlinelen=76), """\ 1248=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1249 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1250 msg['Subject'] = h 1251 eq(msg.as_string(maxheaderlen=76), """\ 1252Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1253 =?iso-8859-1?q?hore-Windkraftprojekte?= 1254 1255""") 1256 eq(msg.as_string(maxheaderlen=0), """\ 1257Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1258 1259""") 1260 1261 def test_long_8bit_header_no_charset(self): 1262 eq = self.ndiffAssertEqual 1263 msg = Message() 1264 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1265 'f\xfcr Offshore-Windkraftprojekte ' 1266 '<a-very-long-address@example.com>') 1267 msg['Reply-To'] = header_string 1268 eq(msg.as_string(maxheaderlen=78), """\ 
1269Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1270 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1271 1272""") 1273 msg = Message() 1274 msg['Reply-To'] = Header(header_string, 1275 header_name='Reply-To') 1276 eq(msg.as_string(maxheaderlen=78), """\ 1277Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1278 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1279 1280""") 1281 1282 def test_long_to_header(self): 1283 eq = self.ndiffAssertEqual 1284 to = ('"Someone Test #A" <someone@eecs.umich.edu>,' 1285 '<someone@eecs.umich.edu>, ' 1286 '"Someone Test #B" <someone@umich.edu>, ' 1287 '"Someone Test #C" <someone@eecs.umich.edu>, ' 1288 '"Someone Test #D" <someone@eecs.umich.edu>') 1289 msg = Message() 1290 msg['To'] = to 1291 eq(msg.as_string(maxheaderlen=78), '''\ 1292To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>, 1293 "Someone Test #B" <someone@umich.edu>, 1294 "Someone Test #C" <someone@eecs.umich.edu>, 1295 "Someone Test #D" <someone@eecs.umich.edu> 1296 1297''') 1298 1299 def test_long_line_after_append(self): 1300 eq = self.ndiffAssertEqual 1301 s = 'This is an example of string which has almost the limit of header length.' 1302 h = Header(s) 1303 h.append('Add another line.') 1304 eq(h.encode(maxlinelen=76), """\ 1305This is an example of string which has almost the limit of header length. 1306 Add another line.""") 1307 1308 def test_shorter_line_with_append(self): 1309 eq = self.ndiffAssertEqual 1310 s = 'This is a shorter line.' 1311 h = Header(s) 1312 h.append('Add another sentence. (Surprise?)') 1313 eq(h.encode(), 1314 'This is a shorter line. Add another sentence. 
(Surprise?)') 1315 1316 def test_long_field_name(self): 1317 eq = self.ndiffAssertEqual 1318 fn = 'X-Very-Very-Very-Long-Header-Name' 1319 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1320 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1321 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1322 'bef\xf6rdert. ') 1323 h = Header(gs, 'iso-8859-1', header_name=fn) 1324 # BAW: this seems broken because the first line is too long 1325 eq(h.encode(maxlinelen=76), """\ 1326=?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1327 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1328 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1329 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1330 1331 def test_long_received_header(self): 1332 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1333 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1334 'Wed, 05 Mar 2003 18:10:18 -0700') 1335 msg = Message() 1336 msg['Received-1'] = Header(h, continuation_ws='\t') 1337 msg['Received-2'] = h 1338 # This should be splitting on spaces not semicolons. 1339 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1340Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1341 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1342 Wed, 05 Mar 2003 18:10:18 -0700 1343Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1344 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1345 Wed, 05 Mar 2003 18:10:18 -0700 1346 1347""") 1348 1349 def test_string_headerinst_eq(self): 1350 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.' 1351 'tu-muenchen.de> (David Bremner\'s message of ' 1352 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1353 msg = Message() 1354 msg['Received-1'] = Header(h, header_name='Received-1', 1355 continuation_ws='\t') 1356 msg['Received-2'] = h 1357 # XXX The space after the ':' should not be there. 
1358 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1359Received-1:\x20 1360 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1361 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1362Received-2:\x20 1363 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1364 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1365 1366""") 1367 1368 def test_long_unbreakable_lines_with_continuation(self): 1369 eq = self.ndiffAssertEqual 1370 msg = Message() 1371 t = """\ 1372iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1373 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" 1374 msg['Face-1'] = t 1375 msg['Face-2'] = Header(t, header_name='Face-2') 1376 msg['Face-3'] = ' ' + t 1377 # XXX This splitting is all wrong. It the first value line should be 1378 # snug against the field name or the space after the header not there. 1379 eq(msg.as_string(maxheaderlen=78), """\ 1380Face-1:\x20 1381 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1382 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1383Face-2:\x20 1384 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1385 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1386Face-3:\x20 1387 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1388 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1389 1390""") 1391 1392 def test_another_long_multiline_header(self): 1393 eq = self.ndiffAssertEqual 1394 m = ('Received: from siimage.com ' 1395 '([172.25.1.3]) by zima.siliconimage.com with ' 1396 'Microsoft SMTPSVC(5.0.2195.4905); ' 1397 'Wed, 16 Oct 2002 07:41:11 -0700') 1398 msg = email.message_from_string(m) 1399 eq(msg.as_string(maxheaderlen=78), '''\ 1400Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with 1401 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 
2002 07:41:11 -0700 1402 1403''') 1404 1405 def test_long_lines_with_different_header(self): 1406 eq = self.ndiffAssertEqual 1407 h = ('List-Unsubscribe: ' 1408 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,' 1409 ' <mailto:spamassassin-talk-request@lists.sourceforge.net' 1410 '?subject=unsubscribe>') 1411 msg = Message() 1412 msg['List'] = h 1413 msg['List'] = Header(h, header_name='List') 1414 eq(msg.as_string(maxheaderlen=78), """\ 1415List: List-Unsubscribe: 1416 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1417 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1418List: List-Unsubscribe: 1419 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1420 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1421 1422""") 1423 1424 def test_long_rfc2047_header_with_embedded_fws(self): 1425 h = Header(textwrap.dedent("""\ 1426 We're going to pretend this header is in a non-ascii character set 1427 \tto see if line wrapping with encoded words and embedded 1428 folding white space works"""), 1429 charset='utf-8', 1430 header_name='Test') 1431 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ 1432 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= 1433 =?utf-8?q?cter_set?= 1434 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= 1435 =?utf-8?q?_folding_white_space_works?=""")+'\n') 1436 1437 1438 1439# Test mangling of "From " lines in the body of a message 1440class TestFromMangling(unittest.TestCase): 1441 def setUp(self): 1442 self.msg = Message() 1443 self.msg['From'] = 'aaa@bbb.org' 1444 self.msg.set_payload("""\ 1445From the desk of A.A.A.: 1446Blah blah blah 1447""") 1448 1449 def test_mangled_from(self): 1450 s = StringIO() 1451 g = Generator(s, mangle_from_=True) 1452 g.flatten(self.msg) 1453 self.assertEqual(s.getvalue(), """\ 1454From: aaa@bbb.org 1455 1456>From the desk of A.A.A.: 1457Blah blah blah 1458""") 1459 
# Remaining TestFromMangling methods (the class header and setUp precede this span).
    # With mangle_from_=False the generator must write the payload through
    # unchanged, including the line that starts with "From ".
    def test_dont_mangle_from(self):
        s = StringIO()
        g = Generator(s, mangle_from_=False)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    # A multipart message with a "From " line both before the first boundary
    # and after the closing boundary: with mangling enabled, exactly two
    # output lines are expected to start with ">From ".
    def test_mangle_from_in_preamble_and_epilog(self):
        s = StringIO()
        g = Generator(s, mangle_from_=True)
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        g.flatten(msg)
        # Count the mangled lines rather than comparing the whole output.
        self.assertEqual(len([1 for x in s.getvalue().split('\n')
                              if x.startswith('>From ')]), 2)

    # Same mangling check through the bytes API: the body line contains
    # non-ASCII (UTF-8 encoded) bytes and must come back with only the
    # leading '>' added.
    def test_mangled_from_with_bad_bytes(self):
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        b = BytesIO()
        g = BytesGenerator(b, mangle_from_=True)
        g.flatten(msg)
        self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n')

    def test_multipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # The Content-Transfer-Encoding value is a non-ASCII character.
        # There is no assertion: the test only requires that parsing
        # completes without raising.
        source = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        msg = email.message_from_bytes(source)


# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Helper: read the 'sndhdr.<ext>' test file and build a MIMEAudio from it
    # without giving an explicit subtype.  Stores the raw bytes in
    # self._audiodata and the message object in self._au.
    def _make_audio(self, ext):
        with openfile(f'sndhdr.{ext}', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    # Maps file extension -> subtype MIMEAudio is expected to guess from the
    # file contents.
    def test_guess_minor_type(self):
        for ext, subtype in {
            'aifc': 'x-aiff',
            'aiff': 'x-aiff',
            'wav': 'x-wav',
            'au': 'basic',
        }.items():
            self._make_audio(ext)
            # No entry in the table above is None, so this fallback never
            # triggers here.
            subtype = ext if subtype is None else subtype
            self.assertEqual(self._au.get_content_type(), f'audio/{subtype}')

    # The stored payload, decoded as base64, must equal the original bytes.
    def test_encoding(self):
        self._make_audio('au')
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    # An explicitly supplied subtype is used as given in the content type.
    def test_checkSetMinor(self):
        self._make_audio('au')
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    # add_header() with a keyword parameter, then read the header back via
    # item access, get_params() and get_param().
    def test_add_header(self):
        self._make_audio('au')
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='sndhdr.au')
        eq(self._au['content-disposition'],
           'attachment; filename="sndhdr.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'sndhdr.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'sndhdr.au')
        # A fresh list serves as a unique sentinel so assertIs can verify
        # that the failobj itself is handed back.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param(
            'foo', failobj=missing,
            header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'), missing)



# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Helper: read the 'python.<ext>' test file and build a MIMEImage from it
    # without giving an explicit subtype.  Stores the raw bytes in
    # self._imgdata and the message object in self._im.
    def _make_image(self, ext):
        with openfile(f'python.{ext}', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    # Maps file extension -> expected guessed subtype; None means the
    # expected subtype is the extension itself.
    def test_guess_minor_type(self):
        for ext, subtype in {
            'bmp': None,
            'exr': None,
            'gif': None,
            'jpg': 'jpeg',
            'pbm': None,
            'pgm': None,
            'png': None,
            'ppm': None,
            'ras': 'rast',
            'sgi': 'rgb',
            'tiff': None,
            'webp': None,
            'xbm': None,
        }.items():
            self._make_image(ext)
            subtype = ext if subtype is None else subtype
            self.assertEqual(self._im.get_content_type(), f'image/{subtype}')

    # The stored payload, decoded as base64, must equal the original bytes.
    def test_encoding(self):
        self._make_image('gif')
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    # An explicitly supplied subtype is used as given in the content type.
    def test_checkSetMinor(self):
        self._make_image('gif')
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    # add_header() with a keyword parameter, then read the header back via
    # item access, get_params() and get_param().
    def test_add_header(self):
        self._make_image('gif')
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a unique sentinel so assertIs can verify
        # that the failobj itself is handed back.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'), missing)


# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # With no arguments beyond the data, the expected content type is
    # application/octet-stream and the expected CTE is base64.
    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    # The stored payload is the base64 text; decode=True returns the
    # original bytes.
    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        # Round-trip: flatten to bytes, re-parse, and check the copy.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks msg, not the re-parsed msg2 --
        # possibly a copy/paste slip; confirm intent before changing.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        # Round-trip: flatten to bytes, re-parse, and check the copy.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks msg, not the re-parsed msg2 --
        # possibly a copy/paste slip; confirm intent before changing.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        # Same round-trip as above, but the data starts with b'\x0b'.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        # The data ends in a space, which is expected to be encoded as =20.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        # Round-trip: flatten to bytes, re-parse, and check the copy.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks msg, not the re-parsed msg2 --
        # possibly a copy/paste slip; confirm intent before changing.
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    # Explicit base64 encoder: check the encoded payload text and the
    # decoded bytes, before and after a flatten/parse round-trip.
    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks msg, not the re-parsed msg2 --
        # possibly a copy/paste slip; confirm intent before changing.
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)


# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Shared fixture: a MIMEText built from plain ASCII text with no
    # explicit subtype or charset.
    def setUp(self):
        self._msg = MIMEText('hello there')

    # Content type and charset parameter of the fixture, plus failobj
    # handling for a missing parameter and for a missing header.
    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel so assertIs can verify the failobj is returned.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    # The payload is the original text and the message is not multipart.
    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    # _charset may be given as a string or as a Charset instance.
    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        charset = Charset('utf-8')
        # body_encoding is cleared before use; the final assertion expects
        # the payload to come back as the unencoded text.
        charset.body_encoding = None
        msg = MIMEText('hello there', _charset=charset)
        eq(msg.get_charset().input_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(), 'hello there')

    # ASCII text with an explicit us-ascii charset.
    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    # ASCII text with no charset argument: us-ascii is expected, and the
    # text must appear verbatim in the flattened message.
    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # Compares the get_charset() result directly against a plain str.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    # Non-ASCII (Cyrillic) text with an explicit utf-8 charset: the decoded
    # payload must be the UTF-8 encoding of the input.
    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    # Skipped unconditionally; it records the expectation that non-ASCII
    # text with no charset raises UnicodeEncodeError.
    @unittest.skip("can't fix because of backward compat in email5, "
            "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
def setUp(self):
    # Build the multipart/mixed fixture used by the tests of this class:
    # a text introduction plus the python.gif image.  The container, the
    # image part and the text part are kept on self._msg, self._im and
    # self._txt.
    with openfile('python.gif', 'rb') as fp:
        data = fp.read()
    container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
    image = MIMEImage(data, name='dingusfish.gif')
    image.add_header('content-disposition', 'attachment',
                     filename='dingusfish.gif')
    intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
    container.attach(intro)
    container.attach(image)
    container['From'] = 'Barry <barry@digicool.com>'
    container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
    container['Subject'] = 'Here is your dingus fish'

    now = 987809702.54848599
    timetuple = time.localtime(now)
    # tm_isdst == 0 selects the standard offset, anything else the DST one.
    if timetuple[-1] == 0:
        tzsecs = time.timezone
    else:
        tzsecs = time.altzone
    # time.timezone/time.altzone are seconds *west* of UTC, so a positive
    # value corresponds to a negative UTC offset.
    if tzsecs > 0:
        sign = '-'
    else:
        sign = '+'
    # Render the magnitude as HHMM.  The sign is emitted separately, so
    # abs() keeps eastern zones from producing "+-100", and divmod() keeps
    # offsets that are not whole hours (e.g. 19800 s -> 0530) correct.
    hours, minutes = divmod(abs(tzsecs) // 60, 60)
    tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
    container['Date'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
    self._msg = container
    self._im = image
    self._txt = intro
1850""" 1851 msg = Parser().parsestr(text) 1852 self.ndiffAssertEqual(text, msg.as_string()) 1853 1854 def test_no_parts_in_a_multipart_with_none_epilogue(self): 1855 outer = MIMEBase('multipart', 'mixed') 1856 outer['Subject'] = 'A subject' 1857 outer['To'] = 'aperson@dom.ain' 1858 outer['From'] = 'bperson@dom.ain' 1859 outer.set_boundary('BOUNDARY') 1860 self.ndiffAssertEqual(outer.as_string(), '''\ 1861Content-Type: multipart/mixed; boundary="BOUNDARY" 1862MIME-Version: 1.0 1863Subject: A subject 1864To: aperson@dom.ain 1865From: bperson@dom.ain 1866 1867--BOUNDARY 1868 1869--BOUNDARY-- 1870''') 1871 1872 def test_no_parts_in_a_multipart_with_empty_epilogue(self): 1873 outer = MIMEBase('multipart', 'mixed') 1874 outer['Subject'] = 'A subject' 1875 outer['To'] = 'aperson@dom.ain' 1876 outer['From'] = 'bperson@dom.ain' 1877 outer.preamble = '' 1878 outer.epilogue = '' 1879 outer.set_boundary('BOUNDARY') 1880 self.ndiffAssertEqual(outer.as_string(), '''\ 1881Content-Type: multipart/mixed; boundary="BOUNDARY" 1882MIME-Version: 1.0 1883Subject: A subject 1884To: aperson@dom.ain 1885From: bperson@dom.ain 1886 1887 1888--BOUNDARY 1889 1890--BOUNDARY-- 1891''') 1892 1893 def test_one_part_in_a_multipart(self): 1894 eq = self.ndiffAssertEqual 1895 outer = MIMEBase('multipart', 'mixed') 1896 outer['Subject'] = 'A subject' 1897 outer['To'] = 'aperson@dom.ain' 1898 outer['From'] = 'bperson@dom.ain' 1899 outer.set_boundary('BOUNDARY') 1900 msg = MIMEText('hello world') 1901 outer.attach(msg) 1902 eq(outer.as_string(), '''\ 1903Content-Type: multipart/mixed; boundary="BOUNDARY" 1904MIME-Version: 1.0 1905Subject: A subject 1906To: aperson@dom.ain 1907From: bperson@dom.ain 1908 1909--BOUNDARY 1910Content-Type: text/plain; charset="us-ascii" 1911MIME-Version: 1.0 1912Content-Transfer-Encoding: 7bit 1913 1914hello world 1915--BOUNDARY-- 1916''') 1917 1918 def test_seq_parts_in_a_multipart_with_empty_preamble(self): 1919 eq = self.ndiffAssertEqual 1920 outer = 
MIMEBase('multipart', 'mixed') 1921 outer['Subject'] = 'A subject' 1922 outer['To'] = 'aperson@dom.ain' 1923 outer['From'] = 'bperson@dom.ain' 1924 outer.preamble = '' 1925 msg = MIMEText('hello world') 1926 outer.attach(msg) 1927 outer.set_boundary('BOUNDARY') 1928 eq(outer.as_string(), '''\ 1929Content-Type: multipart/mixed; boundary="BOUNDARY" 1930MIME-Version: 1.0 1931Subject: A subject 1932To: aperson@dom.ain 1933From: bperson@dom.ain 1934 1935 1936--BOUNDARY 1937Content-Type: text/plain; charset="us-ascii" 1938MIME-Version: 1.0 1939Content-Transfer-Encoding: 7bit 1940 1941hello world 1942--BOUNDARY-- 1943''') 1944 1945 1946 def test_seq_parts_in_a_multipart_with_none_preamble(self): 1947 eq = self.ndiffAssertEqual 1948 outer = MIMEBase('multipart', 'mixed') 1949 outer['Subject'] = 'A subject' 1950 outer['To'] = 'aperson@dom.ain' 1951 outer['From'] = 'bperson@dom.ain' 1952 outer.preamble = None 1953 msg = MIMEText('hello world') 1954 outer.attach(msg) 1955 outer.set_boundary('BOUNDARY') 1956 eq(outer.as_string(), '''\ 1957Content-Type: multipart/mixed; boundary="BOUNDARY" 1958MIME-Version: 1.0 1959Subject: A subject 1960To: aperson@dom.ain 1961From: bperson@dom.ain 1962 1963--BOUNDARY 1964Content-Type: text/plain; charset="us-ascii" 1965MIME-Version: 1.0 1966Content-Transfer-Encoding: 7bit 1967 1968hello world 1969--BOUNDARY-- 1970''') 1971 1972 1973 def test_seq_parts_in_a_multipart_with_none_epilogue(self): 1974 eq = self.ndiffAssertEqual 1975 outer = MIMEBase('multipart', 'mixed') 1976 outer['Subject'] = 'A subject' 1977 outer['To'] = 'aperson@dom.ain' 1978 outer['From'] = 'bperson@dom.ain' 1979 outer.epilogue = None 1980 msg = MIMEText('hello world') 1981 outer.attach(msg) 1982 outer.set_boundary('BOUNDARY') 1983 eq(outer.as_string(), '''\ 1984Content-Type: multipart/mixed; boundary="BOUNDARY" 1985MIME-Version: 1.0 1986Subject: A subject 1987To: aperson@dom.ain 1988From: bperson@dom.ain 1989 1990--BOUNDARY 1991Content-Type: text/plain; charset="us-ascii" 
1992MIME-Version: 1.0 1993Content-Transfer-Encoding: 7bit 1994 1995hello world 1996--BOUNDARY-- 1997''') 1998 1999 2000 def test_seq_parts_in_a_multipart_with_empty_epilogue(self): 2001 eq = self.ndiffAssertEqual 2002 outer = MIMEBase('multipart', 'mixed') 2003 outer['Subject'] = 'A subject' 2004 outer['To'] = 'aperson@dom.ain' 2005 outer['From'] = 'bperson@dom.ain' 2006 outer.epilogue = '' 2007 msg = MIMEText('hello world') 2008 outer.attach(msg) 2009 outer.set_boundary('BOUNDARY') 2010 eq(outer.as_string(), '''\ 2011Content-Type: multipart/mixed; boundary="BOUNDARY" 2012MIME-Version: 1.0 2013Subject: A subject 2014To: aperson@dom.ain 2015From: bperson@dom.ain 2016 2017--BOUNDARY 2018Content-Type: text/plain; charset="us-ascii" 2019MIME-Version: 1.0 2020Content-Transfer-Encoding: 7bit 2021 2022hello world 2023--BOUNDARY-- 2024''') 2025 2026 2027 def test_seq_parts_in_a_multipart_with_nl_epilogue(self): 2028 eq = self.ndiffAssertEqual 2029 outer = MIMEBase('multipart', 'mixed') 2030 outer['Subject'] = 'A subject' 2031 outer['To'] = 'aperson@dom.ain' 2032 outer['From'] = 'bperson@dom.ain' 2033 outer.epilogue = '\n' 2034 msg = MIMEText('hello world') 2035 outer.attach(msg) 2036 outer.set_boundary('BOUNDARY') 2037 eq(outer.as_string(), '''\ 2038Content-Type: multipart/mixed; boundary="BOUNDARY" 2039MIME-Version: 1.0 2040Subject: A subject 2041To: aperson@dom.ain 2042From: bperson@dom.ain 2043 2044--BOUNDARY 2045Content-Type: text/plain; charset="us-ascii" 2046MIME-Version: 1.0 2047Content-Transfer-Encoding: 7bit 2048 2049hello world 2050--BOUNDARY-- 2051 2052''') 2053 2054 def test_message_external_body(self): 2055 eq = self.assertEqual 2056 msg = self._msgobj('msg_36.txt') 2057 eq(len(msg.get_payload()), 2) 2058 msg1 = msg.get_payload(1) 2059 eq(msg1.get_content_type(), 'multipart/alternative') 2060 eq(len(msg1.get_payload()), 2) 2061 for subpart in msg1.get_payload(): 2062 eq(subpart.get_content_type(), 'message/external-body') 2063 eq(len(subpart.get_payload()), 1) 
def test_double_boundary(self):
    # msg_37.txt is a multipart that contains two dash-boundaries in a
    # row.  Our interpretation of RFC 2046 calls for ignoring the second
    # and subsequent boundaries, so three parts are expected.
    parts = self._msgobj('msg_37.txt').get_payload()
    self.assertEqual(len(parts), 3)
Again, I believe the way this is 2097 # parsed is closest to the spirit of RFC 2046 2098 msg = self._msgobj('msg_39.txt') 2099 sfp = StringIO() 2100 iterators._structure(msg, sfp) 2101 eq(sfp.getvalue(), """\ 2102multipart/mixed 2103 multipart/mixed 2104 multipart/alternative 2105 application/octet-stream 2106 application/octet-stream 2107 text/plain 2108""") 2109 2110 def test_boundary_in_non_multipart(self): 2111 msg = self._msgobj('msg_40.txt') 2112 self.assertEqual(msg.as_string(), '''\ 2113MIME-Version: 1.0 2114Content-Type: text/html; boundary="--961284236552522269" 2115 2116----961284236552522269 2117Content-Type: text/html; 2118Content-Transfer-Encoding: 7Bit 2119 2120<html></html> 2121 2122----961284236552522269-- 2123''') 2124 2125 def test_boundary_with_leading_space(self): 2126 eq = self.assertEqual 2127 msg = email.message_from_string('''\ 2128MIME-Version: 1.0 2129Content-Type: multipart/mixed; boundary=" XXXX" 2130 2131-- XXXX 2132Content-Type: text/plain 2133 2134 2135-- XXXX 2136Content-Type: text/plain 2137 2138-- XXXX-- 2139''') 2140 self.assertTrue(msg.is_multipart()) 2141 eq(msg.get_boundary(), ' XXXX') 2142 eq(len(msg.get_payload()), 2) 2143 2144 def test_boundary_without_trailing_newline(self): 2145 m = Parser().parsestr("""\ 2146Content-Type: multipart/mixed; boundary="===============0012394164==" 2147MIME-Version: 1.0 2148 2149--===============0012394164== 2150Content-Type: image/file1.jpg 2151MIME-Version: 1.0 2152Content-Transfer-Encoding: base64 2153 2154YXNkZg== 2155--===============0012394164==--""") 2156 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') 2157 2158 def test_mimebase_default_policy(self): 2159 m = MIMEBase('multipart', 'mixed') 2160 self.assertIs(m.policy, email.policy.compat32) 2161 2162 def test_mimebase_custom_policy(self): 2163 m = MIMEBase('multipart', 'mixed', policy=email.policy.default) 2164 self.assertIs(m.policy, email.policy.default) 2165 2166# Test some badly formatted messages 2167class 
def test_multipart_no_boundary(self):
    # msg_25.txt is expected to parse to a plain string payload and to
    # record exactly these two defects, in this order.
    expected_defects = (errors.NoBoundaryInMultipartDefect,
                        errors.MultipartInvariantViolationDefect)
    msg = self._msgobj('msg_25.txt')
    self.assertIsInstance(msg.get_payload(), str)
    self.assertEqual(len(msg.defects), len(expected_defects))
    for defect, defect_class in zip(msg.defects, expected_defects):
        self.assertIsInstance(defect, defect_class)
test_defect_handling 2228 def test_multipart_no_cte_no_defect(self): 2229 msg = self._str_msg(self.multipart_msg.format('')) 2230 self.assertEqual(len(msg.defects), 0) 2231 2232 # test_defect_handling 2233 def test_multipart_valid_cte_no_defect(self): 2234 for cte in ('7bit', '8bit', 'BINary'): 2235 msg = self._str_msg( 2236 self.multipart_msg.format( 2237 "\nContent-Transfer-Encoding: {}".format(cte))) 2238 self.assertEqual(len(msg.defects), 0) 2239 2240 # test_headerregistry.TestContentTyopeHeader invalid_1 and invalid_2. 2241 def test_invalid_content_type(self): 2242 eq = self.assertEqual 2243 neq = self.ndiffAssertEqual 2244 msg = Message() 2245 # RFC 2045, $5.2 says invalid yields text/plain 2246 msg['Content-Type'] = 'text' 2247 eq(msg.get_content_maintype(), 'text') 2248 eq(msg.get_content_subtype(), 'plain') 2249 eq(msg.get_content_type(), 'text/plain') 2250 # Clear the old value and try something /really/ invalid 2251 del msg['content-type'] 2252 msg['Content-Type'] = 'foo' 2253 eq(msg.get_content_maintype(), 'text') 2254 eq(msg.get_content_subtype(), 'plain') 2255 eq(msg.get_content_type(), 'text/plain') 2256 # Still, make sure that the message is idempotently generated 2257 s = StringIO() 2258 g = Generator(s) 2259 g.flatten(msg) 2260 neq(s.getvalue(), 'Content-Type: foo\n\n') 2261 2262 def test_no_start_boundary(self): 2263 eq = self.ndiffAssertEqual 2264 msg = self._msgobj('msg_31.txt') 2265 eq(msg.get_payload(), """\ 2266--BOUNDARY 2267Content-Type: text/plain 2268 2269message 1 2270 2271--BOUNDARY 2272Content-Type: text/plain 2273 2274message 2 2275 2276--BOUNDARY-- 2277""") 2278 2279 def test_no_separating_blank_line(self): 2280 eq = self.ndiffAssertEqual 2281 msg = self._msgobj('msg_35.txt') 2282 eq(msg.as_string(), """\ 2283From: aperson@dom.ain 2284To: bperson@dom.ain 2285Subject: here's something interesting 2286 2287counter to RFC 2822, there's no separating newline here 2288""") 2289 2290 # test_defect_handling 2291 def 
def test_first_line_is_continuation_header(self):
    # The source opens with a continuation line.  The Subject header and
    # the body must still be parsed, and the offending line must be
    # available on the single defect that is recorded.
    first_line = ' Line 1\n'
    msg = email.message_from_string(first_line + 'Subject: test\n\nbody')
    self.assertEqual(msg.keys(), ['Subject'])
    self.assertEqual(msg.get_payload(), 'body')
    self.assertEqual(len(msg.defects), 1)
    self.assertDefectsEqual(
        msg.defects, [errors.FirstHeaderLineIsContinuationDefect])
    self.assertEqual(msg.defects[0].line, first_line)
2334 msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') 2335 self.assertEqual(msg.keys(), ['Subject']) 2336 self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') 2337 self.assertDefectsEqual(msg.defects, 2338 [errors.MissingHeaderBodySeparatorDefect]) 2339 2340 2341# Test RFC 2047 header encoding and decoding 2342class TestRFC2047(TestEmailBase): 2343 def test_rfc2047_multiline(self): 2344 eq = self.assertEqual 2345 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz 2346 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=""" 2347 dh = decode_header(s) 2348 eq(dh, [ 2349 (b'Re: ', None), 2350 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'), 2351 (b' baz foo bar ', None), 2352 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')]) 2353 header = make_header(dh) 2354 eq(str(header), 2355 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s') 2356 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\ 2357Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?= 2358 =?mac-iceland?q?=9Arg=8Cs?=""") 2359 2360 def test_whitespace_keeper_unicode(self): 2361 eq = self.assertEqual 2362 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>' 2363 dh = decode_header(s) 2364 eq(dh, [(b'Andr\xe9', 'iso-8859-1'), 2365 (b' Pirard <pirard@dom.ain>', None)]) 2366 header = str(make_header(dh)) 2367 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>') 2368 2369 def test_whitespace_keeper_unicode_2(self): 2370 eq = self.assertEqual 2371 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=' 2372 dh = decode_header(s) 2373 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'), 2374 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')]) 2375 hu = str(make_header(dh)) 2376 eq(hu, 'The quick brown fox jumped over the lazy dog') 2377 2378 def test_rfc2047_missing_whitespace(self): 2379 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' 2380 dh = decode_header(s) 2381 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 
'iso-8859-1'), 2382 (b'rg', None), (b'\xe5', 'iso-8859-1'), 2383 (b'sbord', None)]) 2384 2385 def test_rfc2047_with_whitespace(self): 2386 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' 2387 dh = decode_header(s) 2388 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'), 2389 (b' rg ', None), (b'\xe5', 'iso-8859-1'), 2390 (b' sbord', None)]) 2391 2392 def test_rfc2047_B_bad_padding(self): 2393 s = '=?iso-8859-1?B?%s?=' 2394 data = [ # only test complete bytes 2395 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'), 2396 ('dmk=', b'vi'), ('dmk', b'vi') 2397 ] 2398 for q, a in data: 2399 dh = decode_header(s % q) 2400 self.assertEqual(dh, [(a, 'iso-8859-1')]) 2401 2402 def test_rfc2047_Q_invalid_digits(self): 2403 # issue 10004. 2404 s = '=?iso-8859-1?Q?andr=e9=zz?=' 2405 self.assertEqual(decode_header(s), 2406 [(b'andr\xe9=zz', 'iso-8859-1')]) 2407 2408 def test_rfc2047_rfc2047_1(self): 2409 # 1st testcase at end of rfc2047 2410 s = '(=?ISO-8859-1?Q?a?=)' 2411 self.assertEqual(decode_header(s), 2412 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) 2413 2414 def test_rfc2047_rfc2047_2(self): 2415 # 2nd testcase at end of rfc2047 2416 s = '(=?ISO-8859-1?Q?a?= b)' 2417 self.assertEqual(decode_header(s), 2418 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) 2419 2420 def test_rfc2047_rfc2047_3(self): 2421 # 3rd testcase at end of rfc2047 2422 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2423 self.assertEqual(decode_header(s), 2424 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2425 2426 def test_rfc2047_rfc2047_4(self): 2427 # 4th testcase at end of rfc2047 2428 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' 2429 self.assertEqual(decode_header(s), 2430 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2431 2432 def test_rfc2047_rfc2047_5a(self): 2433 # 5th testcase at end of rfc2047 newline is \r\n 2434 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' 2435 self.assertEqual(decode_header(s), 2436 [(b'(', None), (b'ab', 'iso-8859-1'), 
(b')', None)]) 2437 2438 def test_rfc2047_rfc2047_5b(self): 2439 # 5th testcase at end of rfc2047 newline is \n 2440 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' 2441 self.assertEqual(decode_header(s), 2442 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) 2443 2444 def test_rfc2047_rfc2047_6(self): 2445 # 6th testcase at end of rfc2047 2446 s = '(=?ISO-8859-1?Q?a_b?=)' 2447 self.assertEqual(decode_header(s), 2448 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) 2449 2450 def test_rfc2047_rfc2047_7(self): 2451 # 7th testcase at end of rfc2047 2452 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' 2453 self.assertEqual(decode_header(s), 2454 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), 2455 (b')', None)]) 2456 self.assertEqual(make_header(decode_header(s)).encode(), s.lower()) 2457 self.assertEqual(str(make_header(decode_header(s))), '(a b)') 2458 2459 def test_multiline_header(self): 2460 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>' 2461 self.assertEqual(decode_header(s), 2462 [(b'"M\xfcller T"', 'windows-1252'), 2463 (b'<T.Mueller@xxx.com>', None)]) 2464 self.assertEqual(make_header(decode_header(s)).encode(), 2465 ''.join(s.splitlines())) 2466 self.assertEqual(str(make_header(decode_header(s))), 2467 '"Müller T" <T.Mueller@xxx.com>') 2468 2469 2470# Test the MIMEMessage class 2471class TestMIMEMessage(TestEmailBase): 2472 def setUp(self): 2473 with openfile('msg_11.txt', encoding="utf-8") as fp: 2474 self._text = fp.read() 2475 2476 def test_type_error(self): 2477 self.assertRaises(TypeError, MIMEMessage, 'a plain string') 2478 2479 def test_valid_argument(self): 2480 eq = self.assertEqual 2481 subject = 'A sub-message' 2482 m = Message() 2483 m['Subject'] = subject 2484 r = MIMEMessage(m) 2485 eq(r.get_content_type(), 'message/rfc822') 2486 payload = r.get_payload() 2487 self.assertIsInstance(payload, list) 2488 eq(len(payload), 1) 2489 subpart = payload[0] 2490 self.assertIs(subpart, m) 2491 eq(subpart['subject'], 
subject) 2492 2493 def test_bad_multipart(self): 2494 msg1 = Message() 2495 msg1['Subject'] = 'subpart 1' 2496 msg2 = Message() 2497 msg2['Subject'] = 'subpart 2' 2498 r = MIMEMessage(msg1) 2499 self.assertRaises(errors.MultipartConversionError, r.attach, msg2) 2500 2501 def test_generate(self): 2502 # First craft the message to be encapsulated 2503 m = Message() 2504 m['Subject'] = 'An enclosed message' 2505 m.set_payload('Here is the body of the message.\n') 2506 r = MIMEMessage(m) 2507 r['Subject'] = 'The enclosing message' 2508 s = StringIO() 2509 g = Generator(s) 2510 g.flatten(r) 2511 self.assertEqual(s.getvalue(), """\ 2512Content-Type: message/rfc822 2513MIME-Version: 1.0 2514Subject: The enclosing message 2515 2516Subject: An enclosed message 2517 2518Here is the body of the message. 2519""") 2520 2521 def test_parse_message_rfc822(self): 2522 eq = self.assertEqual 2523 msg = self._msgobj('msg_11.txt') 2524 eq(msg.get_content_type(), 'message/rfc822') 2525 payload = msg.get_payload() 2526 self.assertIsInstance(payload, list) 2527 eq(len(payload), 1) 2528 submsg = payload[0] 2529 self.assertIsInstance(submsg, Message) 2530 eq(submsg['subject'], 'An enclosed message') 2531 eq(submsg.get_payload(), 'Here is the body of the message.\n') 2532 2533 def test_dsn(self): 2534 eq = self.assertEqual 2535 # msg 16 is a Delivery Status Notification, see RFC 1894 2536 msg = self._msgobj('msg_16.txt') 2537 eq(msg.get_content_type(), 'multipart/report') 2538 self.assertTrue(msg.is_multipart()) 2539 eq(len(msg.get_payload()), 3) 2540 # Subpart 1 is a text/plain, human readable section 2541 subpart = msg.get_payload(0) 2542 eq(subpart.get_content_type(), 'text/plain') 2543 eq(subpart.get_payload(), """\ 2544This report relates to a message you sent with the following header fields: 2545 2546 Message-id: <002001c144a6$8752e060$56104586@oxy.edu> 2547 Date: Sun, 23 Sep 2001 20:10:55 -0700 2548 From: "Ian T. 
def test_epilogue(self):
    # Build a two-part multipart message with a preamble and an epilogue
    # by hand; its flattened form must equal the text of msg_21.txt.
    with openfile('msg_21.txt', encoding="utf-8") as fp:
        expected = fp.read()
    msg = Message()
    for name, value in (('From', 'aperson@dom.ain'),
                        ('To', 'bperson@dom.ain'),
                        ('Subject', 'Test')):
        msg[name] = value
    msg.preamble = 'MIME message'
    msg.epilogue = 'End of MIME message\n'
    parts = [MIMEText('One'), MIMEText('Two')]
    msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for part in parts:
        msg.attach(part)
    out = StringIO()
    Generator(out).flatten(msg)
    self.ndiffAssertEqual(out.getvalue(), expected)
eq(container2a.get_content_type(), 'text/plain') 2661 2662 def test_default_type_with_explicit_container_type(self): 2663 eq = self.assertEqual 2664 with openfile('msg_28.txt', encoding="utf-8") as fp: 2665 msg = email.message_from_file(fp) 2666 container1 = msg.get_payload(0) 2667 eq(container1.get_default_type(), 'message/rfc822') 2668 eq(container1.get_content_type(), 'message/rfc822') 2669 container2 = msg.get_payload(1) 2670 eq(container2.get_default_type(), 'message/rfc822') 2671 eq(container2.get_content_type(), 'message/rfc822') 2672 container1a = container1.get_payload(0) 2673 eq(container1a.get_default_type(), 'text/plain') 2674 eq(container1a.get_content_type(), 'text/plain') 2675 container2a = container2.get_payload(0) 2676 eq(container2a.get_default_type(), 'text/plain') 2677 eq(container2a.get_content_type(), 'text/plain') 2678 2679 def test_default_type_non_parsed(self): 2680 eq = self.assertEqual 2681 neq = self.ndiffAssertEqual 2682 # Set up container 2683 container = MIMEMultipart('digest', 'BOUNDARY') 2684 container.epilogue = '' 2685 # Set up subparts 2686 subpart1a = MIMEText('message 1\n') 2687 subpart2a = MIMEText('message 2\n') 2688 subpart1 = MIMEMessage(subpart1a) 2689 subpart2 = MIMEMessage(subpart2a) 2690 container.attach(subpart1) 2691 container.attach(subpart2) 2692 eq(subpart1.get_content_type(), 'message/rfc822') 2693 eq(subpart1.get_default_type(), 'message/rfc822') 2694 eq(subpart2.get_content_type(), 'message/rfc822') 2695 eq(subpart2.get_default_type(), 'message/rfc822') 2696 neq(container.as_string(0), '''\ 2697Content-Type: multipart/digest; boundary="BOUNDARY" 2698MIME-Version: 1.0 2699 2700--BOUNDARY 2701Content-Type: message/rfc822 2702MIME-Version: 1.0 2703 2704Content-Type: text/plain; charset="us-ascii" 2705MIME-Version: 1.0 2706Content-Transfer-Encoding: 7bit 2707 2708message 1 2709 2710--BOUNDARY 2711Content-Type: message/rfc822 2712MIME-Version: 1.0 2713 2714Content-Type: text/plain; charset="us-ascii" 
2715MIME-Version: 1.0 2716Content-Transfer-Encoding: 7bit 2717 2718message 2 2719 2720--BOUNDARY-- 2721''') 2722 del subpart1['content-type'] 2723 del subpart1['mime-version'] 2724 del subpart2['content-type'] 2725 del subpart2['mime-version'] 2726 eq(subpart1.get_content_type(), 'message/rfc822') 2727 eq(subpart1.get_default_type(), 'message/rfc822') 2728 eq(subpart2.get_content_type(), 'message/rfc822') 2729 eq(subpart2.get_default_type(), 'message/rfc822') 2730 neq(container.as_string(0), '''\ 2731Content-Type: multipart/digest; boundary="BOUNDARY" 2732MIME-Version: 1.0 2733 2734--BOUNDARY 2735 2736Content-Type: text/plain; charset="us-ascii" 2737MIME-Version: 1.0 2738Content-Transfer-Encoding: 7bit 2739 2740message 1 2741 2742--BOUNDARY 2743 2744Content-Type: text/plain; charset="us-ascii" 2745MIME-Version: 1.0 2746Content-Transfer-Encoding: 7bit 2747 2748message 2 2749 2750--BOUNDARY-- 2751''') 2752 2753 def test_mime_attachments_in_constructor(self): 2754 eq = self.assertEqual 2755 text1 = MIMEText('') 2756 text2 = MIMEText('') 2757 msg = MIMEMultipart(_subparts=(text1, text2)) 2758 eq(len(msg.get_payload()), 2) 2759 eq(msg.get_payload(0), text1) 2760 eq(msg.get_payload(1), text2) 2761 2762 def test_default_multipart_constructor(self): 2763 msg = MIMEMultipart() 2764 self.assertTrue(msg.is_multipart()) 2765 2766 def test_multipart_default_policy(self): 2767 msg = MIMEMultipart() 2768 msg['To'] = 'a@b.com' 2769 msg['To'] = 'c@d.com' 2770 self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com']) 2771 2772 def test_multipart_custom_policy(self): 2773 msg = MIMEMultipart(policy=email.policy.default) 2774 msg['To'] = 'a@b.com' 2775 with self.assertRaises(ValueError) as cm: 2776 msg['To'] = 'c@d.com' 2777 self.assertEqual(str(cm.exception), 2778 'There may be at most 1 To headers in a message') 2779 2780 2781# Test the NonMultipart class 2782class TestNonMultipart(TestEmailBase): 2783 def test_nonmultipart_is_not_multipart(self): 2784 msg = 
MIMENonMultipart('text', 'plain') 2785 self.assertFalse(msg.is_multipart()) 2786 2787 def test_attach_raises_exception(self): 2788 msg = Message() 2789 msg['Subject'] = 'subpart 1' 2790 r = MIMENonMultipart('text', 'plain') 2791 self.assertRaises(errors.MultipartConversionError, r.attach, msg) 2792 2793 2794# A general test of parser->model->generator idempotency. IOW, read a message 2795# in, parse it into a message object tree, then without touching the tree, 2796# regenerate the plain text. The original text and the transformed text 2797# should be identical. Note: that we ignore the Unix-From since that may 2798# contain a changed date. 2799class TestIdempotent(TestEmailBase): 2800 2801 linesep = '\n' 2802 2803 def _msgobj(self, filename): 2804 with openfile(filename, encoding="utf-8") as fp: 2805 data = fp.read() 2806 msg = email.message_from_string(data) 2807 return msg, data 2808 2809 def _idempotent(self, msg, text, unixfrom=False): 2810 eq = self.ndiffAssertEqual 2811 s = StringIO() 2812 g = Generator(s, maxheaderlen=0) 2813 g.flatten(msg, unixfrom=unixfrom) 2814 eq(text, s.getvalue()) 2815 2816 def test_parse_text_message(self): 2817 eq = self.assertEqual 2818 msg, text = self._msgobj('msg_01.txt') 2819 eq(msg.get_content_type(), 'text/plain') 2820 eq(msg.get_content_maintype(), 'text') 2821 eq(msg.get_content_subtype(), 'plain') 2822 eq(msg.get_params()[1], ('charset', 'us-ascii')) 2823 eq(msg.get_param('charset'), 'us-ascii') 2824 eq(msg.preamble, None) 2825 eq(msg.epilogue, None) 2826 self._idempotent(msg, text) 2827 2828 def test_parse_untyped_message(self): 2829 eq = self.assertEqual 2830 msg, text = self._msgobj('msg_03.txt') 2831 eq(msg.get_content_type(), 'text/plain') 2832 eq(msg.get_params(), None) 2833 eq(msg.get_param('charset'), None) 2834 self._idempotent(msg, text) 2835 2836 def test_simple_multipart(self): 2837 msg, text = self._msgobj('msg_04.txt') 2838 self._idempotent(msg, text) 2839 2840 def test_MIME_digest(self): 2841 msg, text = 
self._msgobj('msg_02.txt') 2842 self._idempotent(msg, text) 2843 2844 def test_long_header(self): 2845 msg, text = self._msgobj('msg_27.txt') 2846 self._idempotent(msg, text) 2847 2848 def test_MIME_digest_with_part_headers(self): 2849 msg, text = self._msgobj('msg_28.txt') 2850 self._idempotent(msg, text) 2851 2852 def test_mixed_with_image(self): 2853 msg, text = self._msgobj('msg_06.txt') 2854 self._idempotent(msg, text) 2855 2856 def test_multipart_report(self): 2857 msg, text = self._msgobj('msg_05.txt') 2858 self._idempotent(msg, text) 2859 2860 def test_dsn(self): 2861 msg, text = self._msgobj('msg_16.txt') 2862 self._idempotent(msg, text) 2863 2864 def test_preamble_epilogue(self): 2865 msg, text = self._msgobj('msg_21.txt') 2866 self._idempotent(msg, text) 2867 2868 def test_multipart_one_part(self): 2869 msg, text = self._msgobj('msg_23.txt') 2870 self._idempotent(msg, text) 2871 2872 def test_multipart_no_parts(self): 2873 msg, text = self._msgobj('msg_24.txt') 2874 self._idempotent(msg, text) 2875 2876 def test_no_start_boundary(self): 2877 msg, text = self._msgobj('msg_31.txt') 2878 self._idempotent(msg, text) 2879 2880 def test_rfc2231_charset(self): 2881 msg, text = self._msgobj('msg_32.txt') 2882 self._idempotent(msg, text) 2883 2884 def test_more_rfc2231_parameters(self): 2885 msg, text = self._msgobj('msg_33.txt') 2886 self._idempotent(msg, text) 2887 2888 def test_text_plain_in_a_multipart_digest(self): 2889 msg, text = self._msgobj('msg_34.txt') 2890 self._idempotent(msg, text) 2891 2892 def test_nested_multipart_mixeds(self): 2893 msg, text = self._msgobj('msg_12a.txt') 2894 self._idempotent(msg, text) 2895 2896 def test_message_external_body_idempotent(self): 2897 msg, text = self._msgobj('msg_36.txt') 2898 self._idempotent(msg, text) 2899 2900 def test_message_delivery_status(self): 2901 msg, text = self._msgobj('msg_43.txt') 2902 self._idempotent(msg, text, unixfrom=True) 2903 2904 def test_message_signed_idempotent(self): 2905 msg, text = 
self._msgobj('msg_45.txt') 2906 self._idempotent(msg, text) 2907 2908 def test_content_type(self): 2909 eq = self.assertEqual 2910 # Get a message object and reset the seek pointer for other tests 2911 msg, text = self._msgobj('msg_05.txt') 2912 eq(msg.get_content_type(), 'multipart/report') 2913 # Test the Content-Type: parameters 2914 params = {} 2915 for pk, pv in msg.get_params(): 2916 params[pk] = pv 2917 eq(params['report-type'], 'delivery-status') 2918 eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') 2919 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) 2920 eq(msg.epilogue, self.linesep) 2921 eq(len(msg.get_payload()), 3) 2922 # Make sure the subparts are what we expect 2923 msg1 = msg.get_payload(0) 2924 eq(msg1.get_content_type(), 'text/plain') 2925 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) 2926 msg2 = msg.get_payload(1) 2927 eq(msg2.get_content_type(), 'text/plain') 2928 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) 2929 msg3 = msg.get_payload(2) 2930 eq(msg3.get_content_type(), 'message/rfc822') 2931 self.assertIsInstance(msg3, Message) 2932 payload = msg3.get_payload() 2933 self.assertIsInstance(payload, list) 2934 eq(len(payload), 1) 2935 msg4 = payload[0] 2936 self.assertIsInstance(msg4, Message) 2937 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) 2938 2939 def test_parser(self): 2940 eq = self.assertEqual 2941 msg, text = self._msgobj('msg_06.txt') 2942 # Check some of the outer headers 2943 eq(msg.get_content_type(), 'message/rfc822') 2944 # Make sure the payload is a list of exactly one sub-Message, and that 2945 # that submessage has a type of text/plain 2946 payload = msg.get_payload() 2947 self.assertIsInstance(payload, list) 2948 eq(len(payload), 1) 2949 msg1 = payload[0] 2950 self.assertIsInstance(msg1, Message) 2951 eq(msg1.get_content_type(), 'text/plain') 2952 self.assertIsInstance(msg1.get_payload(), str) 2953 eq(msg1.get_payload(), self.linesep) 2954 2955 


# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):
    def test_message_from_string(self):
        # Parsing msg_01.txt from a string and flattening it again must
        # reproduce the original text.
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        # Same round trip as above, but parsing from the open file object.
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            text = fp.read()
            fp.seek(0)
            msg = email.message_from_file(fp)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt', encoding="utf-8") as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt', encoding="utf-8") as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt', encoding="utf-8") as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_custom_message_does_not_require_arguments(self):
        # A Message subclass whose __init__ takes no arguments must still be
        # usable as the message class.
        class MyMessage(Message):
            def __init__(self):
                super().__init__()
        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
        self.assertIsInstance(msg, MyMessage)

    def test__all__(self):
        module = __import__('email')
        self.assertEqual(sorted(module.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        # formatdate() followed by parsedate() must give back the UTC
        # broken-down time, compared to second resolution.
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        # Without usegmt the zone is written as '-0000'; with it, as 'GMT'.
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    # parsedate and parsedate_tz will become deprecated interfaces someday
    def test_parsedate_returns_None_for_invalid_strings(self):
        # See also test_parsedate_to_datetime_with_invalid_raises_valueerror
        # in test_utils.
        invalid_dates = [
            '',
            ' ',
            '0',
            'A Complete Waste of Time',
            'Wed, 3 Apr 2002 12.34.56.78+0800',
            '17 June , 2022',
            'Friday, -Nov-82 16:14:55 EST',
            'Friday, Nov--82 16:14:55 EST',
            'Friday, 19-Nov- 16:14:55 EST',
        ]
        for dtstr in invalid_dates:
            with self.subTest(dtstr=dtstr):
                self.assertIsNone(utils.parsedate(dtstr))
                self.assertIsNone(utils.parsedate_tz(dtstr))
        # Not a part of the spec, but this has historically worked:
        self.assertIsNone(utils.parsedate(None))
        self.assertIsNone(utils.parsedate_tz(None))

    def test_parsedate_compact(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26 +0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate_tz('Wed,3 Apr 2002 14:58:26 +0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
        # The comma is optional
        self.assertEqual(utils.parsedate_tz('Wed 3 Apr 2002 14:58:26 +0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
        eq(utils.parsedate_tz('February 5, 2003 13:47:26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_no_space_before_positive_offset(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_space_before_negative_offset(self):
        # Issue 1155362: we already handled '+' for this case.
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

    def test_parsedate_accepts_time_with_dots(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))

    def test_parsedate_rfc_850(self):
        self.assertEqual(utils.parsedate_tz('Friday, 19-Nov-82 16:14:55 EST'),
                         (1982, 11, 19, 16, 14, 55, 0, 1, -1, -18000))

    def test_parsedate_no_seconds(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58 +0800'),
                         (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800))

    def test_parsedate_dot_time_delimiter(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58.26 +0800'),
                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58 +0800'),
                         (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800))

    def test_parsedate_acceptable_to_time_functions(self):
        # The tuples returned by parsedate()/parsedate_tz() must be usable
        # with time.mktime() and time.strftime().
        eq = self.assertEqual
        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup)), 2003)
        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
        # parsedate_tz() appends the UTC offset as a tenth item; the time
        # functions only take the first nine.
        t = int(time.mktime(timetup[:9]))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup[:9])), 2003)

    def test_mktime_tz(self):
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 0)), 0)
        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
                                          -1, -1, -1, 1234)), -1234)

    def test_parsedate_y2k(self):
        """Test for parsing a date with a two-digit year.

        Parsing a date with a two-digit year should return the correct
        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
        obsoletes RFC822) requires four-digit years.

        """
        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

    def test_parseaddr_empty(self):
        self.assertEqual(utils.parseaddr('<>'), ('', ''))
        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

    def test_parseaddr_multiple_domains(self):
        # An address containing more than one '@' is rejected outright.
        self.assertEqual(
            utils.parseaddr('a@b@c'),
            ('', '')
        )
        self.assertEqual(
            utils.parseaddr('a@b.c@c'),
            ('', '')
        )
        self.assertEqual(
            utils.parseaddr('a@172.17.0.1@c'),
            ('', '')
        )

    def test_noquote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly Person', 'person@dom.ain')),
            'A Silly Person <person@dom.ain>')

    def test_escape_dump(self):
        self.assertEqual(
            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
            r'"A (Very) Silly Person" <person@dom.ain>')
        self.assertEqual(
            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
            ('A (Very) Silly Person', 'person@dom.ain'))
        a = r'A \(Special\) Person'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_escape_backslashes(self):
        self.assertEqual(
            utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
        a = r'Arthur \Backslash\ Foobar'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_quotes_unicode_names(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
            latin1_quopri)

    def test_accepts_any_charset_like_object(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        foobar = "FOOBAR"
        # Minimal stand-in that only provides header_encode().
        class CharsetMock:
            def header_encode(self, string):
                return foobar
        mock = CharsetMock()
        mock_expected = "%s <%s>" % (foobar, addr)
        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
            utf8_base64)

    def test_invalid_charset_like_object_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        # An object without a header_encode method:
        bad_charset = object()
        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
            bad_charset)

    def test_unicode_address_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        addr = 'pers\u00f6n@dom.in'
        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))

    def test_name_with_dot(self):
        x = 'John X. Doe <jxd@example.com>'
        y = '"John X. Doe" <jxd@example.com>'
        a, b = ('John X. Doe', 'jxd@example.com')
        self.assertEqual(utils.parseaddr(x), (a, b))
        self.assertEqual(utils.parseaddr(y), (a, b))
        # formataddr() quotes the name if there's a dot in it
        self.assertEqual(utils.formataddr((a, b)), y)

    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
        # issue 10005.  Note that in the third test the second pair of
        # backslashes is not actually a quoted pair because it is not inside a
        # comment or quoted string: the address being parsed has a quoted
        # string containing a quoted backslash, followed by 'example' and two
        # backslashes, followed by another quoted string containing a space and
        # the word 'example'.  parseaddr copies those two backslashes
        # literally.  Per rfc5322 this is not technically correct since a \ may
        # not appear in an address outside of a quoted string.  It is probably
        # a sensible Postel interpretation, though.
        eq = self.assertEqual
        eq(utils.parseaddr('""example" example"@example.com'),
          ('', '""example" example"@example.com'))
        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
          ('', '"\\"example\\" example"@example.com'))
        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
          ('', '"\\\\"example\\\\" example"@example.com'))

    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
        # well.
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        # A run of two spaces inside the local part is preserved as well
        # (this case used to be an exact duplicate of the one above).
        self.assertEqual(('', "merwok  wok@xample.com"),
            utils.parseaddr("merwok  wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
            utils.parseaddr('merwok"wok" wok@xample.com'))
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
            utils.parseaddr('merwok. wok . wok@xample.com'))

    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
        addr = ("'foo@example.com' (foo@example.com)",
                'foo@example.com')
        addrstr = ('"\'foo@example.com\' '
                            '(foo@example.com)" <foo@example.com>')
        self.assertEqual(utils.parseaddr(addrstr), addr)
        self.assertEqual(utils.formataddr(addr), addrstr)


    def test_multiline_from_comment(self):
        # A display name folded across two lines is joined with one space.
        x = """\
Foo
\tBar <foo@example.com>"""
        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

    def test_quote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
            r'"A Silly; Person" <person@dom.ain>')

    def test_charset_richcomparisons(self):
        # A default Charset compares equal to 'us-ascii' in any letter case,
        # from either side, but not to the hyphen-less spelling.
        eq = self.assertEqual
        ne = self.assertNotEqual
        cset1 = Charset()
        cset2 = Charset()
        eq(cset1, 'us-ascii')
        eq(cset1, 'US-ASCII')
        eq(cset1, 'Us-AsCiI')
        eq('us-ascii', cset1)
        eq('US-ASCII', cset1)
        eq('Us-AsCiI', cset1)
        ne(cset1, 'usascii')
        ne(cset1, 'USASCII')
        ne(cset1, 'UsAsCiI')
        ne('usascii', cset1)
        ne('USASCII', cset1)
        ne('UsAsCiI', cset1)
        eq(cset1, cset2)
        eq(cset2, cset1)

    def test_getaddresses(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
                               'Bud Person <bperson@dom.ain>']),
           [('Al Person', 'aperson@dom.ain'),
            ('Bud Person', 'bperson@dom.ain')])

    def test_getaddresses_comma_in_name(self):
        """GH-106669 regression test."""
        self.assertEqual(
            utils.getaddresses(
                [
                    '"Bud, Person" <bperson@dom.ain>',
                    'aperson@dom.ain (Al Person)',
                    '"Mariusz Felisiak" <to@example.com>',
                ]
            ),
            [
                ('Bud, Person', 'bperson@dom.ain'),
                ('Al Person', 'aperson@dom.ain'),
                ('Mariusz Felisiak', 'to@example.com'),
            ],
        )

    def test_parsing_errors(self):
        """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
        alice = 'alice@example.org'
        bob = 'bob@example.com'
        empty = ('', '')

        # Test utils.getaddresses() and utils.parseaddr() on malformed email
        # addresses: default behavior (strict=True) rejects malformed address,
        # and strict=False which tolerates malformed address.
        for invalid_separator, expected_non_strict in (
            ('(', [(f'<{bob}>', alice)]),
            (')', [('', alice), empty, ('', bob)]),
            ('<', [('', alice), empty, ('', bob), empty]),
            ('>', [('', alice), empty, ('', bob)]),
            ('[', [('', f'{alice}[<{bob}>]')]),
            (']', [('', alice), empty, ('', bob)]),
            ('@', [empty, empty, ('', bob)]),
            (';', [('', alice), empty, ('', bob)]),
            (':', [('', alice), ('', bob)]),
            ('.', [('', alice + '.'), ('', bob)]),
            ('"', [('', alice), ('', f'<{bob}>')]),
        ):
            address = f'{alice}{invalid_separator}<{bob}>'
            with self.subTest(address=address):
                self.assertEqual(utils.getaddresses([address]),
                                 [empty])
                self.assertEqual(utils.getaddresses([address], strict=False),
                                 expected_non_strict)

                self.assertEqual(utils.parseaddr([address]),
                                 empty)
                self.assertEqual(utils.parseaddr([address], strict=False),
                                 ('', address))

        # Comma (',') is treated differently depending on strict parameter.
        # Comma without quotes.
        address = f'{alice},<{bob}>'
        self.assertEqual(utils.getaddresses([address]),
                         [('', alice), ('', bob)])
        self.assertEqual(utils.getaddresses([address], strict=False),
                         [('', alice), ('', bob)])
        self.assertEqual(utils.parseaddr([address]),
                         empty)
        self.assertEqual(utils.parseaddr([address], strict=False),
                         ('', address))

        # Real name between quotes containing comma.
        address = '"Alice, alice@example.org" <bob@example.com>'
        expected_strict = ('Alice, alice@example.org', 'bob@example.com')
        self.assertEqual(utils.getaddresses([address]), [expected_strict])
        self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
        self.assertEqual(utils.parseaddr([address]), expected_strict)
        self.assertEqual(utils.parseaddr([address], strict=False),
                         ('', address))

        # Valid parenthesis in comments.
        address = 'alice@example.org (Alice)'
        expected_strict = ('Alice', 'alice@example.org')
        self.assertEqual(utils.getaddresses([address]), [expected_strict])
        self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
        self.assertEqual(utils.parseaddr([address]), expected_strict)
        self.assertEqual(utils.parseaddr([address], strict=False),
                         ('', address))

        # Invalid parenthesis in comments.
        address = 'alice@example.org )Alice('
        self.assertEqual(utils.getaddresses([address]), [empty])
        self.assertEqual(utils.getaddresses([address], strict=False),
                         [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
        self.assertEqual(utils.parseaddr([address]), empty)
        self.assertEqual(utils.parseaddr([address], strict=False),
                         ('', address))

        # Two addresses with quotes separated by comma.
        address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
        self.assertEqual(utils.getaddresses([address]),
                         [('Jane Doe', 'jane@example.net'),
                          ('John Doe', 'john@example.net')])
        self.assertEqual(utils.getaddresses([address], strict=False),
                         [('Jane Doe', 'jane@example.net'),
                          ('John Doe', 'john@example.net')])
        self.assertEqual(utils.parseaddr([address]), empty)
        self.assertEqual(utils.parseaddr([address], strict=False),
                         ('', address))

        # Test email.utils.supports_strict_parsing attribute
        self.assertEqual(email.utils.supports_strict_parsing, True)

    def test_getaddresses_nasty(self):
        # Each of these inputs must parse to the same result in strict and
        # non-strict mode.
        for addresses, expected in (
            (['"Sürname, Firstname" <to@example.com>'],
             [('Sürname, Firstname', 'to@example.com')]),

            (['foo: ;'],
             [('', '')]),

            (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
             [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),

            ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
             [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),

            (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
             [('', '')]),

            (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
             [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),

            (['John Doe <jdoe@machine(comment). example>'],
             [('John Doe (comment)', 'jdoe@machine.example')]),

            (['"Mary Smith: Personal Account" <smith@home.example>'],
             [('Mary Smith: Personal Account', 'smith@home.example')]),

            (['Undisclosed recipients:;'],
             [('', '')]),

            ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
             [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
        ):
            with self.subTest(addresses=addresses):
                self.assertEqual(utils.getaddresses(addresses),
                                 expected)
                self.assertEqual(utils.getaddresses(addresses, strict=False),
                                 expected)

        # This one is the exception: the two modes disagree.
        addresses = ['[]*-- =~$']
        self.assertEqual(utils.getaddresses(addresses),
                         [('', '')])
        self.assertEqual(utils.getaddresses(addresses, strict=False),
                         [('', ''), ('', ''), ('', '*--')])

    def test_getaddresses_embedded_comment(self):
        """Test proper handling of a nested comment"""
        eq = self.assertEqual
        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
        eq(addrs[0][1], 'foo@bar.com')

    def test_getaddresses_header_obj(self):
        """Test the handling of a Header object."""
        addrs = utils.getaddresses([Header('Al Person <aperson@dom.ain>')])
        self.assertEqual(addrs[0][1], 'aperson@dom.ain')

    @threading_helper.requires_working_threading()
    def test_make_msgid_collisions(self):
        # Test make_msgid uniqueness, even with multiple threads
        class MsgidsThread(Thread):
            def run(self):
                # generate msgids for 3 seconds
                self.msgids = []
                append = self.msgids.append
                make_msgid = utils.make_msgid
                clock = time.monotonic
                tfin = clock() + 3.0
                while clock() < tfin:
                    append(make_msgid(domain='testdomain-string'))

        threads = [MsgidsThread() for i in range(5)]
        with threading_helper.start_threads(threads):
            pass
        # Flatten the per-thread lists in one pass; sum(lists, []) would
        # re-copy the accumulated list for every thread.
        all_ids = list(chain.from_iterable(t.msgids for t in threads))
        self.assertEqual(len(set(all_ids)), len(all_ids))

def test_utils_quote_unquote(self): 3505 eq = self.assertEqual 3506 msg = Message() 3507 msg.add_header('content-disposition', 'attachment', 3508 filename='foo\\wacky"name') 3509 eq(msg.get_filename(), 'foo\\wacky"name') 3510 3511 def test_get_body_encoding_with_bogus_charset(self): 3512 charset = Charset('not a charset') 3513 self.assertEqual(charset.get_body_encoding(), 'base64') 3514 3515 def test_get_body_encoding_with_uppercase_charset(self): 3516 eq = self.assertEqual 3517 msg = Message() 3518 msg['Content-Type'] = 'text/plain; charset=UTF-8' 3519 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3520 charsets = msg.get_charsets() 3521 eq(len(charsets), 1) 3522 eq(charsets[0], 'utf-8') 3523 charset = Charset(charsets[0]) 3524 eq(charset.get_body_encoding(), 'base64') 3525 msg.set_payload(b'hello world', charset=charset) 3526 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3527 eq(msg.get_payload(decode=True), b'hello world') 3528 eq(msg['content-transfer-encoding'], 'base64') 3529 # Try another one 3530 msg = Message() 3531 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3532 charsets = msg.get_charsets() 3533 eq(len(charsets), 1) 3534 eq(charsets[0], 'us-ascii') 3535 charset = Charset(charsets[0]) 3536 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3537 msg.set_payload('hello world', charset=charset) 3538 eq(msg.get_payload(), 'hello world') 3539 eq(msg['content-transfer-encoding'], '7bit') 3540 3541 def test_charsets_case_insensitive(self): 3542 lc = Charset('us-ascii') 3543 uc = Charset('US-ASCII') 3544 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3545 3546 def test_partial_falls_inside_message_delivery_status(self): 3547 eq = self.ndiffAssertEqual 3548 # The Parser interface provides chunks of data to FeedParser in 8192 3549 # byte gulps. SF bug #1076485 found one of those chunks inside 3550 # message/delivery-status header block, which triggered an 3551 # unreadline() of NeedMoreData. 
3552 msg = self._msgobj('msg_43.txt') 3553 sfp = StringIO() 3554 iterators._structure(msg, sfp) 3555 eq(sfp.getvalue(), """\ 3556multipart/report 3557 text/plain 3558 message/delivery-status 3559 text/plain 3560 text/plain 3561 text/plain 3562 text/plain 3563 text/plain 3564 text/plain 3565 text/plain 3566 text/plain 3567 text/plain 3568 text/plain 3569 text/plain 3570 text/plain 3571 text/plain 3572 text/plain 3573 text/plain 3574 text/plain 3575 text/plain 3576 text/plain 3577 text/plain 3578 text/plain 3579 text/plain 3580 text/plain 3581 text/plain 3582 text/plain 3583 text/plain 3584 text/plain 3585 text/rfc822-headers 3586""") 3587 3588 def test_make_msgid_domain(self): 3589 self.assertEqual( 3590 email.utils.make_msgid(domain='testdomain-string')[-19:], 3591 '@testdomain-string>') 3592 3593 def test_make_msgid_idstring(self): 3594 self.assertEqual( 3595 email.utils.make_msgid(idstring='test-idstring', 3596 domain='testdomain-string')[-33:], 3597 '.test-idstring@testdomain-string>') 3598 3599 def test_make_msgid_default_domain(self): 3600 with patch('socket.getfqdn') as mock_getfqdn: 3601 mock_getfqdn.return_value = domain = 'pythontest.example.com' 3602 self.assertTrue( 3603 email.utils.make_msgid().endswith( 3604 '@' + domain + '>')) 3605 3606 def test_Generator_linend(self): 3607 # Issue 14645. 3608 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f: 3609 msgtxt = f.read() 3610 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3611 msg = email.message_from_string(msgtxt) 3612 s = StringIO() 3613 g = email.generator.Generator(s) 3614 g.flatten(msg) 3615 self.assertEqual(s.getvalue(), msgtxt_nl) 3616 3617 def test_BytesGenerator_linend(self): 3618 # Issue 14645. 
3619 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f: 3620 msgtxt = f.read() 3621 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3622 msg = email.message_from_string(msgtxt_nl) 3623 s = BytesIO() 3624 g = email.generator.BytesGenerator(s) 3625 g.flatten(msg, linesep='\r\n') 3626 self.assertEqual(s.getvalue().decode('ascii'), msgtxt) 3627 3628 def test_BytesGenerator_linend_with_non_ascii(self): 3629 # Issue 14645. 3630 with openfile('msg_26.txt', 'rb') as f: 3631 msgtxt = f.read() 3632 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6') 3633 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n') 3634 msg = email.message_from_bytes(msgtxt_nl) 3635 s = BytesIO() 3636 g = email.generator.BytesGenerator(s) 3637 g.flatten(msg, linesep='\r\n') 3638 self.assertEqual(s.getvalue(), msgtxt) 3639 3640 def test_mime_classes_policy_argument(self): 3641 with openfile('sndhdr.au', 'rb') as fp: 3642 audiodata = fp.read() 3643 with openfile('python.gif', 'rb') as fp: 3644 bindata = fp.read() 3645 classes = [ 3646 (MIMEApplication, ('',)), 3647 (MIMEAudio, (audiodata,)), 3648 (MIMEImage, (bindata,)), 3649 (MIMEMessage, (Message(),)), 3650 (MIMENonMultipart, ('multipart', 'mixed')), 3651 (MIMEText, ('',)), 3652 ] 3653 for cls, constructor in classes: 3654 with self.subTest(cls=cls.__name__, policy='compat32'): 3655 m = cls(*constructor) 3656 self.assertIs(m.policy, email.policy.compat32) 3657 with self.subTest(cls=cls.__name__, policy='default'): 3658 m = cls(*constructor, policy=email.policy.default) 3659 self.assertIs(m.policy, email.policy.default) 3660 3661 def test_iter_escaped_chars(self): 3662 self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), 3663 [(0, 'a'), 3664 (2, '\\\\'), 3665 (3, 'b'), 3666 (5, '\\"'), 3667 (6, 'c'), 3668 (8, '\\\\'), 3669 (9, '"'), 3670 (10, 'd')]) 3671 self.assertEqual(list(utils._iter_escaped_chars('a\\')), 3672 [(0, 'a'), (1, '\\')]) 3673 3674 def test_strip_quoted_realnames(self): 3675 def check(addr, expected): 3676 
self.assertEqual(utils._strip_quoted_realnames(addr), expected) 3677 3678 check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>', 3679 ' <jane@example.net>, <john@example.net>') 3680 check(r'"Jane \"Doe\"." <jane@example.net>', 3681 ' <jane@example.net>') 3682 3683 # special cases 3684 check(r'before"name"after', 'beforeafter') 3685 check(r'before"name"', 'before') 3686 check(r'b"name"', 'b') # single char 3687 check(r'"name"after', 'after') 3688 check(r'"name"a', 'a') # single char 3689 check(r'"name"', '') 3690 3691 # no change 3692 for addr in ( 3693 'Jane Doe <jane@example.net>, John Doe <john@example.net>', 3694 'lone " quote', 3695 ): 3696 self.assertEqual(utils._strip_quoted_realnames(addr), addr) 3697 3698 3699 def test_check_parenthesis(self): 3700 addr = 'alice@example.net' 3701 self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) 3702 self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) 3703 self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) 3704 self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) 3705 3706 # Ignore real name between quotes 3707 self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) 3708 3709# Test the iterator/generators 3710class TestIterators(TestEmailBase): 3711 def test_body_line_iterator(self): 3712 eq = self.assertEqual 3713 neq = self.ndiffAssertEqual 3714 # First a simple non-multipart message 3715 msg = self._msgobj('msg_01.txt') 3716 it = iterators.body_line_iterator(msg) 3717 lines = list(it) 3718 eq(len(lines), 6) 3719 neq(EMPTYSTRING.join(lines), msg.get_payload()) 3720 # Now a more complicated multipart 3721 msg = self._msgobj('msg_02.txt') 3722 it = iterators.body_line_iterator(msg) 3723 lines = list(it) 3724 eq(len(lines), 43) 3725 with openfile('msg_19.txt', encoding="utf-8") as fp: 3726 neq(EMPTYSTRING.join(lines), fp.read()) 3727 3728 def test_typed_subpart_iterator(self): 3729 eq = self.assertEqual 3730 msg = self._msgobj('msg_04.txt') 3731 it = 
iterators.typed_subpart_iterator(msg, 'text') 3732 lines = [] 3733 subparts = 0 3734 for subpart in it: 3735 subparts += 1 3736 lines.append(subpart.get_payload()) 3737 eq(subparts, 2) 3738 eq(EMPTYSTRING.join(lines), """\ 3739a simple kind of mirror 3740to reflect upon our own 3741a simple kind of mirror 3742to reflect upon our own 3743""") 3744 3745 def test_typed_subpart_iterator_default_type(self): 3746 eq = self.assertEqual 3747 msg = self._msgobj('msg_03.txt') 3748 it = iterators.typed_subpart_iterator(msg, 'text', 'plain') 3749 lines = [] 3750 subparts = 0 3751 for subpart in it: 3752 subparts += 1 3753 lines.append(subpart.get_payload()) 3754 eq(subparts, 1) 3755 eq(EMPTYSTRING.join(lines), """\ 3756 3757Hi, 3758 3759Do you like this message? 3760 3761-Me 3762""") 3763 3764 def test_pushCR_LF(self): 3765 '''FeedParser BufferedSubFile.push() assumed it received complete 3766 line endings. A CR ending one push() followed by a LF starting 3767 the next push() added an empty line. 3768 ''' 3769 imt = [ 3770 ("a\r \n", 2), 3771 ("b", 0), 3772 ("c\n", 1), 3773 ("", 0), 3774 ("d\r\n", 1), 3775 ("e\r", 0), 3776 ("\nf", 1), 3777 ("\r\n", 1), 3778 ] 3779 from email.feedparser import BufferedSubFile, NeedMoreData 3780 bsf = BufferedSubFile() 3781 om = [] 3782 nt = 0 3783 for il, n in imt: 3784 bsf.push(il) 3785 nt += n 3786 n1 = 0 3787 for ol in iter(bsf.readline, NeedMoreData): 3788 om.append(ol) 3789 n1 += 1 3790 self.assertEqual(n, n1) 3791 self.assertEqual(len(om), nt) 3792 self.assertEqual(''.join([il for il, n in imt]), ''.join(om)) 3793 3794 def test_push_random(self): 3795 from email.feedparser import BufferedSubFile, NeedMoreData 3796 3797 n = 10000 3798 chunksize = 5 3799 chars = 'abcd \t\r\n' 3800 3801 s = ''.join(choice(chars) for i in range(n)) + '\n' 3802 target = s.splitlines(True) 3803 3804 bsf = BufferedSubFile() 3805 lines = [] 3806 for i in range(0, len(s), chunksize): 3807 chunk = s[i:i+chunksize] 3808 bsf.push(chunk) 3809 
lines.extend(iter(bsf.readline, NeedMoreData)) 3810 self.assertEqual(lines, target) 3811 3812 3813class TestFeedParsers(TestEmailBase): 3814 3815 def parse(self, chunks): 3816 feedparser = FeedParser() 3817 for chunk in chunks: 3818 feedparser.feed(chunk) 3819 return feedparser.close() 3820 3821 def test_empty_header_name_handled(self): 3822 # Issue 19996 3823 msg = self.parse("First: val\n: bad\nSecond: val") 3824 self.assertEqual(msg['First'], 'val') 3825 self.assertEqual(msg['Second'], 'val') 3826 3827 def test_newlines(self): 3828 m = self.parse(['a:\nb:\rc:\r\nd:\n']) 3829 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3830 m = self.parse(['a:\nb:\rc:\r\nd:']) 3831 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3832 m = self.parse(['a:\rb', 'c:\n']) 3833 self.assertEqual(m.keys(), ['a', 'bc']) 3834 m = self.parse(['a:\r', 'b:\n']) 3835 self.assertEqual(m.keys(), ['a', 'b']) 3836 m = self.parse(['a:\r', '\nb:\n']) 3837 self.assertEqual(m.keys(), ['a', 'b']) 3838 3839 # Only CR and LF should break header fields 3840 m = self.parse(['a:\x85b:\u2028c:\n']) 3841 self.assertEqual(m.items(), [('a', '\x85b:\u2028c:')]) 3842 m = self.parse(['a:\r', 'b:\x85', 'c:\n']) 3843 self.assertEqual(m.items(), [('a', ''), ('b', '\x85c:')]) 3844 3845 def test_long_lines(self): 3846 # Expected peak memory use on 32-bit platform: 6*N*M bytes. 
3847 M, N = 1000, 20000 3848 m = self.parse(['a:b\n\n'] + ['x'*M] * N) 3849 self.assertEqual(m.items(), [('a', 'b')]) 3850 self.assertEqual(m.get_payload(), 'x'*M*N) 3851 m = self.parse(['a:b\r\r'] + ['x'*M] * N) 3852 self.assertEqual(m.items(), [('a', 'b')]) 3853 self.assertEqual(m.get_payload(), 'x'*M*N) 3854 m = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N) 3855 self.assertEqual(m.items(), [('a', 'b')]) 3856 self.assertEqual(m.get_payload(), ('x'*M+'\x85')*N) 3857 m = self.parse(['a:\r', 'b: '] + ['x'*M] * N) 3858 self.assertEqual(m.items(), [('a', ''), ('b', 'x'*M*N)]) 3859 3860 3861class TestParsers(TestEmailBase): 3862 3863 def test_header_parser(self): 3864 eq = self.assertEqual 3865 # Parse only the headers of a complex multipart MIME document 3866 with openfile('msg_02.txt', encoding="utf-8") as fp: 3867 msg = HeaderParser().parse(fp) 3868 eq(msg['from'], 'ppp-request@zzz.org') 3869 eq(msg['to'], 'ppp@zzz.org') 3870 eq(msg.get_content_type(), 'multipart/mixed') 3871 self.assertFalse(msg.is_multipart()) 3872 self.assertIsInstance(msg.get_payload(), str) 3873 3874 def test_bytes_header_parser(self): 3875 eq = self.assertEqual 3876 # Parse only the headers of a complex multipart MIME document 3877 with openfile('msg_02.txt', 'rb') as fp: 3878 msg = email.parser.BytesHeaderParser().parse(fp) 3879 eq(msg['from'], 'ppp-request@zzz.org') 3880 eq(msg['to'], 'ppp@zzz.org') 3881 eq(msg.get_content_type(), 'multipart/mixed') 3882 self.assertFalse(msg.is_multipart()) 3883 self.assertIsInstance(msg.get_payload(), str) 3884 self.assertIsInstance(msg.get_payload(decode=True), bytes) 3885 3886 def test_bytes_parser_does_not_close_file(self): 3887 with openfile('msg_02.txt', 'rb') as fp: 3888 email.parser.BytesParser().parse(fp) 3889 self.assertFalse(fp.closed) 3890 3891 def test_bytes_parser_on_exception_does_not_close_file(self): 3892 with openfile('msg_15.txt', 'rb') as fp: 3893 bytesParser = email.parser.BytesParser 3894 
self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3895 bytesParser(policy=email.policy.strict).parse, 3896 fp) 3897 self.assertFalse(fp.closed) 3898 3899 def test_parser_does_not_close_file(self): 3900 with openfile('msg_02.txt', encoding="utf-8") as fp: 3901 email.parser.Parser().parse(fp) 3902 self.assertFalse(fp.closed) 3903 3904 def test_parser_on_exception_does_not_close_file(self): 3905 with openfile('msg_15.txt', encoding="utf-8") as fp: 3906 parser = email.parser.Parser 3907 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3908 parser(policy=email.policy.strict).parse, fp) 3909 self.assertFalse(fp.closed) 3910 3911 def test_whitespace_continuation(self): 3912 eq = self.assertEqual 3913 # This message contains a line after the Subject: header that has only 3914 # whitespace, but it is not empty! 3915 msg = email.message_from_string("""\ 3916From: aperson@dom.ain 3917To: bperson@dom.ain 3918Subject: the next line has a space on it 3919\x20 3920Date: Mon, 8 Apr 2002 15:09:19 -0400 3921Message-ID: spam 3922 3923Here's the message body 3924""") 3925 eq(msg['subject'], 'the next line has a space on it\n ') 3926 eq(msg['message-id'], 'spam') 3927 eq(msg.get_payload(), "Here's the message body\n") 3928 3929 def test_whitespace_continuation_last_header(self): 3930 eq = self.assertEqual 3931 # Like the previous test, but the subject line is the last 3932 # header. 
3933 msg = email.message_from_string("""\ 3934From: aperson@dom.ain 3935To: bperson@dom.ain 3936Date: Mon, 8 Apr 2002 15:09:19 -0400 3937Message-ID: spam 3938Subject: the next line has a space on it 3939\x20 3940 3941Here's the message body 3942""") 3943 eq(msg['subject'], 'the next line has a space on it\n ') 3944 eq(msg['message-id'], 'spam') 3945 eq(msg.get_payload(), "Here's the message body\n") 3946 3947 def test_crlf_separation(self): 3948 eq = self.assertEqual 3949 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp: 3950 msg = Parser().parse(fp) 3951 eq(len(msg.get_payload()), 2) 3952 part1 = msg.get_payload(0) 3953 eq(part1.get_content_type(), 'text/plain') 3954 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n') 3955 part2 = msg.get_payload(1) 3956 eq(part2.get_content_type(), 'application/riscos') 3957 3958 def test_crlf_flatten(self): 3959 # Using newline='\n' preserves the crlfs in this input file. 3960 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp: 3961 text = fp.read() 3962 msg = email.message_from_string(text) 3963 s = StringIO() 3964 g = Generator(s) 3965 g.flatten(msg, linesep='\r\n') 3966 self.assertEqual(s.getvalue(), text) 3967 3968 maxDiff = None 3969 3970 def test_multipart_digest_with_extra_mime_headers(self): 3971 eq = self.assertEqual 3972 neq = self.ndiffAssertEqual 3973 with openfile('msg_28.txt', encoding="utf-8") as fp: 3974 msg = email.message_from_file(fp) 3975 # Structure is: 3976 # multipart/digest 3977 # message/rfc822 3978 # text/plain 3979 # message/rfc822 3980 # text/plain 3981 eq(msg.is_multipart(), 1) 3982 eq(len(msg.get_payload()), 2) 3983 part1 = msg.get_payload(0) 3984 eq(part1.get_content_type(), 'message/rfc822') 3985 eq(part1.is_multipart(), 1) 3986 eq(len(part1.get_payload()), 1) 3987 part1a = part1.get_payload(0) 3988 eq(part1a.is_multipart(), 0) 3989 eq(part1a.get_content_type(), 'text/plain') 3990 neq(part1a.get_payload(), 'message 1\n') 3991 # next message/rfc822 3992 
part2 = msg.get_payload(1) 3993 eq(part2.get_content_type(), 'message/rfc822') 3994 eq(part2.is_multipart(), 1) 3995 eq(len(part2.get_payload()), 1) 3996 part2a = part2.get_payload(0) 3997 eq(part2a.is_multipart(), 0) 3998 eq(part2a.get_content_type(), 'text/plain') 3999 neq(part2a.get_payload(), 'message 2\n') 4000 4001 def test_three_lines(self): 4002 # A bug report by Andrew McNamara 4003 lines = ['From: Andrew Person <aperson@dom.ain', 4004 'Subject: Test', 4005 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] 4006 msg = email.message_from_string(NL.join(lines)) 4007 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') 4008 4009 def test_strip_line_feed_and_carriage_return_in_headers(self): 4010 eq = self.assertEqual 4011 # For [ 1002475 ] email message parser doesn't handle \r\n correctly 4012 value1 = 'text' 4013 value2 = 'more text' 4014 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( 4015 value1, value2) 4016 msg = email.message_from_string(m) 4017 eq(msg.get('Header'), value1) 4018 eq(msg.get('Next-Header'), value2) 4019 4020 def test_rfc2822_header_syntax(self): 4021 eq = self.assertEqual 4022 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 4023 msg = email.message_from_string(m) 4024 eq(len(msg), 3) 4025 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) 4026 eq(msg.get_payload(), 'body') 4027 4028 def test_rfc2822_space_not_allowed_in_header(self): 4029 eq = self.assertEqual 4030 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 4031 msg = email.message_from_string(m) 4032 eq(len(msg.keys()), 0) 4033 4034 def test_rfc2822_one_character_header(self): 4035 eq = self.assertEqual 4036 m = 'A: first header\nB: second header\nCC: third header\n\nbody' 4037 msg = email.message_from_string(m) 4038 headers = msg.keys() 4039 headers.sort() 4040 eq(headers, ['A', 'B', 'CC']) 4041 eq(msg.get_payload(), 'body') 4042 4043 def test_CRLFLF_at_end_of_part(self): 4044 # issue 5610: feedparser should not eat two chars 
from body part ending 4045 # with "\r\n\n". 4046 m = ( 4047 "From: foo@bar.com\n" 4048 "To: baz\n" 4049 "Mime-Version: 1.0\n" 4050 "Content-Type: multipart/mixed; boundary=BOUNDARY\n" 4051 "\n" 4052 "--BOUNDARY\n" 4053 "Content-Type: text/plain\n" 4054 "\n" 4055 "body ending with CRLF newline\r\n" 4056 "\n" 4057 "--BOUNDARY--\n" 4058 ) 4059 msg = email.message_from_string(m) 4060 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) 4061 4062 4063class Test8BitBytesHandling(TestEmailBase): 4064 # In Python3 all input is string, but that doesn't work if the actual input 4065 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we 4066 # decode byte streams using the surrogateescape error handler, and 4067 # reconvert to binary at appropriate places if we detect surrogates. This 4068 # doesn't allow us to transform headers with 8bit bytes (they get munged), 4069 # but it does allow us to parse and preserve them, and to decode body 4070 # parts that use an 8bit CTE. 
4071 4072 bodytest_msg = textwrap.dedent("""\ 4073 From: foo@bar.com 4074 To: baz 4075 Mime-Version: 1.0 4076 Content-Type: text/plain; charset={charset} 4077 Content-Transfer-Encoding: {cte} 4078 4079 {bodyline} 4080 """) 4081 4082 def test_known_8bit_CTE(self): 4083 m = self.bodytest_msg.format(charset='utf-8', 4084 cte='8bit', 4085 bodyline='pöstal').encode('utf-8') 4086 msg = email.message_from_bytes(m) 4087 self.assertEqual(msg.get_payload(), "pöstal\n") 4088 self.assertEqual(msg.get_payload(decode=True), 4089 "pöstal\n".encode('utf-8')) 4090 4091 def test_unknown_8bit_CTE(self): 4092 m = self.bodytest_msg.format(charset='notavalidcharset', 4093 cte='8bit', 4094 bodyline='pöstal').encode('utf-8') 4095 msg = email.message_from_bytes(m) 4096 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") 4097 self.assertEqual(msg.get_payload(decode=True), 4098 "pöstal\n".encode('utf-8')) 4099 4100 def test_8bit_in_quopri_body(self): 4101 # This is non-RFC compliant data...without 'decode' the library code 4102 # decodes the body using the charset from the headers, and because the 4103 # source byte really is utf-8 this works. This is likely to fail 4104 # against real dirty data (ie: produce mojibake), but the data is 4105 # invalid anyway so it is as good a guess as any. But this means that 4106 # this test just confirms the current behavior; that behavior is not 4107 # necessarily the best possible behavior. With 'decode' it is 4108 # returning the raw bytes, so that test should be of correct behavior, 4109 # or at least produce the same result that email4 did. 
4110 m = self.bodytest_msg.format(charset='utf-8', 4111 cte='quoted-printable', 4112 bodyline='p=C3=B6stál').encode('utf-8') 4113 msg = email.message_from_bytes(m) 4114 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') 4115 self.assertEqual(msg.get_payload(decode=True), 4116 'pöstál\n'.encode('utf-8')) 4117 4118 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): 4119 # This is similar to the previous test, but proves that if the 8bit 4120 # byte is undecodeable in the specified charset, it gets replaced 4121 # by the unicode 'unknown' character. Again, this may or may not 4122 # be the ideal behavior. Note that if decode=False none of the 4123 # decoders will get involved, so this is the only test we need 4124 # for this behavior. 4125 m = self.bodytest_msg.format(charset='ascii', 4126 cte='quoted-printable', 4127 bodyline='p=C3=B6stál').encode('utf-8') 4128 msg = email.message_from_bytes(m) 4129 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') 4130 self.assertEqual(msg.get_payload(decode=True), 4131 'pöstál\n'.encode('utf-8')) 4132 4133 # test_defect_handling:test_invalid_chars_in_base64_payload 4134 def test_8bit_in_base64_body(self): 4135 # If we get 8bit bytes in a base64 body, we can just ignore them 4136 # as being outside the base64 alphabet and decode anyway. But 4137 # we register a defect. 4138 m = self.bodytest_msg.format(charset='utf-8', 4139 cte='base64', 4140 bodyline='cMO2c3RhbAá=').encode('utf-8') 4141 msg = email.message_from_bytes(m) 4142 self.assertEqual(msg.get_payload(decode=True), 4143 'pöstal'.encode('utf-8')) 4144 self.assertIsInstance(msg.defects[0], 4145 errors.InvalidBase64CharactersDefect) 4146 4147 def test_8bit_in_uuencode_body(self): 4148 # Sticking an 8bit byte in a uuencode block makes it undecodable by 4149 # normal means, so the block is returned undecoded, but as bytes. 
4150 m = self.bodytest_msg.format(charset='utf-8', 4151 cte='uuencode', 4152 bodyline='<,.V<W1A; á ').encode('utf-8') 4153 msg = email.message_from_bytes(m) 4154 self.assertEqual(msg.get_payload(decode=True), 4155 '<,.V<W1A; á \n'.encode('utf-8')) 4156 4157 4158 headertest_headers = ( 4159 ('From: foo@bar.com', ('From', 'foo@bar.com')), 4160 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), 4161 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n' 4162 '\tJean de Baddie', 4163 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 4164 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' 4165 ' =?unknown-8bit?q?_Jean_de_Baddie?=')), 4166 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), 4167 ) 4168 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + 4169 '\nYes, they are flying.\n').encode('utf-8') 4170 4171 def test_get_8bit_header(self): 4172 msg = email.message_from_bytes(self.headertest_msg) 4173 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') 4174 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') 4175 4176 def test_print_8bit_headers(self): 4177 msg = email.message_from_bytes(self.headertest_msg) 4178 self.assertEqual(str(msg), 4179 textwrap.dedent("""\ 4180 From: {} 4181 To: {} 4182 Subject: {} 4183 From: {} 4184 4185 Yes, they are flying. 
4186 """).format(*[expected[1] for (_, expected) in 4187 self.headertest_headers])) 4188 4189 def test_values_with_8bit_headers(self): 4190 msg = email.message_from_bytes(self.headertest_msg) 4191 self.assertListEqual([str(x) for x in msg.values()], 4192 ['foo@bar.com', 4193 'b\uFFFD\uFFFDz', 4194 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' 4195 'coll\uFFFD\uFFFDgue, le pouf ' 4196 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 4197 '\tJean de Baddie', 4198 "g\uFFFD\uFFFDst"]) 4199 4200 def test_items_with_8bit_headers(self): 4201 msg = email.message_from_bytes(self.headertest_msg) 4202 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], 4203 [('From', 'foo@bar.com'), 4204 ('To', 'b\uFFFD\uFFFDz'), 4205 ('Subject', 'Maintenant je vous ' 4206 'pr\uFFFD\uFFFDsente ' 4207 'mon coll\uFFFD\uFFFDgue, le pouf ' 4208 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 4209 '\tJean de Baddie'), 4210 ('From', 'g\uFFFD\uFFFDst')]) 4211 4212 def test_get_all_with_8bit_headers(self): 4213 msg = email.message_from_bytes(self.headertest_msg) 4214 self.assertListEqual([str(x) for x in msg.get_all('from')], 4215 ['foo@bar.com', 4216 'g\uFFFD\uFFFDst']) 4217 4218 def test_get_content_type_with_8bit(self): 4219 msg = email.message_from_bytes(textwrap.dedent("""\ 4220 Content-Type: text/pl\xA7in; charset=utf-8 4221 """).encode('latin-1')) 4222 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") 4223 self.assertEqual(msg.get_content_maintype(), "text") 4224 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") 4225 4226 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params 4227 def test_get_params_with_8bit(self): 4228 msg = email.message_from_bytes( 4229 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) 4230 self.assertEqual(msg.get_params(header='x-header'), 4231 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) 4232 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') 4233 # XXX: someday you might be able to get 'b\xa7r', for now 
you can't. 4234 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) 4235 4236 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value 4237 def test_get_rfc2231_params_with_8bit(self): 4238 msg = email.message_from_bytes(textwrap.dedent("""\ 4239 Content-Type: text/plain; charset=us-ascii; 4240 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4241 ).encode('latin-1')) 4242 self.assertEqual(msg.get_param('title'), 4243 ('us-ascii', 'en', 'This is not f\uFFFDn')) 4244 4245 def test_set_rfc2231_params_with_8bit(self): 4246 msg = email.message_from_bytes(textwrap.dedent("""\ 4247 Content-Type: text/plain; charset=us-ascii; 4248 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4249 ).encode('latin-1')) 4250 msg.set_param('title', 'test') 4251 self.assertEqual(msg.get_param('title'), 'test') 4252 4253 def test_del_rfc2231_params_with_8bit(self): 4254 msg = email.message_from_bytes(textwrap.dedent("""\ 4255 Content-Type: text/plain; charset=us-ascii; 4256 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4257 ).encode('latin-1')) 4258 msg.del_param('title') 4259 self.assertEqual(msg.get_param('title'), None) 4260 self.assertEqual(msg.get_content_maintype(), 'text') 4261 4262 def test_get_payload_with_8bit_cte_header(self): 4263 msg = email.message_from_bytes(textwrap.dedent("""\ 4264 Content-Transfer-Encoding: b\xa7se64 4265 Content-Type: text/plain; charset=latin-1 4266 4267 payload 4268 """).encode('latin-1')) 4269 self.assertEqual(msg.get_payload(), 'payload\n') 4270 self.assertEqual(msg.get_payload(decode=True), b'payload\n') 4271 4272 non_latin_bin_msg = textwrap.dedent("""\ 4273 From: foo@bar.com 4274 To: báz 4275 Subject: Maintenant je vous présente mon collègue, le pouf célèbre 4276 \tJean de Baddie 4277 Mime-Version: 1.0 4278 Content-Type: text/plain; charset="utf-8" 4279 Content-Transfer-Encoding: 8bit 4280 4281 Да, они летят. 
4282 """).encode('utf-8') 4283 4284 def test_bytes_generator(self): 4285 msg = email.message_from_bytes(self.non_latin_bin_msg) 4286 out = BytesIO() 4287 email.generator.BytesGenerator(out).flatten(msg) 4288 self.assertEqual(out.getvalue(), self.non_latin_bin_msg) 4289 4290 def test_bytes_generator_handles_None_body(self): 4291 #Issue 11019 4292 msg = email.message.Message() 4293 out = BytesIO() 4294 email.generator.BytesGenerator(out).flatten(msg) 4295 self.assertEqual(out.getvalue(), b"\n") 4296 4297 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ 4298 From: foo@bar.com 4299 To: =?unknown-8bit?q?b=C3=A1z?= 4300 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= 4301 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?= 4302 =?unknown-8bit?q?_Jean_de_Baddie?= 4303 Mime-Version: 1.0 4304 Content-Type: text/plain; charset="utf-8" 4305 Content-Transfer-Encoding: base64 4306 4307 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== 4308 """) 4309 4310 def test_generator_handles_8bit(self): 4311 msg = email.message_from_bytes(self.non_latin_bin_msg) 4312 out = StringIO() 4313 email.generator.Generator(out).flatten(msg) 4314 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) 4315 4316 def test_str_generator_should_not_mutate_msg_when_handling_8bit(self): 4317 msg = email.message_from_bytes(self.non_latin_bin_msg) 4318 out = BytesIO() 4319 BytesGenerator(out).flatten(msg) 4320 orig_value = out.getvalue() 4321 Generator(StringIO()).flatten(msg) # Should not mutate msg! 4322 out = BytesIO() 4323 BytesGenerator(out).flatten(msg) 4324 self.assertEqual(out.getvalue(), orig_value) 4325 4326 def test_bytes_generator_with_unix_from(self): 4327 # The unixfrom contains a current date, so we can't check it 4328 # literally. Just make sure the first word is 'From' and the 4329 # rest of the message matches the input. 
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Same message as the wrapped 7bit form, but with the two folded Subject
    # lines (list items 2 and 3) replaced by a single unfolded line.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        # Round-trip through a real file opened in binary mode; the file is
        # removed again by the registered cleanup.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # An 8bit latin-1 message as it would arrive on the wire ...
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # ... and the quoted-printable text the str generators are expected to
    # produce for it.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        # DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
            self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message in 10-byte slices so that chunk boundaries fall
        # in the middle of lines.
        bfp = email.feedparser.BytesFeedParser()
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Flattening with linesep='\r\n' must reproduce the file exactly.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_8bit_multipart(self):
        # Issue 11605
        # The test only asserts a byte-for-byte round trip of this source.
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
            "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
            <title>foodwatch - Newsletter</title>
            </head>
            <body>
            <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team
            die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        msg = email.message_from_bytes(source)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg)
        self.assertEqual(s.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in BytesGenerator, test Generator for completeness.
        m = Message()
        # This has enough non-ascii that it should always end up b encoded.
        m['Subject'] = Header('žluťoučký kůň')
        s = StringIO()
        g = email.generator.Generator(s)
        g.flatten(m, linesep='\r\n')
        self.assertEqual(
            s.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    maxDiff = None


# Mixin that overrides the two hooks used by the idempotency tests so that
# they parse from bytes and flatten with BytesGenerator.  Subclasses supply
# linesep, blinesep and normalize_linesep_regex.
class BaseTestBytesGeneratorIdempotent:

    maxDiff = None

    def _msgobj(self, filename):
        # Return (message, raw bytes) with the file's line endings rewritten
        # to the subclass's blinesep before parsing.
        with openfile(filename, 'rb') as fp:
            data = fp.read()
        data = self.normalize_linesep_regex.sub(self.blinesep, data)
        msg = email.message_from_bytes(data)
        return msg, data

    def _idempotent(self, msg, data, unixfrom=False):
        # maxheaderlen=0 disables header re-wrapping so output can equal input.
        b = BytesIO()
        g = email.generator.BytesGenerator(b, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, b.getvalue())


class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    linesep = '\n'
    blinesep = b'\n'
    # Turn every CRLF into a bare LF.
    normalize_linesep_regex = re.compile(br'\r\n')


class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                      TestIdempotent):
    linesep = '\r\n'
    blinesep = b'\r\n'
    # Only match an LF that is not already preceded by CR.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')


# Test the email.base64mime helpers
class TestBase64(unittest.TestCase):
    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Base64 output grows by 4 characters for every started 3-byte group.
        for size in range(15):
            if size == 0 : bsize = 0
            elif size <= 3 : bsize = 4
            elif size <= 6 : bsize = 8
            elif size <= 9 : bsize = 12
            elif size <= 12: bsize = 16
            else : bsize = 20
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), '')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')


# Test the email.quoprimime helpers
class TestQuopri(unittest.TestCase):
    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            (c for c in b'!*+-/')))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: only pass charset through when the caller supplied one, so
        # header_encode's own default is exercised otherwise.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Helper: decode and compare.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: only pass eol through when the caller supplied one.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: call body_encode with only the keyword arguments the caller
        # supplied, then (when the eol is one splitlines() understands) also
        # check that no output line exceeds maxlinelen.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_line_eol_after_non_ascii(self):
        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
                          'hello=CF=85\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4808 4809 def test_encode_two_lines_one_space(self): 4810 self._test_encode(' \n \n', '=20\n=20\n') 4811 4812 def test_encode_one_word_trailing_spaces(self): 4813 self._test_encode('hello ', 'hello =20') 4814 4815 def test_encode_one_line_trailing_spaces(self): 4816 self._test_encode('hello \n', 'hello =20\n') 4817 4818 def test_encode_one_word_trailing_tab(self): 4819 self._test_encode('hello \t', 'hello =09') 4820 4821 def test_encode_one_line_trailing_tab(self): 4822 self._test_encode('hello \t\n', 'hello =09\n') 4823 4824 def test_encode_trailing_space_before_maxlinelen(self): 4825 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6) 4826 4827 def test_encode_trailing_space_at_maxlinelen(self): 4828 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5) 4829 4830 def test_encode_trailing_space_beyond_maxlinelen(self): 4831 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4) 4832 4833 def test_encode_whitespace_lines(self): 4834 self._test_encode(' \n' * 5, '=20\n' * 5) 4835 4836 def test_encode_quoted_equals(self): 4837 self._test_encode('a = b', 'a =3D b') 4838 4839 def test_encode_one_long_string(self): 4840 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25) 4841 4842 def test_encode_one_long_line(self): 4843 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n') 4844 4845 def test_encode_one_very_long_line(self): 4846 self._test_encode('x' * 200 + '\n', 4847 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n') 4848 4849 def test_encode_shortest_maxlinelen(self): 4850 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4) 4851 4852 def test_encode_maxlinelen_too_small(self): 4853 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3) 4854 4855 def test_encode(self): 4856 eq = self.assertEqual 4857 eq(quoprimime.body_encode(''), '') 4858 eq(quoprimime.body_encode('hello'), 'hello') 4859 # Test the binary flag 4860 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld') 4861 # Test 
the maxlinelen arg 4862 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\ 4863xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= 4864 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= 4865x xxxx xxxx xxxx xxxx=20""") 4866 # Test the eol argument 4867 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4868 """\ 4869xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r 4870 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r 4871x xxxx xxxx xxxx xxxx=20""") 4872 eq(quoprimime.body_encode("""\ 4873one line 4874 4875two line"""), """\ 4876one line 4877 4878two line""") 4879 4880 4881 4882# Test the Charset class 4883class TestCharset(unittest.TestCase): 4884 def tearDown(self): 4885 from email import charset as CharsetModule 4886 try: 4887 del CharsetModule.CHARSETS['fake'] 4888 except KeyError: 4889 pass 4890 4891 def test_codec_encodeable(self): 4892 eq = self.assertEqual 4893 # Make sure us-ascii = no Unicode conversion 4894 c = Charset('us-ascii') 4895 eq(c.header_encode('Hello World!'), 'Hello World!') 4896 # Test 8-bit idempotency with us-ascii 4897 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' 4898 self.assertRaises(UnicodeError, c.header_encode, s) 4899 c = Charset('utf-8') 4900 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=') 4901 4902 def test_body_encode(self): 4903 eq = self.assertEqual 4904 # Try a charset with QP body encoding 4905 c = Charset('iso-8859-1') 4906 eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) 4907 # Try a charset with Base64 body encoding 4908 c = Charset('utf-8') 4909 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world')) 4910 # Try a charset with None body encoding 4911 c = Charset('us-ascii') 4912 eq('hello world', c.body_encode('hello world')) 4913 # Try the convert argument, where input codec != output codec 4914 c = Charset('euc-jp') 4915 # With apologies to Tokio Kikuchi ;) 4916 # XXX FIXME 4917## try: 4918## eq('\x1b$B5FCO;~IW\x1b(B', 4919## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) 4920## 
eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', 4921## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) 4922## except LookupError: 4923## # We probably don't have the Japanese codecs installed 4924## pass 4925 # Testing SF bug #625509, which we have to fake, since there are no 4926 # built-in encodings where the header encoding is QP but the body 4927 # encoding is not. 4928 from email import charset as CharsetModule 4929 CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8') 4930 c = Charset('fake') 4931 eq('hello world', c.body_encode('hello world')) 4932 4933 def test_unicode_charset_name(self): 4934 charset = Charset('us-ascii') 4935 self.assertEqual(str(charset), 'us-ascii') 4936 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') 4937 4938 4939 4940# Test multilingual MIME headers. 4941class TestHeader(TestEmailBase): 4942 def test_simple(self): 4943 eq = self.ndiffAssertEqual 4944 h = Header('Hello World!') 4945 eq(h.encode(), 'Hello World!') 4946 h.append(' Goodbye World!') 4947 eq(h.encode(), 'Hello World! Goodbye World!') 4948 4949 def test_simple_surprise(self): 4950 eq = self.ndiffAssertEqual 4951 h = Header('Hello World!') 4952 eq(h.encode(), 'Hello World!') 4953 h.append('Goodbye World!') 4954 eq(h.encode(), 'Hello World! 
Goodbye World!') 4955 4956 def test_header_needs_no_decoding(self): 4957 h = 'no decoding needed' 4958 self.assertEqual(decode_header(h), [(h, None)]) 4959 4960 def test_long(self): 4961 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", 4962 maxlinelen=76) 4963 for l in h.encode(splitchars=' ').split('\n '): 4964 self.assertLessEqual(len(l), 76) 4965 4966 def test_multilingual(self): 4967 eq = self.ndiffAssertEqual 4968 g = Charset("iso-8859-1") 4969 cz = Charset("iso-8859-2") 4970 utf8 = Charset("utf-8") 4971 g_head = (b'Die Mieter treten hier ein werden mit einem ' 4972 b'Foerderband komfortabel den Korridor entlang, ' 4973 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, ' 4974 b'gegen die rotierenden Klingen bef\xf6rdert. ') 4975 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 4976 b'd\xf9vtipu.. ') 4977 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 4978 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 4979 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 4980 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 4981 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 4982 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 4983 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 4984 '\u3044\u307e\u3059\u3002') 4985 h = Header(g_head, g) 4986 h.append(cz_head, cz) 4987 h.append(utf8_head, utf8) 4988 enc = h.encode(maxlinelen=76) 4989 eq(enc, """\ 4990=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?= 4991 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?= 4992 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?= 4993 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= 4994 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 4995 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= 4996 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= 4997 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= 4998 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?= 4999 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?= 5000 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""") 5001 decoded = decode_header(enc) 5002 eq(len(decoded), 3) 5003 eq(decoded[0], (g_head, 'iso-8859-1')) 5004 eq(decoded[1], (cz_head, 'iso-8859-2')) 5005 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8')) 5006 ustr = str(h) 5007 eq(ustr, 5008 (b'Die Mieter treten hier ein werden mit einem Foerderband ' 5009 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' 5010 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' 5011 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' 5012 b'tlakem jejich d\xc5\xafvtipu.. 
\xe6\xad\xa3\xe7\xa2\xba\xe3\x81' 5013 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' 5014 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' 5015 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' 5016 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' 5017 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' 5018 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' 5019 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' 5020 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' 5021 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' 5022 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' 5023 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82' 5024 ).decode('utf-8')) 5025 # Test make_header() 5026 newh = make_header(decode_header(enc)) 5027 eq(newh, h) 5028 5029 def test_empty_header_encode(self): 5030 h = Header() 5031 self.assertEqual(h.encode(), '') 5032 5033 def test_header_ctor_default_args(self): 5034 eq = self.ndiffAssertEqual 5035 h = Header() 5036 eq(h, '') 5037 h.append('foo', Charset('iso-8859-1')) 5038 eq(h, 'foo') 5039 5040 def test_explicit_maxlinelen(self): 5041 eq = self.ndiffAssertEqual 5042 hstr = ('A very long line that must get split to something other ' 5043 'than at the 76th character boundary to test the non-default ' 5044 'behavior') 5045 h = Header(hstr) 5046 eq(h.encode(), '''\ 5047A very long line that must get split to something other than at the 76th 5048 character boundary to test the non-default behavior''') 5049 eq(str(h), hstr) 5050 h = Header(hstr, header_name='Subject') 5051 eq(h.encode(), '''\ 5052A very long line that must get split to something other than at the 5053 76th character boundary to test the non-default behavior''') 5054 eq(str(h), hstr) 5055 h = Header(hstr, maxlinelen=1024, header_name='Subject') 5056 eq(h.encode(), hstr) 5057 eq(str(h), hstr) 5058 5059 
def test_quopri_splittable(self): 5060 eq = self.ndiffAssertEqual 5061 h = Header(charset='iso-8859-1', maxlinelen=20) 5062 x = 'xxxx ' * 20 5063 h.append(x) 5064 s = h.encode() 5065 eq(s, """\ 5066=?iso-8859-1?q?xxx?= 5067 =?iso-8859-1?q?x_?= 5068 =?iso-8859-1?q?xx?= 5069 =?iso-8859-1?q?xx?= 5070 =?iso-8859-1?q?_x?= 5071 =?iso-8859-1?q?xx?= 5072 =?iso-8859-1?q?x_?= 5073 =?iso-8859-1?q?xx?= 5074 =?iso-8859-1?q?xx?= 5075 =?iso-8859-1?q?_x?= 5076 =?iso-8859-1?q?xx?= 5077 =?iso-8859-1?q?x_?= 5078 =?iso-8859-1?q?xx?= 5079 =?iso-8859-1?q?xx?= 5080 =?iso-8859-1?q?_x?= 5081 =?iso-8859-1?q?xx?= 5082 =?iso-8859-1?q?x_?= 5083 =?iso-8859-1?q?xx?= 5084 =?iso-8859-1?q?xx?= 5085 =?iso-8859-1?q?_x?= 5086 =?iso-8859-1?q?xx?= 5087 =?iso-8859-1?q?x_?= 5088 =?iso-8859-1?q?xx?= 5089 =?iso-8859-1?q?xx?= 5090 =?iso-8859-1?q?_x?= 5091 =?iso-8859-1?q?xx?= 5092 =?iso-8859-1?q?x_?= 5093 =?iso-8859-1?q?xx?= 5094 =?iso-8859-1?q?xx?= 5095 =?iso-8859-1?q?_x?= 5096 =?iso-8859-1?q?xx?= 5097 =?iso-8859-1?q?x_?= 5098 =?iso-8859-1?q?xx?= 5099 =?iso-8859-1?q?xx?= 5100 =?iso-8859-1?q?_x?= 5101 =?iso-8859-1?q?xx?= 5102 =?iso-8859-1?q?x_?= 5103 =?iso-8859-1?q?xx?= 5104 =?iso-8859-1?q?xx?= 5105 =?iso-8859-1?q?_x?= 5106 =?iso-8859-1?q?xx?= 5107 =?iso-8859-1?q?x_?= 5108 =?iso-8859-1?q?xx?= 5109 =?iso-8859-1?q?xx?= 5110 =?iso-8859-1?q?_x?= 5111 =?iso-8859-1?q?xx?= 5112 =?iso-8859-1?q?x_?= 5113 =?iso-8859-1?q?xx?= 5114 =?iso-8859-1?q?xx?= 5115 =?iso-8859-1?q?_?=""") 5116 eq(x, str(make_header(decode_header(s)))) 5117 h = Header(charset='iso-8859-1', maxlinelen=40) 5118 h.append('xxxx ' * 20) 5119 s = h.encode() 5120 eq(s, """\ 5121=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?= 5122 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?= 5123 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= 5124 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= 5125 =?iso-8859-1?q?_xxxx_xxxx_?=""") 5126 eq(x, str(make_header(decode_header(s)))) 5127 5128 def test_base64_splittable(self): 5129 eq = self.ndiffAssertEqual 5130 h = Header(charset='koi8-r', 
maxlinelen=20) 5131 x = 'xxxx ' * 20 5132 h.append(x) 5133 s = h.encode() 5134 eq(s, """\ 5135=?koi8-r?b?eHh4?= 5136 =?koi8-r?b?eCB4?= 5137 =?koi8-r?b?eHh4?= 5138 =?koi8-r?b?IHh4?= 5139 =?koi8-r?b?eHgg?= 5140 =?koi8-r?b?eHh4?= 5141 =?koi8-r?b?eCB4?= 5142 =?koi8-r?b?eHh4?= 5143 =?koi8-r?b?IHh4?= 5144 =?koi8-r?b?eHgg?= 5145 =?koi8-r?b?eHh4?= 5146 =?koi8-r?b?eCB4?= 5147 =?koi8-r?b?eHh4?= 5148 =?koi8-r?b?IHh4?= 5149 =?koi8-r?b?eHgg?= 5150 =?koi8-r?b?eHh4?= 5151 =?koi8-r?b?eCB4?= 5152 =?koi8-r?b?eHh4?= 5153 =?koi8-r?b?IHh4?= 5154 =?koi8-r?b?eHgg?= 5155 =?koi8-r?b?eHh4?= 5156 =?koi8-r?b?eCB4?= 5157 =?koi8-r?b?eHh4?= 5158 =?koi8-r?b?IHh4?= 5159 =?koi8-r?b?eHgg?= 5160 =?koi8-r?b?eHh4?= 5161 =?koi8-r?b?eCB4?= 5162 =?koi8-r?b?eHh4?= 5163 =?koi8-r?b?IHh4?= 5164 =?koi8-r?b?eHgg?= 5165 =?koi8-r?b?eHh4?= 5166 =?koi8-r?b?eCB4?= 5167 =?koi8-r?b?eHh4?= 5168 =?koi8-r?b?IA==?=""") 5169 eq(x, str(make_header(decode_header(s)))) 5170 h = Header(charset='koi8-r', maxlinelen=40) 5171 h.append(x) 5172 s = h.encode() 5173 eq(s, """\ 5174=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?= 5175 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?= 5176 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?= 5177 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?= 5178 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?= 5179 =?koi8-r?b?eHh4eCB4eHh4IA==?=""") 5180 eq(x, str(make_header(decode_header(s)))) 5181 5182 def test_us_ascii_header(self): 5183 eq = self.assertEqual 5184 s = 'hello' 5185 x = decode_header(s) 5186 eq(x, [('hello', None)]) 5187 h = make_header(x) 5188 eq(s, h.encode()) 5189 5190 def test_string_charset(self): 5191 eq = self.assertEqual 5192 h = Header() 5193 h.append('hello', 'iso-8859-1') 5194 eq(h, 'hello') 5195 5196## def test_unicode_error(self): 5197## raises = self.assertRaises 5198## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') 5199## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') 5200## h = Header() 5201## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') 5202## raises(UnicodeError, h.append, '[P\xf6stal]', 
'us-ascii') 5203## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') 5204 5205 def test_utf8_shortest(self): 5206 eq = self.assertEqual 5207 h = Header('p\xf6stal', 'utf-8') 5208 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') 5209 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8') 5210 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') 5211 5212 def test_bad_8bit_header(self): 5213 raises = self.assertRaises 5214 eq = self.assertEqual 5215 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5216 raises(UnicodeError, Header, x) 5217 h = Header() 5218 raises(UnicodeError, h.append, x) 5219 e = x.decode('utf-8', 'replace') 5220 eq(str(Header(x, errors='replace')), e) 5221 h.append(x, errors='replace') 5222 eq(str(h), e) 5223 5224 def test_escaped_8bit_header(self): 5225 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5226 e = x.decode('ascii', 'surrogateescape') 5227 h = Header(e, charset=email.charset.UNKNOWN8BIT) 5228 self.assertEqual(str(h), 5229 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 5230 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 5231 5232 def test_header_handles_binary_unknown8bit(self): 5233 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5234 h = Header(x, charset=email.charset.UNKNOWN8BIT) 5235 self.assertEqual(str(h), 5236 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 5237 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 5238 5239 def test_make_header_handles_binary_unknown8bit(self): 5240 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5241 h = Header(x, charset=email.charset.UNKNOWN8BIT) 5242 h2 = email.header.make_header(email.header.decode_header(h)) 5243 self.assertEqual(str(h2), 5244 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 5245 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) 5246 5247 def test_modify_returned_list_does_not_change_header(self): 5248 h = Header('test') 5249 chunks = 
email.header.decode_header(h) 5250 chunks.append(('ascii', 'test2')) 5251 self.assertEqual(str(h), 'test') 5252 5253 def test_encoded_adjacent_nonencoded(self): 5254 eq = self.assertEqual 5255 h = Header() 5256 h.append('hello', 'iso-8859-1') 5257 h.append('world') 5258 s = h.encode() 5259 eq(s, '=?iso-8859-1?q?hello?= world') 5260 h = make_header(decode_header(s)) 5261 eq(h.encode(), s) 5262 5263 def test_whitespace_keeper(self): 5264 eq = self.assertEqual 5265 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' 5266 parts = decode_header(s) 5267 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)]) 5268 hdr = make_header(parts) 5269 eq(hdr.encode(), 5270 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 5271 5272 def test_broken_base64_header(self): 5273 raises = self.assertRaises 5274 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' 5275 raises(errors.HeaderParseError, decode_header, s) 5276 5277 def test_shift_jis_charset(self): 5278 h = Header('文', charset='shift_jis') 5279 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=') 5280 5281 def test_flatten_header_with_no_value(self): 5282 # Issue 11401 (regression from email 4.x) Note that the space after 5283 # the header doesn't reflect the input, but this is also the way 5284 # email 4.x behaved. At some point it would be nice to fix that. 
5285 msg = email.message_from_string("EmptyHeader:") 5286 self.assertEqual(str(msg), "EmptyHeader: \n\n") 5287 5288 def test_encode_preserves_leading_ws_on_value(self): 5289 msg = Message() 5290 msg['SomeHeader'] = ' value with leading ws' 5291 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n") 5292 5293 def test_whitespace_header(self): 5294 self.assertEqual(Header(' ').encode(), ' ') 5295 5296 5297 5298# Test RFC 2231 header parameters (en/de)coding 5299class TestRFC2231(TestEmailBase): 5300 5301 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5302 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5303 def test_get_param(self): 5304 eq = self.assertEqual 5305 msg = self._msgobj('msg_29.txt') 5306 eq(msg.get_param('title'), 5307 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5308 eq(msg.get_param('title', unquote=False), 5309 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) 5310 5311 def test_set_param(self): 5312 eq = self.ndiffAssertEqual 5313 msg = Message() 5314 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5315 charset='us-ascii') 5316 eq(msg.get_param('title'), 5317 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) 5318 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5319 charset='us-ascii', language='en') 5320 eq(msg.get_param('title'), 5321 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5322 msg = self._msgobj('msg_01.txt') 5323 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5324 charset='us-ascii', language='en') 5325 eq(msg.as_string(maxheaderlen=78), """\ 5326Return-Path: <bbb@zzz.org> 5327Delivered-To: bbb@zzz.org 5328Received: by mail.zzz.org (Postfix, from userid 889) 5329\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5330MIME-Version: 1.0 5331Content-Transfer-Encoding: 7bit 5332Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5333From: bbb@ddd.com 
(John X. Doe) 5334To: bbb@zzz.org 5335Subject: This is a test message 5336Date: Fri, 4 May 2001 14:05:44 -0400 5337Content-Type: text/plain; charset=us-ascii; 5338 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5339 5340 5341Hi, 5342 5343Do you like this message? 5344 5345-Me 5346""") 5347 5348 def test_set_param_requote(self): 5349 msg = Message() 5350 msg.set_param('title', 'foo') 5351 self.assertEqual(msg['content-type'], 'text/plain; title="foo"') 5352 msg.set_param('title', 'bar', requote=False) 5353 self.assertEqual(msg['content-type'], 'text/plain; title=bar') 5354 # tspecial is still quoted. 5355 msg.set_param('title', "(bar)bell", requote=False) 5356 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"') 5357 5358 def test_del_param(self): 5359 eq = self.ndiffAssertEqual 5360 msg = self._msgobj('msg_01.txt') 5361 msg.set_param('foo', 'bar', charset='us-ascii', language='en') 5362 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5363 charset='us-ascii', language='en') 5364 msg.del_param('foo', header='Content-Type') 5365 eq(msg.as_string(maxheaderlen=78), """\ 5366Return-Path: <bbb@zzz.org> 5367Delivered-To: bbb@zzz.org 5368Received: by mail.zzz.org (Postfix, from userid 889) 5369\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5370MIME-Version: 1.0 5371Content-Transfer-Encoding: 7bit 5372Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5373From: bbb@ddd.com (John X. Doe) 5374To: bbb@zzz.org 5375Subject: This is a test message 5376Date: Fri, 4 May 2001 14:05:44 -0400 5377Content-Type: text/plain; charset="us-ascii"; 5378 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5379 5380 5381Hi, 5382 5383Do you like this message? 5384 5385-Me 5386""") 5387 5388 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset 5389 # I changed the charset name, though, because the one in the file isn't 5390 # a legal charset name. 
Should add a test for an illegal charset. 5391 def test_rfc2231_get_content_charset(self): 5392 eq = self.assertEqual 5393 msg = self._msgobj('msg_32.txt') 5394 eq(msg.get_content_charset(), 'us-ascii') 5395 5396 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes 5397 def test_rfc2231_parse_rfc_quoting(self): 5398 m = textwrap.dedent('''\ 5399 Content-Disposition: inline; 5400 \tfilename*0*=''This%20is%20even%20more%20; 5401 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; 5402 \tfilename*2="is it not.pdf" 5403 5404 ''') 5405 msg = email.message_from_string(m) 5406 self.assertEqual(msg.get_filename(), 5407 'This is even more ***fun*** is it not.pdf') 5408 self.assertEqual(m, msg.as_string()) 5409 5410 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5411 def test_rfc2231_parse_extra_quoting(self): 5412 m = textwrap.dedent('''\ 5413 Content-Disposition: inline; 5414 \tfilename*0*="''This%20is%20even%20more%20"; 5415 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5416 \tfilename*2="is it not.pdf" 5417 5418 ''') 5419 msg = email.message_from_string(m) 5420 self.assertEqual(msg.get_filename(), 5421 'This is even more ***fun*** is it not.pdf') 5422 self.assertEqual(m, msg.as_string()) 5423 5424 # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset 5425 # but new test uses *0* because otherwise lang/charset is not valid. 
5426 # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values 5427 def test_rfc2231_no_language_or_charset(self): 5428 m = '''\ 5429Content-Transfer-Encoding: 8bit 5430Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" 5431Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm 5432 5433''' 5434 msg = email.message_from_string(m) 5435 param = msg.get_param('NAME') 5436 self.assertNotIsInstance(param, tuple) 5437 self.assertEqual( 5438 param, 5439 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') 5440 5441 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset 5442 def test_rfc2231_no_language_or_charset_in_filename(self): 5443 m = '''\ 5444Content-Disposition: inline; 5445\tfilename*0*="''This%20is%20even%20more%20"; 5446\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5447\tfilename*2="is it not.pdf" 5448 5449''' 5450 msg = email.message_from_string(m) 5451 self.assertEqual(msg.get_filename(), 5452 'This is even more ***fun*** is it not.pdf') 5453 5454 # Duplicate of previous test? 5455 def test_rfc2231_no_language_or_charset_in_filename_encoded(self): 5456 m = '''\ 5457Content-Disposition: inline; 5458\tfilename*0*="''This%20is%20even%20more%20"; 5459\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5460\tfilename*2="is it not.pdf" 5461 5462''' 5463 msg = email.message_from_string(m) 5464 self.assertEqual(msg.get_filename(), 5465 'This is even more ***fun*** is it not.pdf') 5466 5467 # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded, 5468 # but the test below is wrong (the first part should be decoded). 
5469 def test_rfc2231_partly_encoded(self): 5470 m = '''\ 5471Content-Disposition: inline; 5472\tfilename*0="''This%20is%20even%20more%20"; 5473\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5474\tfilename*2="is it not.pdf" 5475 5476''' 5477 msg = email.message_from_string(m) 5478 self.assertEqual( 5479 msg.get_filename(), 5480 'This%20is%20even%20more%20***fun*** is it not.pdf') 5481 5482 def test_rfc2231_partly_nonencoded(self): 5483 m = '''\ 5484Content-Disposition: inline; 5485\tfilename*0="This%20is%20even%20more%20"; 5486\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; 5487\tfilename*2="is it not.pdf" 5488 5489''' 5490 msg = email.message_from_string(m) 5491 self.assertEqual( 5492 msg.get_filename(), 5493 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') 5494 5495 def test_rfc2231_no_language_or_charset_in_boundary(self): 5496 m = '''\ 5497Content-Type: multipart/alternative; 5498\tboundary*0*="''This%20is%20even%20more%20"; 5499\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5500\tboundary*2="is it not.pdf" 5501 5502''' 5503 msg = email.message_from_string(m) 5504 self.assertEqual(msg.get_boundary(), 5505 'This is even more ***fun*** is it not.pdf') 5506 5507 def test_rfc2231_no_language_or_charset_in_charset(self): 5508 # This is a nonsensical charset value, but tests the code anyway 5509 m = '''\ 5510Content-Type: text/plain; 5511\tcharset*0*="This%20is%20even%20more%20"; 5512\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5513\tcharset*2="is it not.pdf" 5514 5515''' 5516 msg = email.message_from_string(m) 5517 self.assertEqual(msg.get_content_charset(), 5518 'this is even more ***fun*** is it not.pdf') 5519 5520 # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii 5521 def test_rfc2231_bad_encoding_in_filename(self): 5522 m = '''\ 5523Content-Disposition: inline; 5524\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; 5525\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5526\tfilename*2="is it not.pdf" 5527 5528''' 5529 msg = 
email.message_from_string(m) 5530 self.assertEqual(msg.get_filename(), 5531 'This is even more ***fun*** is it not.pdf') 5532 5533 def test_rfc2231_bad_encoding_in_charset(self): 5534 m = """\ 5535Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D 5536 5537""" 5538 msg = email.message_from_string(m) 5539 # This should return None because non-ascii characters in the charset 5540 # are not allowed. 5541 self.assertEqual(msg.get_content_charset(), None) 5542 5543 def test_rfc2231_bad_character_in_charset(self): 5544 m = """\ 5545Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D 5546 5547""" 5548 msg = email.message_from_string(m) 5549 # This should return None because non-ascii characters in the charset 5550 # are not allowed. 5551 self.assertEqual(msg.get_content_charset(), None) 5552 5553 def test_rfc2231_bad_character_in_filename(self): 5554 m = '''\ 5555Content-Disposition: inline; 5556\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; 5557\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5558\tfilename*2*="is it not.pdf%E2" 5559 5560''' 5561 msg = email.message_from_string(m) 5562 self.assertEqual(msg.get_filename(), 5563 'This is even more ***fun*** is it not.pdf\ufffd') 5564 5565 def test_rfc2231_unknown_encoding(self): 5566 m = """\ 5567Content-Transfer-Encoding: 8bit 5568Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt 5569 5570""" 5571 msg = email.message_from_string(m) 5572 self.assertEqual(msg.get_filename(), 'myfile.txt') 5573 5574 def test_rfc2231_bad_character_in_encoding(self): 5575 m = """\ 5576Content-Transfer-Encoding: 8bit 5577Content-Disposition: inline; filename*=utf-8\udce2\udc80\udc9d''myfile.txt 5578 5579""" 5580 msg = email.message_from_string(m) 5581 self.assertEqual(msg.get_filename(), 'myfile.txt') 5582 5583 def test_rfc2231_single_tick_in_filename_extended(self): 5584 eq = self.assertEqual 5585 m = """\ 5586Content-Type: application/x-foo; 5587\tname*0*=\"Frank's\"; name*1*=\" Document\" 5588 5589""" 5590 msg = 
email.message_from_string(m) 5591 charset, language, s = msg.get_param('name') 5592 eq(charset, None) 5593 eq(language, None) 5594 eq(s, "Frank's Document") 5595 5596 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5597 def test_rfc2231_single_tick_in_filename(self): 5598 m = """\ 5599Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" 5600 5601""" 5602 msg = email.message_from_string(m) 5603 param = msg.get_param('name') 5604 self.assertNotIsInstance(param, tuple) 5605 self.assertEqual(param, "Frank's Document") 5606 5607 def test_rfc2231_missing_tick(self): 5608 m = '''\ 5609Content-Disposition: inline; 5610\tfilename*0*="'This%20is%20broken"; 5611''' 5612 msg = email.message_from_string(m) 5613 self.assertEqual( 5614 msg.get_filename(), 5615 "'This is broken") 5616 5617 def test_rfc2231_missing_tick_with_encoded_non_ascii(self): 5618 m = '''\ 5619Content-Disposition: inline; 5620\tfilename*0*="'This%20is%E2broken"; 5621''' 5622 msg = email.message_from_string(m) 5623 self.assertEqual( 5624 msg.get_filename(), 5625 "'This is\ufffdbroken") 5626 5627 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang 5628 def test_rfc2231_tick_attack_extended(self): 5629 eq = self.assertEqual 5630 m = """\ 5631Content-Type: application/x-foo; 5632\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" 5633 5634""" 5635 msg = email.message_from_string(m) 5636 charset, language, s = msg.get_param('name') 5637 eq(charset, 'us-ascii') 5638 eq(language, 'en-us') 5639 eq(s, "Frank's Document") 5640 5641 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value 5642 def test_rfc2231_tick_attack(self): 5643 m = """\ 5644Content-Type: application/x-foo; 5645\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" 5646 5647""" 5648 msg = email.message_from_string(m) 5649 param = msg.get_param('name') 5650 self.assertNotIsInstance(param, tuple) 5651 
self.assertEqual(param, "us-ascii'en-us'Frank's Document") 5652 5653 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes 5654 def test_rfc2231_no_extended_values(self): 5655 eq = self.assertEqual 5656 m = """\ 5657Content-Type: application/x-foo; name=\"Frank's Document\" 5658 5659""" 5660 msg = email.message_from_string(m) 5661 eq(msg.get_param('name'), "Frank's Document") 5662 5663 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments 5664 def test_rfc2231_encoded_then_unencoded_segments(self): 5665 eq = self.assertEqual 5666 m = """\ 5667Content-Type: application/x-foo; 5668\tname*0*=\"us-ascii'en-us'My\"; 5669\tname*1=\" Document\"; 5670\tname*2*=\" For You\" 5671 5672""" 5673 msg = email.message_from_string(m) 5674 charset, language, s = msg.get_param('name') 5675 eq(charset, 'us-ascii') 5676 eq(language, 'en-us') 5677 eq(s, 'My Document For You') 5678 5679 # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments 5680 # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments 5681 def test_rfc2231_unencoded_then_encoded_segments(self): 5682 eq = self.assertEqual 5683 m = """\ 5684Content-Type: application/x-foo; 5685\tname*0=\"us-ascii'en-us'My\"; 5686\tname*1*=\" Document\"; 5687\tname*2*=\" For You\" 5688 5689""" 5690 msg = email.message_from_string(m) 5691 charset, language, s = msg.get_param('name') 5692 eq(charset, 'us-ascii') 5693 eq(language, 'en-us') 5694 eq(s, 'My Document For You') 5695 5696 def test_should_not_hang_on_invalid_ew_messages(self): 5697 messages = ["""From: user@host.com 5698To: user@host.com 5699Bad-Header: 5700 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?= 5701 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?= 5702 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?= 5703 5704Hello! 
5705""", """From: ����� �������� <xxx@xxx> 5706To: "xxx" <xxx@xxx> 5707Subject: ��� ���������� ����� ����� � ��������� �� ���� 5708MIME-Version: 1.0 5709Content-Type: text/plain; charset="windows-1251"; 5710Content-Transfer-Encoding: 8bit 5711 5712�� ����� � ���� ������ ��� �������� 5713"""] 5714 for m in messages: 5715 with self.subTest(m=m): 5716 msg = email.message_from_string(m) 5717 5718 5719# Tests to ensure that signed parts of an email are completely preserved, as 5720# required by RFC1847 section 2.1. Note that these are incomplete, because the 5721# email package does not currently always preserve the body. See issue 1670765. 5722class TestSigned(TestEmailBase): 5723 5724 def _msg_and_obj(self, filename): 5725 with openfile(filename, encoding="utf-8") as fp: 5726 original = fp.read() 5727 msg = email.message_from_string(original) 5728 return original, msg 5729 5730 def _signed_parts_eq(self, original, result): 5731 # Extract the first mime part of each message 5732 import re 5733 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) 5734 inpart = repart.search(original).group(2) 5735 outpart = repart.search(result).group(2) 5736 self.assertEqual(outpart, inpart) 5737 5738 def test_long_headers_as_string(self): 5739 original, msg = self._msg_and_obj('msg_45.txt') 5740 result = msg.as_string() 5741 self._signed_parts_eq(original, result) 5742 5743 def test_long_headers_as_string_maxheaderlen(self): 5744 original, msg = self._msg_and_obj('msg_45.txt') 5745 result = msg.as_string(maxheaderlen=60) 5746 self._signed_parts_eq(original, result) 5747 5748 def test_long_headers_flatten(self): 5749 original, msg = self._msg_and_obj('msg_45.txt') 5750 fp = StringIO() 5751 Generator(fp).flatten(msg) 5752 result = fp.getvalue() 5753 self._signed_parts_eq(original, result) 5754 5755class TestHeaderRegistry(TestEmailBase): 5756 # See issue gh-93010. 
5757 def test_HeaderRegistry(self): 5758 reg = HeaderRegistry() 5759 a = reg('Content-Disposition', 'attachment; 0*00="foo"') 5760 self.assertIsInstance(a.defects[0], errors.InvalidHeaderDefect) 5761 5762if __name__ == '__main__': 5763 unittest.main() 5764