1# Copyright (C) 2001-2010 Python Software Foundation 2# Contact: email-sig@python.org 3# email package unit tests 4 5import re 6import time 7import base64 8import unittest 9import textwrap 10 11from io import StringIO, BytesIO 12from itertools import chain 13from random import choice 14from threading import Thread 15from unittest.mock import patch 16 17import email 18import email.policy 19 20from email.charset import Charset 21from email.header import Header, decode_header, make_header 22from email.parser import Parser, HeaderParser 23from email.generator import Generator, DecodedGenerator, BytesGenerator 24from email.message import Message 25from email.mime.application import MIMEApplication 26from email.mime.audio import MIMEAudio 27from email.mime.text import MIMEText 28from email.mime.image import MIMEImage 29from email.mime.base import MIMEBase 30from email.mime.message import MIMEMessage 31from email.mime.multipart import MIMEMultipart 32from email.mime.nonmultipart import MIMENonMultipart 33from email import utils 34from email import errors 35from email import encoders 36from email import iterators 37from email import base64mime 38from email import quoprimime 39 40from test.support import threading_helper 41from test.support.os_helper import unlink 42from test.test_email import openfile, TestEmailBase 43 44# These imports are documented to work, but we are testing them using a 45# different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

# Small string constants used as separators when joining text.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A header that is absent yields the caller-supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        # Setting a charset on a bare message is expected to add the MIME
        # headers checked below.
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        # Pre-existing headers must survive; only the charset parameter is
        # added to Content-Type.
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_attach_when_payload_is_string(self):
        # attach() must refuse to add a part when the payload is a string,
        # even though the Content-Type says multipart.
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # With a failobj, entries lacking a charset report that value
        # instead of None.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        # Only the fixed 33-character prefix is compared; the rest of the
        # boundary is generated.
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt', encoding="utf-8") as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        # Keywords instead of bare positionals so the settings are readable.
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', encoding="utf-8") as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Message.__setitem__ appends rather than replaces, and header
            # lookup returns the first match.  Drop any previous value so
            # that every spelling is really the one being decoded.
            # (Deleting a missing header is a no-op.)
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world',
               'wrong decoded payload for CTE %r' % cte)
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # msg_17.txt holds the expected DecodedGenerator rendering.
        with openfile('msg_17.txt', encoding="utf-8") as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must equal the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        # as_string(policy=...) must match an explicit Generator run with
        # the same policy.
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_nonascii_as_string_without_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            F=F6=F6 b=E4r
            """)
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    def test_nonascii_as_string_without_content_type_and_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0

            Test if non-ascii messages with no Content-Type nor
            Content-Transfer-Encoding set can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        # Without a declared charset the non-ASCII bytes are expected to
        # come out as replacement characters.
        expected = source.decode('ascii', 'replace')
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
        "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header present, so the default lookup finds nothing.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # The quoted value must contain literal semicolons, otherwise this
        # test does not exercise what its name says.  The "&amp;" entities
        # supply them.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        # Parameters can also be set on a header other than Content-Type.
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end, not in its old slot.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a "/" is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A malformed Content-Type falls back to the text/plain default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_get_content_disposition(self):
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        # The returned disposition is lower-cased.
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Adjacent literals form one format string.  Wrapping them
                # in str((a, b)) would render the repr of a tuple instead.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        # A None parameter value produces a bare parameter name, with the
        # underscore in the keyword turned into a dash.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
761 def test_embedded_header_via_Header_rejected(self): 762 msg = Message() 763 msg['Dummy'] = Header('dummy\nX-Injected-Header: test') 764 self.assertRaises(errors.HeaderParseError, msg.as_string) 765 766 def test_embedded_header_via_string_rejected(self): 767 msg = Message() 768 msg['Dummy'] = 'dummy\nX-Injected-Header: test' 769 self.assertRaises(errors.HeaderParseError, msg.as_string) 770 771 def test_unicode_header_defaults_to_utf8_encoding(self): 772 # Issue 14291 773 m = MIMEText('abc\n') 774 m['Subject'] = 'É test' 775 self.assertEqual(str(m),textwrap.dedent("""\ 776 Content-Type: text/plain; charset="us-ascii" 777 MIME-Version: 1.0 778 Content-Transfer-Encoding: 7bit 779 Subject: =?utf-8?q?=C3=89_test?= 780 781 abc 782 """)) 783 784 def test_unicode_body_defaults_to_utf8_encoding(self): 785 # Issue 14291 786 m = MIMEText('É testabc\n') 787 self.assertEqual(str(m),textwrap.dedent("""\ 788 Content-Type: text/plain; charset="utf-8" 789 MIME-Version: 1.0 790 Content-Transfer-Encoding: base64 791 792 w4kgdGVzdGFiYwo= 793 """)) 794 795 796# Test the email.encoders module 797class TestEncoders(unittest.TestCase): 798 799 def test_EncodersEncode_base64(self): 800 with openfile('PyBanner048.gif', 'rb') as fp: 801 bindata = fp.read() 802 mimed = email.mime.image.MIMEImage(bindata) 803 base64ed = mimed.get_payload() 804 # the transfer-encoded body lines should all be <=76 characters 805 lines = base64ed.split('\n') 806 self.assertLessEqual(max([ len(x) for x in lines ]), 76) 807 808 def test_encode_empty_payload(self): 809 eq = self.assertEqual 810 msg = Message() 811 msg.set_charset('us-ascii') 812 eq(msg['content-transfer-encoding'], '7bit') 813 814 def test_default_cte(self): 815 eq = self.assertEqual 816 # 7bit data and the default us-ascii _charset 817 msg = MIMEText('hello world') 818 eq(msg['content-transfer-encoding'], '7bit') 819 # Similar, but with 8bit data 820 msg = MIMEText('hello \xf8 world') 821 eq(msg['content-transfer-encoding'], 'base64') 822 # And 
now with a different charset 823 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') 824 eq(msg['content-transfer-encoding'], 'quoted-printable') 825 826 def test_encode7or8bit(self): 827 # Make sure a charset whose input character set is 8bit but 828 # whose output character set is 7bit gets a transfer-encoding 829 # of 7bit. 830 eq = self.assertEqual 831 msg = MIMEText('文\n', _charset='euc-jp') 832 eq(msg['content-transfer-encoding'], '7bit') 833 eq(msg.as_string(), textwrap.dedent("""\ 834 MIME-Version: 1.0 835 Content-Type: text/plain; charset="iso-2022-jp" 836 Content-Transfer-Encoding: 7bit 837 838 \x1b$BJ8\x1b(B 839 """)) 840 841 def test_qp_encode_latin1(self): 842 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') 843 self.assertEqual(str(msg), textwrap.dedent("""\ 844 MIME-Version: 1.0 845 Content-Type: text/text; charset="iso-8859-1" 846 Content-Transfer-Encoding: quoted-printable 847 848 =E1=F6 849 """)) 850 851 def test_qp_encode_non_latin1(self): 852 # Issue 16948 853 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') 854 self.assertEqual(str(msg), textwrap.dedent("""\ 855 MIME-Version: 1.0 856 Content-Type: text/text; charset="iso-8859-2" 857 Content-Transfer-Encoding: quoted-printable 858 859 =BF 860 """)) 861 862 863# Test long header wrapping 864class TestLongHeaders(TestEmailBase): 865 866 maxDiff = None 867 868 def test_split_long_continuation(self): 869 eq = self.ndiffAssertEqual 870 msg = email.message_from_string("""\ 871Subject: bug demonstration 872\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 873\tmore text 874 875test 876""") 877 sfp = StringIO() 878 g = Generator(sfp) 879 g.flatten(msg) 880 eq(sfp.getvalue(), """\ 881Subject: bug demonstration 882\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 883\tmore text 884 885test 886""") 887 888 def 
test_another_long_almost_unsplittable_header(self): 889 eq = self.ndiffAssertEqual 890 hstr = """\ 891bug demonstration 892\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 893\tmore text""" 894 h = Header(hstr, continuation_ws='\t') 895 eq(h.encode(), """\ 896bug demonstration 897\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 898\tmore text""") 899 h = Header(hstr.replace('\t', ' ')) 900 eq(h.encode(), """\ 901bug demonstration 902 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 903 more text""") 904 905 def test_long_nonstring(self): 906 eq = self.ndiffAssertEqual 907 g = Charset("iso-8859-1") 908 cz = Charset("iso-8859-2") 909 utf8 = Charset("utf-8") 910 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' 911 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 912 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 913 b'bef\xf6rdert. ') 914 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 915 b'd\xf9vtipu.. ') 916 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 917 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 918 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 919 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 920 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 921 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 922 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 923 '\u3044\u307e\u3059\u3002') 924 h = Header(g_head, g, header_name='Subject') 925 h.append(cz_head, cz) 926 h.append(utf8_head, utf8) 927 msg = Message() 928 msg['Subject'] = h 929 sfp = StringIO() 930 g = Generator(sfp) 931 g.flatten(msg) 932 eq(sfp.getvalue(), """\ 933Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= 934 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= 935 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= 936 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= 937 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 938 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= 939 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= 940 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= 941 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= 942 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= 943 =?utf-8?b?44CC?= 944 945""") 946 eq(h.encode(maxlinelen=76), """\ 947=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= 948 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= 949 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= 950 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= 951 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 952 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= 953 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= 954 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= 955 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= 956 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= 
957 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 958 959 def test_long_header_encode(self): 960 eq = self.ndiffAssertEqual 961 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 962 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 963 header_name='X-Foobar-Spoink-Defrobnit') 964 eq(h.encode(), '''\ 965wasnipoop; giraffes="very-long-necked-animals"; 966 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 967 968 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 969 eq = self.ndiffAssertEqual 970 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 971 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 972 header_name='X-Foobar-Spoink-Defrobnit', 973 continuation_ws='\t') 974 eq(h.encode(), '''\ 975wasnipoop; giraffes="very-long-necked-animals"; 976 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 977 978 def test_long_header_encode_with_tab_continuation(self): 979 eq = self.ndiffAssertEqual 980 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 981 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 982 header_name='X-Foobar-Spoink-Defrobnit', 983 continuation_ws='\t') 984 eq(h.encode(), '''\ 985wasnipoop; giraffes="very-long-necked-animals"; 986\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 987 988 def test_header_encode_with_different_output_charset(self): 989 h = Header('文', 'euc-jp') 990 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 991 992 def test_long_header_encode_with_different_output_charset(self): 993 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 994 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 995 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 996 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 997 res = """\ 998=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 999 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 1000 
self.assertEqual(h.encode(), res) 1001 1002 def test_header_splitter(self): 1003 eq = self.ndiffAssertEqual 1004 msg = MIMEText('') 1005 # It'd be great if we could use add_header() here, but that doesn't 1006 # guarantee an order of the parameters. 1007 msg['X-Foobar-Spoink-Defrobnit'] = ( 1008 'wasnipoop; giraffes="very-long-necked-animals"; ' 1009 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 1010 sfp = StringIO() 1011 g = Generator(sfp) 1012 g.flatten(msg) 1013 eq(sfp.getvalue(), '''\ 1014Content-Type: text/plain; charset="us-ascii" 1015MIME-Version: 1.0 1016Content-Transfer-Encoding: 7bit 1017X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 1018 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 1019 1020''') 1021 1022 def test_no_semis_header_splitter(self): 1023 eq = self.ndiffAssertEqual 1024 msg = Message() 1025 msg['From'] = 'test@dom.ain' 1026 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10)) 1027 msg.set_payload('Test') 1028 sfp = StringIO() 1029 g = Generator(sfp) 1030 g.flatten(msg) 1031 eq(sfp.getvalue(), """\ 1032From: test@dom.ain 1033References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain> 1034 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain> 1035 1036Test""") 1037 1038 def test_last_split_chunk_does_not_fit(self): 1039 eq = self.ndiffAssertEqual 1040 h = Header('Subject: the first part of this is short, but_the_second' 1041 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1042 '_all_by_itself') 1043 eq(h.encode(), """\ 1044Subject: the first part of this is short, 1045 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1046 1047 def test_splittable_leading_char_followed_by_overlong_unsplittable(self): 1048 eq = self.ndiffAssertEqual 1049 h = Header(', but_the_second' 1050 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1051 '_all_by_itself') 1052 eq(h.encode(), """\ 
1053, 1054 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1055 1056 def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self): 1057 eq = self.ndiffAssertEqual 1058 h = Header(', , but_the_second' 1059 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1060 '_all_by_itself') 1061 eq(h.encode(), """\ 1062, , 1063 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1064 1065 def test_trailing_splittable_on_overlong_unsplittable(self): 1066 eq = self.ndiffAssertEqual 1067 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1068 'be_on_a_line_all_by_itself;') 1069 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1070 "be_on_a_line_all_by_itself;") 1071 1072 def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self): 1073 eq = self.ndiffAssertEqual 1074 h = Header('; ' 1075 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1076 'be_on_a_line_all_by_itself; ') 1077 eq(h.encode(), """\ 1078; 1079 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1080 1081 def test_long_header_with_multiple_sequential_split_chars(self): 1082 eq = self.ndiffAssertEqual 1083 h = Header('This is a long line that has two whitespaces in a row. ' 1084 'This used to cause truncation of the header when folded') 1085 eq(h.encode(), """\ 1086This is a long line that has two whitespaces in a row. 
This used to cause 1087 truncation of the header when folded""") 1088 1089 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1090 eq = self.ndiffAssertEqual 1091 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1092 'they;arenotlegal;fold,points') 1093 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1094 "arenotlegal;fold,points") 1095 1096 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1097 eq = self.ndiffAssertEqual 1098 h = Header('this is a test where we need to have more than one line ' 1099 'before; our final line that is just too big to fit;; ' 1100 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1101 'be_on_a_line_all_by_itself;') 1102 eq(h.encode(), """\ 1103this is a test where we need to have more than one line before; 1104 our final line that is just too big to fit;; 1105 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1106 1107 def test_overlong_last_part_followed_by_split_point(self): 1108 eq = self.ndiffAssertEqual 1109 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1110 'be_on_a_line_all_by_itself ') 1111 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1112 "should_be_on_a_line_all_by_itself ") 1113 1114 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1115 eq = self.ndiffAssertEqual 1116 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1117 'before_our_final_line_; ; ' 1118 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1119 'be_on_a_line_all_by_itself; ') 1120 eq(h.encode(), """\ 1121this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1122 ; 1123 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1124 1125 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1126 eq = self.ndiffAssertEqual 1127 h = Header('this is a 
test where we need to have more than one line ' 1128 'before our final line; ; ' 1129 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1130 'be_on_a_line_all_by_itself; ') 1131 eq(h.encode(), """\ 1132this is a test where we need to have more than one line before our final line; 1133 ; 1134 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1135 1136 def test_long_header_with_whitespace_runs(self): 1137 eq = self.ndiffAssertEqual 1138 msg = Message() 1139 msg['From'] = 'test@dom.ain' 1140 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10) 1141 msg.set_payload('Test') 1142 sfp = StringIO() 1143 g = Generator(sfp) 1144 g.flatten(msg) 1145 eq(sfp.getvalue(), """\ 1146From: test@dom.ain 1147References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1148 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1149 <foo@dom.ain> <foo@dom.ain>\x20\x20 1150 1151Test""") 1152 1153 def test_long_run_with_semi_header_splitter(self): 1154 eq = self.ndiffAssertEqual 1155 msg = Message() 1156 msg['From'] = 'test@dom.ain' 1157 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc' 1158 msg.set_payload('Test') 1159 sfp = StringIO() 1160 g = Generator(sfp) 1161 g.flatten(msg) 1162 eq(sfp.getvalue(), """\ 1163From: test@dom.ain 1164References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1165 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> 1166 <foo@dom.ain>; abc 1167 1168Test""") 1169 1170 def test_splitter_split_on_punctuation_only_if_fws(self): 1171 eq = self.ndiffAssertEqual 1172 msg = Message() 1173 msg['From'] = 'test@dom.ain' 1174 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1175 'they;arenotlegal;fold,points') 1176 msg.set_payload('Test') 1177 sfp = StringIO() 1178 g = Generator(sfp) 1179 g.flatten(msg) 1180 # XXX the space after the header should not be there. 
1181 eq(sfp.getvalue(), """\ 1182From: test@dom.ain 1183References:\x20 1184 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1185 1186Test""") 1187 1188 def test_no_split_long_header(self): 1189 eq = self.ndiffAssertEqual 1190 hstr = 'References: ' + 'x' * 80 1191 h = Header(hstr) 1192 # These come on two lines because Headers are really field value 1193 # classes and don't really know about their field names. 1194 eq(h.encode(), """\ 1195References: 1196 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1197 h = Header('x' * 80) 1198 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1199 1200 def test_splitting_multiple_long_lines(self): 1201 eq = self.ndiffAssertEqual 1202 hstr = """\ 1203from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1204\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1205\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1206""" 1207 h = Header(hstr, continuation_ws='\t') 1208 eq(h.encode(), """\ 1209from babylon.socal-raves.org (localhost [127.0.0.1]); 1210 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1211 for <mailman-admin@babylon.socal-raves.org>; 1212 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1213\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1214 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1215 for <mailman-admin@babylon.socal-raves.org>; 1216 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1217\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 
1218 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1219 for <mailman-admin@babylon.socal-raves.org>; 1220 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1221 1222 def test_splitting_first_line_only_is_long(self): 1223 eq = self.ndiffAssertEqual 1224 hstr = """\ 1225from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1226\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1227\tid 17k4h5-00034i-00 1228\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""" 1229 h = Header(hstr, maxlinelen=78, header_name='Received', 1230 continuation_ws='\t') 1231 eq(h.encode(), """\ 1232from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1233 helo=cthulhu.gerg.ca) 1234\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1235\tid 17k4h5-00034i-00 1236\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""") 1237 1238 def test_long_8bit_header(self): 1239 eq = self.ndiffAssertEqual 1240 msg = Message() 1241 h = Header('Britische Regierung gibt', 'iso-8859-1', 1242 header_name='Subject') 1243 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1244 eq(h.encode(maxlinelen=76), """\ 1245=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1246 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1247 msg['Subject'] = h 1248 eq(msg.as_string(maxheaderlen=76), """\ 1249Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1250 =?iso-8859-1?q?hore-Windkraftprojekte?= 1251 1252""") 1253 eq(msg.as_string(maxheaderlen=0), """\ 1254Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1255 1256""") 1257 1258 def test_long_8bit_header_no_charset(self): 1259 eq = self.ndiffAssertEqual 1260 msg = Message() 1261 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1262 'f\xfcr Offshore-Windkraftprojekte ' 1263 '<a-very-long-address@example.com>') 1264 msg['Reply-To'] = header_string 1265 eq(msg.as_string(maxheaderlen=78), """\ 
1266Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1267 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1268 1269""") 1270 msg = Message() 1271 msg['Reply-To'] = Header(header_string, 1272 header_name='Reply-To') 1273 eq(msg.as_string(maxheaderlen=78), """\ 1274Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1275 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1276 1277""") 1278 1279 def test_long_to_header(self): 1280 eq = self.ndiffAssertEqual 1281 to = ('"Someone Test #A" <someone@eecs.umich.edu>,' 1282 '<someone@eecs.umich.edu>, ' 1283 '"Someone Test #B" <someone@umich.edu>, ' 1284 '"Someone Test #C" <someone@eecs.umich.edu>, ' 1285 '"Someone Test #D" <someone@eecs.umich.edu>') 1286 msg = Message() 1287 msg['To'] = to 1288 eq(msg.as_string(maxheaderlen=78), '''\ 1289To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>, 1290 "Someone Test #B" <someone@umich.edu>, 1291 "Someone Test #C" <someone@eecs.umich.edu>, 1292 "Someone Test #D" <someone@eecs.umich.edu> 1293 1294''') 1295 1296 def test_long_line_after_append(self): 1297 eq = self.ndiffAssertEqual 1298 s = 'This is an example of string which has almost the limit of header length.' 1299 h = Header(s) 1300 h.append('Add another line.') 1301 eq(h.encode(maxlinelen=76), """\ 1302This is an example of string which has almost the limit of header length. 1303 Add another line.""") 1304 1305 def test_shorter_line_with_append(self): 1306 eq = self.ndiffAssertEqual 1307 s = 'This is a shorter line.' 1308 h = Header(s) 1309 h.append('Add another sentence. (Surprise?)') 1310 eq(h.encode(), 1311 'This is a shorter line. Add another sentence. 
(Surprise?)') 1312 1313 def test_long_field_name(self): 1314 eq = self.ndiffAssertEqual 1315 fn = 'X-Very-Very-Very-Long-Header-Name' 1316 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1317 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1318 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1319 'bef\xf6rdert. ') 1320 h = Header(gs, 'iso-8859-1', header_name=fn) 1321 # BAW: this seems broken because the first line is too long 1322 eq(h.encode(maxlinelen=76), """\ 1323=?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1324 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1325 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1326 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1327 1328 def test_long_received_header(self): 1329 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1330 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1331 'Wed, 05 Mar 2003 18:10:18 -0700') 1332 msg = Message() 1333 msg['Received-1'] = Header(h, continuation_ws='\t') 1334 msg['Received-2'] = h 1335 # This should be splitting on spaces not semicolons. 1336 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1337Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1338 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1339 Wed, 05 Mar 2003 18:10:18 -0700 1340Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1341 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1342 Wed, 05 Mar 2003 18:10:18 -0700 1343 1344""") 1345 1346 def test_string_headerinst_eq(self): 1347 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.' 1348 'tu-muenchen.de> (David Bremner\'s message of ' 1349 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1350 msg = Message() 1351 msg['Received-1'] = Header(h, header_name='Received-1', 1352 continuation_ws='\t') 1353 msg['Received-2'] = h 1354 # XXX The space after the ':' should not be there. 
1355 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1356Received-1:\x20 1357 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1358 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1359Received-2:\x20 1360 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1361 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1362 1363""") 1364 1365 def test_long_unbreakable_lines_with_continuation(self): 1366 eq = self.ndiffAssertEqual 1367 msg = Message() 1368 t = """\ 1369iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1370 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" 1371 msg['Face-1'] = t 1372 msg['Face-2'] = Header(t, header_name='Face-2') 1373 msg['Face-3'] = ' ' + t 1374 # XXX This splitting is all wrong. It the first value line should be 1375 # snug against the field name or the space after the header not there. 1376 eq(msg.as_string(maxheaderlen=78), """\ 1377Face-1:\x20 1378 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1379 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1380Face-2:\x20 1381 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1382 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1383Face-3:\x20 1384 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1385 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1386 1387""") 1388 1389 def test_another_long_multiline_header(self): 1390 eq = self.ndiffAssertEqual 1391 m = ('Received: from siimage.com ' 1392 '([172.25.1.3]) by zima.siliconimage.com with ' 1393 'Microsoft SMTPSVC(5.0.2195.4905); ' 1394 'Wed, 16 Oct 2002 07:41:11 -0700') 1395 msg = email.message_from_string(m) 1396 eq(msg.as_string(maxheaderlen=78), '''\ 1397Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with 1398 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 
2002 07:41:11 -0700 1399 1400''') 1401 1402 def test_long_lines_with_different_header(self): 1403 eq = self.ndiffAssertEqual 1404 h = ('List-Unsubscribe: ' 1405 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,' 1406 ' <mailto:spamassassin-talk-request@lists.sourceforge.net' 1407 '?subject=unsubscribe>') 1408 msg = Message() 1409 msg['List'] = h 1410 msg['List'] = Header(h, header_name='List') 1411 eq(msg.as_string(maxheaderlen=78), """\ 1412List: List-Unsubscribe: 1413 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1414 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1415List: List-Unsubscribe: 1416 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1417 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe> 1418 1419""") 1420 1421 def test_long_rfc2047_header_with_embedded_fws(self): 1422 h = Header(textwrap.dedent("""\ 1423 We're going to pretend this header is in a non-ascii character set 1424 \tto see if line wrapping with encoded words and embedded 1425 folding white space works"""), 1426 charset='utf-8', 1427 header_name='Test') 1428 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ 1429 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= 1430 =?utf-8?q?cter_set?= 1431 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= 1432 =?utf-8?q?_folding_white_space_works?=""")+'\n') 1433 1434 1435 1436# Test mangling of "From " lines in the body of a message 1437class TestFromMangling(unittest.TestCase): 1438 def setUp(self): 1439 self.msg = Message() 1440 self.msg['From'] = 'aaa@bbb.org' 1441 self.msg.set_payload("""\ 1442From the desk of A.A.A.: 1443Blah blah blah 1444""") 1445 1446 def test_mangled_from(self): 1447 s = StringIO() 1448 g = Generator(s, mangle_from_=True) 1449 g.flatten(self.msg) 1450 self.assertEqual(s.getvalue(), """\ 1451From: aaa@bbb.org 1452 1453>From the desk of A.A.A.: 1454Blah blah blah 1455""") 1456 
# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # The first body line starts with "From ", so the mangle_from_
        # tests below have something to act on.
        body = """\
From the desk of A.A.A.:
Blah blah blah
"""
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload(body)
        self.msg = message

    def _flatten_to_text(self, msg, mangle):
        # Serialise msg with a text Generator and hand back what it wrote.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(msg)
        return out.getvalue()

    def test_mangled_from(self):
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flatten_to_text(self.msg, True), expected)

    def test_dont_mangle_from(self):
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flatten_to_text(self.msg, False), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # One "From " line sits before the first boundary and one after the
        # closing boundary; both are expected to come out mangled.
        multipart = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        flattened = self._flatten_to_text(multipart, True)
        mangled = sum(1 for line in flattened.split('\n')
                      if line.startswith('>From '))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        sink = BytesIO()
        BytesGenerator(sink, mangle_from_=True).flatten(parsed)
        self.assertEqual(sink.getvalue(), headers + b'>From R\xc3\xb6lli\n')

    def test_multipart_with_bad_bytes_in_cte(self):
        # bpo30835
        raw = textwrap.dedent("""\
            From: aperson@example.com
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        # NOTE(review): nothing is asserted; presumably the test only has to
        # get through the parse without raising -- confirm against bpo30835.
        email.message_from_bytes(raw)
# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Every test gets a MIMEAudio part built from the sample .au file.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        content_type = self._au.get_content_type()
        self.assertEqual(content_type, 'audio/basic')

    def test_encoding(self):
        # Decoding the stored payload as base64 must give back the bytes
        # that were read from the file.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        fish = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(fish.get_content_type(), 'audio/fish')

    def test_add_header(self):
        part = self._au
        part.add_header('Content-Disposition', 'attachment',
                        filename='audiotest.au')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(part.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A fresh list is used as failobj so identity can be checked.
        sentinel = []
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Every test gets a MIMEImage part built from the sample GIF.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        content_type = self._im.get_content_type()
        self.assertEqual(content_type, 'image/gif')

    def test_encoding(self):
        # Decoding the stored payload as base64 must give back the bytes
        # that were read from the file.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        fish = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(fish.get_content_type(), 'image/fish')

    def test_add_header(self):
        part = self._im
        part.add_header('Content-Disposition', 'attachment',
                        filename='dingusfish.gif')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(part.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A fresh list is used as failobj so identity can be checked.
        sentinel = []
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def _reparse(self, msg):
        # Flatten msg to bytes and parse those bytes into a new message.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        app = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(app.get_content_type(), 'application/octet-stream')
        self.assertEqual(app['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        app = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(app.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(app.get_payload(decode=True), raw)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(raw)
        app = MIMEApplication(raw, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(app.get_payload(decode=True), raw)
        self.assertEqual(app['Content-Transfer-Encoding'], '8bit')
        reparsed = self._reparse(app)
        # NOTE(review): this re-checks the original message, not the parsed
        # copy -- confirm whether the copy was meant.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
        self.assertEqual(reparsed['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(raw)
        app = MIMEApplication(raw, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(app.get_payload(decode=True), raw)
        reparsed = self._reparse(app)
        # NOTE(review): re-checks the original message, as above.
        self.assertEqual(app.get_payload(), replaced)
        self.assertEqual(reparsed.get_payload(decode=True), raw)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        raw = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        app = MIMEApplication(raw, _encoder=encoders.encode_noop)
        self.assertEqual(app.get_payload(decode=True), raw)
        reparsed = self._reparse(app)
        self.assertEqual(reparsed.get_payload(decode=True), raw)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff '
        quoted = '=FA=FB=FC=FD=FE=FF=20'
        app = MIMEApplication(raw, _encoder=encoders.encode_quopri)
        self.assertEqual(app.get_payload(), quoted)
        self.assertEqual(app.get_payload(decode=True), raw)
        self.assertEqual(app['Content-Transfer-Encoding'], 'quoted-printable')
        reparsed = self._reparse(app)
        # NOTE(review): re-checks the original message, as above.
        self.assertEqual(app.get_payload(), quoted)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
        self.assertEqual(reparsed['Content-Transfer-Encoding'],
                         'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        encoded = '+vv8/f7/\n'
        app = MIMEApplication(raw, _encoder=encoders.encode_base64)
        self.assertEqual(app.get_payload(), encoded)
        self.assertEqual(app.get_payload(decode=True), raw)
        reparsed = self._reparse(app)
        # NOTE(review): re-checks the original message, as above.
        self.assertEqual(app.get_payload(), encoded)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
eq(msg.get_charset().input_charset, 'us-ascii') 1709 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1710 # Also accept a Charset instance 1711 charset = Charset('utf-8') 1712 charset.body_encoding = None 1713 msg = MIMEText('hello there', _charset=charset) 1714 eq(msg.get_charset().input_charset, 'utf-8') 1715 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1716 eq(msg.get_payload(), 'hello there') 1717 1718 def test_7bit_input(self): 1719 eq = self.assertEqual 1720 msg = MIMEText('hello there', _charset='us-ascii') 1721 eq(msg.get_charset().input_charset, 'us-ascii') 1722 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1723 1724 def test_7bit_input_no_charset(self): 1725 eq = self.assertEqual 1726 msg = MIMEText('hello there') 1727 eq(msg.get_charset(), 'us-ascii') 1728 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1729 self.assertIn('hello there', msg.as_string()) 1730 1731 def test_utf8_input(self): 1732 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1733 eq = self.assertEqual 1734 msg = MIMEText(teststr, _charset='utf-8') 1735 eq(msg.get_charset().output_charset, 'utf-8') 1736 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1737 eq(msg.get_payload(decode=True), teststr.encode('utf-8')) 1738 1739 @unittest.skip("can't fix because of backward compat in email5, " 1740 "will fix in email6") 1741 def test_utf8_input_no_charset(self): 1742 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1743 self.assertRaises(UnicodeEncodeError, MIMEText, teststr) 1744 1745 1746 1747# Test complicated multipart/* messages 1748class TestMultipart(TestEmailBase): 1749 def setUp(self): 1750 with openfile('PyBanner048.gif', 'rb') as fp: 1751 data = fp.read() 1752 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY') 1753 image = MIMEImage(data, name='dingusfish.gif') 1754 image.add_header('content-disposition', 'attachment', 1755 filename='dingusfish.gif') 1756 intro = MIMEText('''\ 1757Hi there, 1758 
1759This is the dingus fish. 1760''') 1761 container.attach(intro) 1762 container.attach(image) 1763 container['From'] = 'Barry <barry@digicool.com>' 1764 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>' 1765 container['Subject'] = 'Here is your dingus fish' 1766 1767 now = 987809702.54848599 1768 timetuple = time.localtime(now) 1769 if timetuple[-1] == 0: 1770 tzsecs = time.timezone 1771 else: 1772 tzsecs = time.altzone 1773 if tzsecs > 0: 1774 sign = '-' 1775 else: 1776 sign = '+' 1777 tzoffset = ' %s%04d' % (sign, tzsecs / 36) 1778 container['Date'] = time.strftime( 1779 '%a, %d %b %Y %H:%M:%S', 1780 time.localtime(now)) + tzoffset 1781 self._msg = container 1782 self._im = image 1783 self._txt = intro 1784 1785 def test_hierarchy(self): 1786 # convenience 1787 eq = self.assertEqual 1788 raises = self.assertRaises 1789 # tests 1790 m = self._msg 1791 self.assertTrue(m.is_multipart()) 1792 eq(m.get_content_type(), 'multipart/mixed') 1793 eq(len(m.get_payload()), 2) 1794 raises(IndexError, m.get_payload, 2) 1795 m0 = m.get_payload(0) 1796 m1 = m.get_payload(1) 1797 self.assertIs(m0, self._txt) 1798 self.assertIs(m1, self._im) 1799 eq(m.get_payload(), [m0, m1]) 1800 self.assertFalse(m0.is_multipart()) 1801 self.assertFalse(m1.is_multipart()) 1802 1803 def test_empty_multipart_idempotent(self): 1804 text = """\ 1805Content-Type: multipart/mixed; boundary="BOUNDARY" 1806MIME-Version: 1.0 1807Subject: A subject 1808To: aperson@dom.ain 1809From: bperson@dom.ain 1810 1811 1812--BOUNDARY 1813 1814 1815--BOUNDARY-- 1816""" 1817 msg = Parser().parsestr(text) 1818 self.ndiffAssertEqual(text, msg.as_string()) 1819 1820 def test_no_parts_in_a_multipart_with_none_epilogue(self): 1821 outer = MIMEBase('multipart', 'mixed') 1822 outer['Subject'] = 'A subject' 1823 outer['To'] = 'aperson@dom.ain' 1824 outer['From'] = 'bperson@dom.ain' 1825 outer.set_boundary('BOUNDARY') 1826 self.ndiffAssertEqual(outer.as_string(), '''\ 1827Content-Type: multipart/mixed; 
boundary="BOUNDARY" 1828MIME-Version: 1.0 1829Subject: A subject 1830To: aperson@dom.ain 1831From: bperson@dom.ain 1832 1833--BOUNDARY 1834 1835--BOUNDARY-- 1836''') 1837 1838 def test_no_parts_in_a_multipart_with_empty_epilogue(self): 1839 outer = MIMEBase('multipart', 'mixed') 1840 outer['Subject'] = 'A subject' 1841 outer['To'] = 'aperson@dom.ain' 1842 outer['From'] = 'bperson@dom.ain' 1843 outer.preamble = '' 1844 outer.epilogue = '' 1845 outer.set_boundary('BOUNDARY') 1846 self.ndiffAssertEqual(outer.as_string(), '''\ 1847Content-Type: multipart/mixed; boundary="BOUNDARY" 1848MIME-Version: 1.0 1849Subject: A subject 1850To: aperson@dom.ain 1851From: bperson@dom.ain 1852 1853 1854--BOUNDARY 1855 1856--BOUNDARY-- 1857''') 1858 1859 def test_one_part_in_a_multipart(self): 1860 eq = self.ndiffAssertEqual 1861 outer = MIMEBase('multipart', 'mixed') 1862 outer['Subject'] = 'A subject' 1863 outer['To'] = 'aperson@dom.ain' 1864 outer['From'] = 'bperson@dom.ain' 1865 outer.set_boundary('BOUNDARY') 1866 msg = MIMEText('hello world') 1867 outer.attach(msg) 1868 eq(outer.as_string(), '''\ 1869Content-Type: multipart/mixed; boundary="BOUNDARY" 1870MIME-Version: 1.0 1871Subject: A subject 1872To: aperson@dom.ain 1873From: bperson@dom.ain 1874 1875--BOUNDARY 1876Content-Type: text/plain; charset="us-ascii" 1877MIME-Version: 1.0 1878Content-Transfer-Encoding: 7bit 1879 1880hello world 1881--BOUNDARY-- 1882''') 1883 1884 def test_seq_parts_in_a_multipart_with_empty_preamble(self): 1885 eq = self.ndiffAssertEqual 1886 outer = MIMEBase('multipart', 'mixed') 1887 outer['Subject'] = 'A subject' 1888 outer['To'] = 'aperson@dom.ain' 1889 outer['From'] = 'bperson@dom.ain' 1890 outer.preamble = '' 1891 msg = MIMEText('hello world') 1892 outer.attach(msg) 1893 outer.set_boundary('BOUNDARY') 1894 eq(outer.as_string(), '''\ 1895Content-Type: multipart/mixed; boundary="BOUNDARY" 1896MIME-Version: 1.0 1897Subject: A subject 1898To: aperson@dom.ain 1899From: bperson@dom.ain 1900 1901 
1902--BOUNDARY 1903Content-Type: text/plain; charset="us-ascii" 1904MIME-Version: 1.0 1905Content-Transfer-Encoding: 7bit 1906 1907hello world 1908--BOUNDARY-- 1909''') 1910 1911 1912 def test_seq_parts_in_a_multipart_with_none_preamble(self): 1913 eq = self.ndiffAssertEqual 1914 outer = MIMEBase('multipart', 'mixed') 1915 outer['Subject'] = 'A subject' 1916 outer['To'] = 'aperson@dom.ain' 1917 outer['From'] = 'bperson@dom.ain' 1918 outer.preamble = None 1919 msg = MIMEText('hello world') 1920 outer.attach(msg) 1921 outer.set_boundary('BOUNDARY') 1922 eq(outer.as_string(), '''\ 1923Content-Type: multipart/mixed; boundary="BOUNDARY" 1924MIME-Version: 1.0 1925Subject: A subject 1926To: aperson@dom.ain 1927From: bperson@dom.ain 1928 1929--BOUNDARY 1930Content-Type: text/plain; charset="us-ascii" 1931MIME-Version: 1.0 1932Content-Transfer-Encoding: 7bit 1933 1934hello world 1935--BOUNDARY-- 1936''') 1937 1938 1939 def test_seq_parts_in_a_multipart_with_none_epilogue(self): 1940 eq = self.ndiffAssertEqual 1941 outer = MIMEBase('multipart', 'mixed') 1942 outer['Subject'] = 'A subject' 1943 outer['To'] = 'aperson@dom.ain' 1944 outer['From'] = 'bperson@dom.ain' 1945 outer.epilogue = None 1946 msg = MIMEText('hello world') 1947 outer.attach(msg) 1948 outer.set_boundary('BOUNDARY') 1949 eq(outer.as_string(), '''\ 1950Content-Type: multipart/mixed; boundary="BOUNDARY" 1951MIME-Version: 1.0 1952Subject: A subject 1953To: aperson@dom.ain 1954From: bperson@dom.ain 1955 1956--BOUNDARY 1957Content-Type: text/plain; charset="us-ascii" 1958MIME-Version: 1.0 1959Content-Transfer-Encoding: 7bit 1960 1961hello world 1962--BOUNDARY-- 1963''') 1964 1965 1966 def test_seq_parts_in_a_multipart_with_empty_epilogue(self): 1967 eq = self.ndiffAssertEqual 1968 outer = MIMEBase('multipart', 'mixed') 1969 outer['Subject'] = 'A subject' 1970 outer['To'] = 'aperson@dom.ain' 1971 outer['From'] = 'bperson@dom.ain' 1972 outer.epilogue = '' 1973 msg = MIMEText('hello world') 1974 outer.attach(msg) 
1975 outer.set_boundary('BOUNDARY') 1976 eq(outer.as_string(), '''\ 1977Content-Type: multipart/mixed; boundary="BOUNDARY" 1978MIME-Version: 1.0 1979Subject: A subject 1980To: aperson@dom.ain 1981From: bperson@dom.ain 1982 1983--BOUNDARY 1984Content-Type: text/plain; charset="us-ascii" 1985MIME-Version: 1.0 1986Content-Transfer-Encoding: 7bit 1987 1988hello world 1989--BOUNDARY-- 1990''') 1991 1992 1993 def test_seq_parts_in_a_multipart_with_nl_epilogue(self): 1994 eq = self.ndiffAssertEqual 1995 outer = MIMEBase('multipart', 'mixed') 1996 outer['Subject'] = 'A subject' 1997 outer['To'] = 'aperson@dom.ain' 1998 outer['From'] = 'bperson@dom.ain' 1999 outer.epilogue = '\n' 2000 msg = MIMEText('hello world') 2001 outer.attach(msg) 2002 outer.set_boundary('BOUNDARY') 2003 eq(outer.as_string(), '''\ 2004Content-Type: multipart/mixed; boundary="BOUNDARY" 2005MIME-Version: 1.0 2006Subject: A subject 2007To: aperson@dom.ain 2008From: bperson@dom.ain 2009 2010--BOUNDARY 2011Content-Type: text/plain; charset="us-ascii" 2012MIME-Version: 1.0 2013Content-Transfer-Encoding: 7bit 2014 2015hello world 2016--BOUNDARY-- 2017 2018''') 2019 2020 def test_message_external_body(self): 2021 eq = self.assertEqual 2022 msg = self._msgobj('msg_36.txt') 2023 eq(len(msg.get_payload()), 2) 2024 msg1 = msg.get_payload(1) 2025 eq(msg1.get_content_type(), 'multipart/alternative') 2026 eq(len(msg1.get_payload()), 2) 2027 for subpart in msg1.get_payload(): 2028 eq(subpart.get_content_type(), 'message/external-body') 2029 eq(len(subpart.get_payload()), 1) 2030 subsubpart = subpart.get_payload(0) 2031 eq(subsubpart.get_content_type(), 'text/plain') 2032 2033 def test_double_boundary(self): 2034 # msg_37.txt is a multipart that contains two dash-boundary's in a 2035 # row. Our interpretation of RFC 2046 calls for ignoring the second 2036 # and subsequent boundaries. 
2037 msg = self._msgobj('msg_37.txt') 2038 self.assertEqual(len(msg.get_payload()), 3) 2039 2040 def test_nested_inner_contains_outer_boundary(self): 2041 eq = self.ndiffAssertEqual 2042 # msg_38.txt has an inner part that contains outer boundaries. My 2043 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say 2044 # these are illegal and should be interpreted as unterminated inner 2045 # parts. 2046 msg = self._msgobj('msg_38.txt') 2047 sfp = StringIO() 2048 iterators._structure(msg, sfp) 2049 eq(sfp.getvalue(), """\ 2050multipart/mixed 2051 multipart/mixed 2052 multipart/alternative 2053 text/plain 2054 text/plain 2055 text/plain 2056 text/plain 2057""") 2058 2059 def test_nested_with_same_boundary(self): 2060 eq = self.ndiffAssertEqual 2061 # msg 39.txt is similarly evil in that it's got inner parts that use 2062 # the same boundary as outer parts. Again, I believe the way this is 2063 # parsed is closest to the spirit of RFC 2046 2064 msg = self._msgobj('msg_39.txt') 2065 sfp = StringIO() 2066 iterators._structure(msg, sfp) 2067 eq(sfp.getvalue(), """\ 2068multipart/mixed 2069 multipart/mixed 2070 multipart/alternative 2071 application/octet-stream 2072 application/octet-stream 2073 text/plain 2074""") 2075 2076 def test_boundary_in_non_multipart(self): 2077 msg = self._msgobj('msg_40.txt') 2078 self.assertEqual(msg.as_string(), '''\ 2079MIME-Version: 1.0 2080Content-Type: text/html; boundary="--961284236552522269" 2081 2082----961284236552522269 2083Content-Type: text/html; 2084Content-Transfer-Encoding: 7Bit 2085 2086<html></html> 2087 2088----961284236552522269-- 2089''') 2090 2091 def test_boundary_with_leading_space(self): 2092 eq = self.assertEqual 2093 msg = email.message_from_string('''\ 2094MIME-Version: 1.0 2095Content-Type: multipart/mixed; boundary=" XXXX" 2096 2097-- XXXX 2098Content-Type: text/plain 2099 2100 2101-- XXXX 2102Content-Type: text/plain 2103 2104-- XXXX-- 2105''') 2106 self.assertTrue(msg.is_multipart()) 2107 
eq(msg.get_boundary(), ' XXXX') 2108 eq(len(msg.get_payload()), 2) 2109 2110 def test_boundary_without_trailing_newline(self): 2111 m = Parser().parsestr("""\ 2112Content-Type: multipart/mixed; boundary="===============0012394164==" 2113MIME-Version: 1.0 2114 2115--===============0012394164== 2116Content-Type: image/file1.jpg 2117MIME-Version: 1.0 2118Content-Transfer-Encoding: base64 2119 2120YXNkZg== 2121--===============0012394164==--""") 2122 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==') 2123 2124 def test_mimebase_default_policy(self): 2125 m = MIMEBase('multipart', 'mixed') 2126 self.assertIs(m.policy, email.policy.compat32) 2127 2128 def test_mimebase_custom_policy(self): 2129 m = MIMEBase('multipart', 'mixed', policy=email.policy.default) 2130 self.assertIs(m.policy, email.policy.default) 2131 2132# Test some badly formatted messages 2133class TestNonConformant(TestEmailBase): 2134 2135 def test_parse_missing_minor_type(self): 2136 eq = self.assertEqual 2137 msg = self._msgobj('msg_14.txt') 2138 eq(msg.get_content_type(), 'text/plain') 2139 eq(msg.get_content_maintype(), 'text') 2140 eq(msg.get_content_subtype(), 'plain') 2141 2142 # test_defect_handling 2143 def test_same_boundary_inner_outer(self): 2144 msg = self._msgobj('msg_15.txt') 2145 # XXX We can probably eventually do better 2146 inner = msg.get_payload(0) 2147 self.assertTrue(hasattr(inner, 'defects')) 2148 self.assertEqual(len(inner.defects), 1) 2149 self.assertIsInstance(inner.defects[0], 2150 errors.StartBoundaryNotFoundDefect) 2151 2152 # test_defect_handling 2153 def test_multipart_no_boundary(self): 2154 msg = self._msgobj('msg_25.txt') 2155 self.assertIsInstance(msg.get_payload(), str) 2156 self.assertEqual(len(msg.defects), 2) 2157 self.assertIsInstance(msg.defects[0], 2158 errors.NoBoundaryInMultipartDefect) 2159 self.assertIsInstance(msg.defects[1], 2160 errors.MultipartInvariantViolationDefect) 2161 2162 multipart_msg = textwrap.dedent("""\ 2163 Date: Wed, 14 Nov 2007 
12:56:23 GMT 2164 From: foo@bar.invalid 2165 To: foo@bar.invalid 2166 Subject: Content-Transfer-Encoding: base64 and multipart 2167 MIME-Version: 1.0 2168 Content-Type: multipart/mixed; 2169 boundary="===============3344438784458119861=="{} 2170 2171 --===============3344438784458119861== 2172 Content-Type: text/plain 2173 2174 Test message 2175 2176 --===============3344438784458119861== 2177 Content-Type: application/octet-stream 2178 Content-Transfer-Encoding: base64 2179 2180 YWJj 2181 2182 --===============3344438784458119861==-- 2183 """) 2184 2185 # test_defect_handling 2186 def test_multipart_invalid_cte(self): 2187 msg = self._str_msg( 2188 self.multipart_msg.format("\nContent-Transfer-Encoding: base64")) 2189 self.assertEqual(len(msg.defects), 1) 2190 self.assertIsInstance(msg.defects[0], 2191 errors.InvalidMultipartContentTransferEncodingDefect) 2192 2193 # test_defect_handling 2194 def test_multipart_no_cte_no_defect(self): 2195 msg = self._str_msg(self.multipart_msg.format('')) 2196 self.assertEqual(len(msg.defects), 0) 2197 2198 # test_defect_handling 2199 def test_multipart_valid_cte_no_defect(self): 2200 for cte in ('7bit', '8bit', 'BINary'): 2201 msg = self._str_msg( 2202 self.multipart_msg.format( 2203 "\nContent-Transfer-Encoding: {}".format(cte))) 2204 self.assertEqual(len(msg.defects), 0) 2205 2206 # test_headerregistry.TestContentTyopeHeader invalid_1 and invalid_2. 
2207 def test_invalid_content_type(self): 2208 eq = self.assertEqual 2209 neq = self.ndiffAssertEqual 2210 msg = Message() 2211 # RFC 2045, $5.2 says invalid yields text/plain 2212 msg['Content-Type'] = 'text' 2213 eq(msg.get_content_maintype(), 'text') 2214 eq(msg.get_content_subtype(), 'plain') 2215 eq(msg.get_content_type(), 'text/plain') 2216 # Clear the old value and try something /really/ invalid 2217 del msg['content-type'] 2218 msg['Content-Type'] = 'foo' 2219 eq(msg.get_content_maintype(), 'text') 2220 eq(msg.get_content_subtype(), 'plain') 2221 eq(msg.get_content_type(), 'text/plain') 2222 # Still, make sure that the message is idempotently generated 2223 s = StringIO() 2224 g = Generator(s) 2225 g.flatten(msg) 2226 neq(s.getvalue(), 'Content-Type: foo\n\n') 2227 2228 def test_no_start_boundary(self): 2229 eq = self.ndiffAssertEqual 2230 msg = self._msgobj('msg_31.txt') 2231 eq(msg.get_payload(), """\ 2232--BOUNDARY 2233Content-Type: text/plain 2234 2235message 1 2236 2237--BOUNDARY 2238Content-Type: text/plain 2239 2240message 2 2241 2242--BOUNDARY-- 2243""") 2244 2245 def test_no_separating_blank_line(self): 2246 eq = self.ndiffAssertEqual 2247 msg = self._msgobj('msg_35.txt') 2248 eq(msg.as_string(), """\ 2249From: aperson@dom.ain 2250To: bperson@dom.ain 2251Subject: here's something interesting 2252 2253counter to RFC 2822, there's no separating newline here 2254""") 2255 2256 # test_defect_handling 2257 def test_lying_multipart(self): 2258 msg = self._msgobj('msg_41.txt') 2259 self.assertTrue(hasattr(msg, 'defects')) 2260 self.assertEqual(len(msg.defects), 2) 2261 self.assertIsInstance(msg.defects[0], 2262 errors.NoBoundaryInMultipartDefect) 2263 self.assertIsInstance(msg.defects[1], 2264 errors.MultipartInvariantViolationDefect) 2265 2266 # test_defect_handling 2267 def test_missing_start_boundary(self): 2268 outer = self._msgobj('msg_42.txt') 2269 # The message structure is: 2270 # 2271 # multipart/mixed 2272 # text/plain 2273 # message/rfc822 2274 
# multipart/mixed [*] 2275 # 2276 # [*] This message is missing its start boundary 2277 bad = outer.get_payload(1).get_payload(0) 2278 self.assertEqual(len(bad.defects), 1) 2279 self.assertIsInstance(bad.defects[0], 2280 errors.StartBoundaryNotFoundDefect) 2281 2282 # test_defect_handling 2283 def test_first_line_is_continuation_header(self): 2284 eq = self.assertEqual 2285 m = ' Line 1\nSubject: test\n\nbody' 2286 msg = email.message_from_string(m) 2287 eq(msg.keys(), ['Subject']) 2288 eq(msg.get_payload(), 'body') 2289 eq(len(msg.defects), 1) 2290 self.assertDefectsEqual(msg.defects, 2291 [errors.FirstHeaderLineIsContinuationDefect]) 2292 eq(msg.defects[0].line, ' Line 1\n') 2293 2294 # test_defect_handling 2295 def test_missing_header_body_separator(self): 2296 # Our heuristic if we see a line that doesn't look like a header (no 2297 # leading whitespace but no ':') is to assume that the blank line that 2298 # separates the header from the body is missing, and to stop parsing 2299 # headers and start parsing the body. 
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # decode_header() returns a list of (bytes, charset) pairs where charset
    # is None for unencoded text; make_header() turns such a list back into a
    # Header.  The test_rfc2047_rfc2047_N tests follow the numbered examples
    # at the end of RFC 2047.

    def test_rfc2047_multiline(self):
        # Encoded words spread over a folded header decode to four chunks,
        # and the rebuilt header re-folds when encoded at 76 columns.
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        # The space between an encoded word and following plain text is kept.
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        # Same, with plain text on both sides of base64 encoded words.
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued directly to plain text are still recognized.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # With separating spaces, the spaces stay in the unencoded chunks.
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 encoded words with missing or short '=' padding decode to
        # the same bytes as the correctly padded form.
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            # subTest makes a failure report name the offending input.
            with self.subTest(encoded=q):
                dh = decode_header(s % q)
                self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # '=zz' is not a valid hex escape and is passed through literally.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: like the 3rd, but with TWO spaces
        # between the adjacent encoded words.  Any amount of linear white
        # space between encoded words is dropped.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # Re-encoding reproduces the input apart from letter case.
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        # The fold after the encoded word is swallowed when decoding;
        # re-encoding yields the unfolded form of the input.
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
subject) 2458 2459 def test_bad_multipart(self): 2460 msg1 = Message() 2461 msg1['Subject'] = 'subpart 1' 2462 msg2 = Message() 2463 msg2['Subject'] = 'subpart 2' 2464 r = MIMEMessage(msg1) 2465 self.assertRaises(errors.MultipartConversionError, r.attach, msg2) 2466 2467 def test_generate(self): 2468 # First craft the message to be encapsulated 2469 m = Message() 2470 m['Subject'] = 'An enclosed message' 2471 m.set_payload('Here is the body of the message.\n') 2472 r = MIMEMessage(m) 2473 r['Subject'] = 'The enclosing message' 2474 s = StringIO() 2475 g = Generator(s) 2476 g.flatten(r) 2477 self.assertEqual(s.getvalue(), """\ 2478Content-Type: message/rfc822 2479MIME-Version: 1.0 2480Subject: The enclosing message 2481 2482Subject: An enclosed message 2483 2484Here is the body of the message. 2485""") 2486 2487 def test_parse_message_rfc822(self): 2488 eq = self.assertEqual 2489 msg = self._msgobj('msg_11.txt') 2490 eq(msg.get_content_type(), 'message/rfc822') 2491 payload = msg.get_payload() 2492 self.assertIsInstance(payload, list) 2493 eq(len(payload), 1) 2494 submsg = payload[0] 2495 self.assertIsInstance(submsg, Message) 2496 eq(submsg['subject'], 'An enclosed message') 2497 eq(submsg.get_payload(), 'Here is the body of the message.\n') 2498 2499 def test_dsn(self): 2500 eq = self.assertEqual 2501 # msg 16 is a Delivery Status Notification, see RFC 1894 2502 msg = self._msgobj('msg_16.txt') 2503 eq(msg.get_content_type(), 'multipart/report') 2504 self.assertTrue(msg.is_multipart()) 2505 eq(len(msg.get_payload()), 3) 2506 # Subpart 1 is a text/plain, human readable section 2507 subpart = msg.get_payload(0) 2508 eq(subpart.get_content_type(), 'text/plain') 2509 eq(subpart.get_payload(), """\ 2510This report relates to a message you sent with the following header fields: 2511 2512 Message-id: <002001c144a6$8752e060$56104586@oxy.edu> 2513 Date: Sun, 23 Sep 2001 20:10:55 -0700 2514 From: "Ian T. 
Henry" <henryi@oxy.edu> 2515 To: SoCal Raves <scr@socal-raves.org> 2516 Subject: [scr] yeah for Ians!! 2517 2518Your message cannot be delivered to the following recipients: 2519 2520 Recipient address: jangel1@cougar.noc.ucla.edu 2521 Reason: recipient reached disk quota 2522 2523""") 2524 # Subpart 2 contains the machine parsable DSN information. It 2525 # consists of two blocks of headers, represented by two nested Message 2526 # objects. 2527 subpart = msg.get_payload(1) 2528 eq(subpart.get_content_type(), 'message/delivery-status') 2529 eq(len(subpart.get_payload()), 2) 2530 # message/delivery-status should treat each block as a bunch of 2531 # headers, i.e. a bunch of Message objects. 2532 dsn1 = subpart.get_payload(0) 2533 self.assertIsInstance(dsn1, Message) 2534 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu') 2535 eq(dsn1.get_param('dns', header='reporting-mta'), '') 2536 # Try a missing one <wink> 2537 eq(dsn1.get_param('nsd', header='reporting-mta'), None) 2538 dsn2 = subpart.get_payload(1) 2539 self.assertIsInstance(dsn2, Message) 2540 eq(dsn2['action'], 'failed') 2541 eq(dsn2.get_params(header='original-recipient'), 2542 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')]) 2543 eq(dsn2.get_param('rfc822', header='final-recipient'), '') 2544 # Subpart 3 is the original message 2545 subpart = msg.get_payload(2) 2546 eq(subpart.get_content_type(), 'message/rfc822') 2547 payload = subpart.get_payload() 2548 self.assertIsInstance(payload, list) 2549 eq(len(payload), 1) 2550 subsubpart = payload[0] 2551 self.assertIsInstance(subsubpart, Message) 2552 eq(subsubpart.get_content_type(), 'text/plain') 2553 eq(subsubpart['message-id'], 2554 '<002001c144a6$8752e060$56104586@oxy.edu>') 2555 2556 def test_epilogue(self): 2557 eq = self.ndiffAssertEqual 2558 with openfile('msg_21.txt', encoding="utf-8") as fp: 2559 text = fp.read() 2560 msg = Message() 2561 msg['From'] = 'aperson@dom.ain' 2562 msg['To'] = 'bperson@dom.ain' 2563 msg['Subject'] 
= 'Test' 2564 msg.preamble = 'MIME message' 2565 msg.epilogue = 'End of MIME message\n' 2566 msg1 = MIMEText('One') 2567 msg2 = MIMEText('Two') 2568 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2569 msg.attach(msg1) 2570 msg.attach(msg2) 2571 sfp = StringIO() 2572 g = Generator(sfp) 2573 g.flatten(msg) 2574 eq(sfp.getvalue(), text) 2575 2576 def test_no_nl_preamble(self): 2577 eq = self.ndiffAssertEqual 2578 msg = Message() 2579 msg['From'] = 'aperson@dom.ain' 2580 msg['To'] = 'bperson@dom.ain' 2581 msg['Subject'] = 'Test' 2582 msg.preamble = 'MIME message' 2583 msg.epilogue = '' 2584 msg1 = MIMEText('One') 2585 msg2 = MIMEText('Two') 2586 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY') 2587 msg.attach(msg1) 2588 msg.attach(msg2) 2589 eq(msg.as_string(), """\ 2590From: aperson@dom.ain 2591To: bperson@dom.ain 2592Subject: Test 2593Content-Type: multipart/mixed; boundary="BOUNDARY" 2594 2595MIME message 2596--BOUNDARY 2597Content-Type: text/plain; charset="us-ascii" 2598MIME-Version: 1.0 2599Content-Transfer-Encoding: 7bit 2600 2601One 2602--BOUNDARY 2603Content-Type: text/plain; charset="us-ascii" 2604MIME-Version: 1.0 2605Content-Transfer-Encoding: 7bit 2606 2607Two 2608--BOUNDARY-- 2609""") 2610 2611 def test_default_type(self): 2612 eq = self.assertEqual 2613 with openfile('msg_30.txt', encoding="utf-8") as fp: 2614 msg = email.message_from_file(fp) 2615 container1 = msg.get_payload(0) 2616 eq(container1.get_default_type(), 'message/rfc822') 2617 eq(container1.get_content_type(), 'message/rfc822') 2618 container2 = msg.get_payload(1) 2619 eq(container2.get_default_type(), 'message/rfc822') 2620 eq(container2.get_content_type(), 'message/rfc822') 2621 container1a = container1.get_payload(0) 2622 eq(container1a.get_default_type(), 'text/plain') 2623 eq(container1a.get_content_type(), 'text/plain') 2624 container2a = container2.get_payload(0) 2625 eq(container2a.get_default_type(), 'text/plain') 2626 
eq(container2a.get_content_type(), 'text/plain') 2627 2628 def test_default_type_with_explicit_container_type(self): 2629 eq = self.assertEqual 2630 with openfile('msg_28.txt', encoding="utf-8") as fp: 2631 msg = email.message_from_file(fp) 2632 container1 = msg.get_payload(0) 2633 eq(container1.get_default_type(), 'message/rfc822') 2634 eq(container1.get_content_type(), 'message/rfc822') 2635 container2 = msg.get_payload(1) 2636 eq(container2.get_default_type(), 'message/rfc822') 2637 eq(container2.get_content_type(), 'message/rfc822') 2638 container1a = container1.get_payload(0) 2639 eq(container1a.get_default_type(), 'text/plain') 2640 eq(container1a.get_content_type(), 'text/plain') 2641 container2a = container2.get_payload(0) 2642 eq(container2a.get_default_type(), 'text/plain') 2643 eq(container2a.get_content_type(), 'text/plain') 2644 2645 def test_default_type_non_parsed(self): 2646 eq = self.assertEqual 2647 neq = self.ndiffAssertEqual 2648 # Set up container 2649 container = MIMEMultipart('digest', 'BOUNDARY') 2650 container.epilogue = '' 2651 # Set up subparts 2652 subpart1a = MIMEText('message 1\n') 2653 subpart2a = MIMEText('message 2\n') 2654 subpart1 = MIMEMessage(subpart1a) 2655 subpart2 = MIMEMessage(subpart2a) 2656 container.attach(subpart1) 2657 container.attach(subpart2) 2658 eq(subpart1.get_content_type(), 'message/rfc822') 2659 eq(subpart1.get_default_type(), 'message/rfc822') 2660 eq(subpart2.get_content_type(), 'message/rfc822') 2661 eq(subpart2.get_default_type(), 'message/rfc822') 2662 neq(container.as_string(0), '''\ 2663Content-Type: multipart/digest; boundary="BOUNDARY" 2664MIME-Version: 1.0 2665 2666--BOUNDARY 2667Content-Type: message/rfc822 2668MIME-Version: 1.0 2669 2670Content-Type: text/plain; charset="us-ascii" 2671MIME-Version: 1.0 2672Content-Transfer-Encoding: 7bit 2673 2674message 1 2675 2676--BOUNDARY 2677Content-Type: message/rfc822 2678MIME-Version: 1.0 2679 2680Content-Type: text/plain; charset="us-ascii" 
2681MIME-Version: 1.0 2682Content-Transfer-Encoding: 7bit 2683 2684message 2 2685 2686--BOUNDARY-- 2687''') 2688 del subpart1['content-type'] 2689 del subpart1['mime-version'] 2690 del subpart2['content-type'] 2691 del subpart2['mime-version'] 2692 eq(subpart1.get_content_type(), 'message/rfc822') 2693 eq(subpart1.get_default_type(), 'message/rfc822') 2694 eq(subpart2.get_content_type(), 'message/rfc822') 2695 eq(subpart2.get_default_type(), 'message/rfc822') 2696 neq(container.as_string(0), '''\ 2697Content-Type: multipart/digest; boundary="BOUNDARY" 2698MIME-Version: 1.0 2699 2700--BOUNDARY 2701 2702Content-Type: text/plain; charset="us-ascii" 2703MIME-Version: 1.0 2704Content-Transfer-Encoding: 7bit 2705 2706message 1 2707 2708--BOUNDARY 2709 2710Content-Type: text/plain; charset="us-ascii" 2711MIME-Version: 1.0 2712Content-Transfer-Encoding: 7bit 2713 2714message 2 2715 2716--BOUNDARY-- 2717''') 2718 2719 def test_mime_attachments_in_constructor(self): 2720 eq = self.assertEqual 2721 text1 = MIMEText('') 2722 text2 = MIMEText('') 2723 msg = MIMEMultipart(_subparts=(text1, text2)) 2724 eq(len(msg.get_payload()), 2) 2725 eq(msg.get_payload(0), text1) 2726 eq(msg.get_payload(1), text2) 2727 2728 def test_default_multipart_constructor(self): 2729 msg = MIMEMultipart() 2730 self.assertTrue(msg.is_multipart()) 2731 2732 def test_multipart_default_policy(self): 2733 msg = MIMEMultipart() 2734 msg['To'] = 'a@b.com' 2735 msg['To'] = 'c@d.com' 2736 self.assertEqual(msg.get_all('to'), ['a@b.com', 'c@d.com']) 2737 2738 def test_multipart_custom_policy(self): 2739 msg = MIMEMultipart(policy=email.policy.default) 2740 msg['To'] = 'a@b.com' 2741 with self.assertRaises(ValueError) as cm: 2742 msg['To'] = 'c@d.com' 2743 self.assertEqual(str(cm.exception), 2744 'There may be at most 1 To headers in a message') 2745 2746 2747# Test the NonMultipart class 2748class TestNonMultipart(TestEmailBase): 2749 def test_nonmultipart_is_not_multipart(self): 2750 msg = 
MIMENonMultipart('text', 'plain') 2751 self.assertFalse(msg.is_multipart()) 2752 2753 def test_attach_raises_exception(self): 2754 msg = Message() 2755 msg['Subject'] = 'subpart 1' 2756 r = MIMENonMultipart('text', 'plain') 2757 self.assertRaises(errors.MultipartConversionError, r.attach, msg) 2758 2759 2760# A general test of parser->model->generator idempotency. IOW, read a message 2761# in, parse it into a message object tree, then without touching the tree, 2762# regenerate the plain text. The original text and the transformed text 2763# should be identical. Note: that we ignore the Unix-From since that may 2764# contain a changed date. 2765class TestIdempotent(TestEmailBase): 2766 2767 linesep = '\n' 2768 2769 def _msgobj(self, filename): 2770 with openfile(filename, encoding="utf-8") as fp: 2771 data = fp.read() 2772 msg = email.message_from_string(data) 2773 return msg, data 2774 2775 def _idempotent(self, msg, text, unixfrom=False): 2776 eq = self.ndiffAssertEqual 2777 s = StringIO() 2778 g = Generator(s, maxheaderlen=0) 2779 g.flatten(msg, unixfrom=unixfrom) 2780 eq(text, s.getvalue()) 2781 2782 def test_parse_text_message(self): 2783 eq = self.assertEqual 2784 msg, text = self._msgobj('msg_01.txt') 2785 eq(msg.get_content_type(), 'text/plain') 2786 eq(msg.get_content_maintype(), 'text') 2787 eq(msg.get_content_subtype(), 'plain') 2788 eq(msg.get_params()[1], ('charset', 'us-ascii')) 2789 eq(msg.get_param('charset'), 'us-ascii') 2790 eq(msg.preamble, None) 2791 eq(msg.epilogue, None) 2792 self._idempotent(msg, text) 2793 2794 def test_parse_untyped_message(self): 2795 eq = self.assertEqual 2796 msg, text = self._msgobj('msg_03.txt') 2797 eq(msg.get_content_type(), 'text/plain') 2798 eq(msg.get_params(), None) 2799 eq(msg.get_param('charset'), None) 2800 self._idempotent(msg, text) 2801 2802 def test_simple_multipart(self): 2803 msg, text = self._msgobj('msg_04.txt') 2804 self._idempotent(msg, text) 2805 2806 def test_MIME_digest(self): 2807 msg, text = 
self._msgobj('msg_02.txt') 2808 self._idempotent(msg, text) 2809 2810 def test_long_header(self): 2811 msg, text = self._msgobj('msg_27.txt') 2812 self._idempotent(msg, text) 2813 2814 def test_MIME_digest_with_part_headers(self): 2815 msg, text = self._msgobj('msg_28.txt') 2816 self._idempotent(msg, text) 2817 2818 def test_mixed_with_image(self): 2819 msg, text = self._msgobj('msg_06.txt') 2820 self._idempotent(msg, text) 2821 2822 def test_multipart_report(self): 2823 msg, text = self._msgobj('msg_05.txt') 2824 self._idempotent(msg, text) 2825 2826 def test_dsn(self): 2827 msg, text = self._msgobj('msg_16.txt') 2828 self._idempotent(msg, text) 2829 2830 def test_preamble_epilogue(self): 2831 msg, text = self._msgobj('msg_21.txt') 2832 self._idempotent(msg, text) 2833 2834 def test_multipart_one_part(self): 2835 msg, text = self._msgobj('msg_23.txt') 2836 self._idempotent(msg, text) 2837 2838 def test_multipart_no_parts(self): 2839 msg, text = self._msgobj('msg_24.txt') 2840 self._idempotent(msg, text) 2841 2842 def test_no_start_boundary(self): 2843 msg, text = self._msgobj('msg_31.txt') 2844 self._idempotent(msg, text) 2845 2846 def test_rfc2231_charset(self): 2847 msg, text = self._msgobj('msg_32.txt') 2848 self._idempotent(msg, text) 2849 2850 def test_more_rfc2231_parameters(self): 2851 msg, text = self._msgobj('msg_33.txt') 2852 self._idempotent(msg, text) 2853 2854 def test_text_plain_in_a_multipart_digest(self): 2855 msg, text = self._msgobj('msg_34.txt') 2856 self._idempotent(msg, text) 2857 2858 def test_nested_multipart_mixeds(self): 2859 msg, text = self._msgobj('msg_12a.txt') 2860 self._idempotent(msg, text) 2861 2862 def test_message_external_body_idempotent(self): 2863 msg, text = self._msgobj('msg_36.txt') 2864 self._idempotent(msg, text) 2865 2866 def test_message_delivery_status(self): 2867 msg, text = self._msgobj('msg_43.txt') 2868 self._idempotent(msg, text, unixfrom=True) 2869 2870 def test_message_signed_idempotent(self): 2871 msg, text = 
self._msgobj('msg_45.txt') 2872 self._idempotent(msg, text) 2873 2874 def test_content_type(self): 2875 eq = self.assertEqual 2876 # Get a message object and reset the seek pointer for other tests 2877 msg, text = self._msgobj('msg_05.txt') 2878 eq(msg.get_content_type(), 'multipart/report') 2879 # Test the Content-Type: parameters 2880 params = {} 2881 for pk, pv in msg.get_params(): 2882 params[pk] = pv 2883 eq(params['report-type'], 'delivery-status') 2884 eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') 2885 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) 2886 eq(msg.epilogue, self.linesep) 2887 eq(len(msg.get_payload()), 3) 2888 # Make sure the subparts are what we expect 2889 msg1 = msg.get_payload(0) 2890 eq(msg1.get_content_type(), 'text/plain') 2891 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) 2892 msg2 = msg.get_payload(1) 2893 eq(msg2.get_content_type(), 'text/plain') 2894 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) 2895 msg3 = msg.get_payload(2) 2896 eq(msg3.get_content_type(), 'message/rfc822') 2897 self.assertIsInstance(msg3, Message) 2898 payload = msg3.get_payload() 2899 self.assertIsInstance(payload, list) 2900 eq(len(payload), 1) 2901 msg4 = payload[0] 2902 self.assertIsInstance(msg4, Message) 2903 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) 2904 2905 def test_parser(self): 2906 eq = self.assertEqual 2907 msg, text = self._msgobj('msg_06.txt') 2908 # Check some of the outer headers 2909 eq(msg.get_content_type(), 'message/rfc822') 2910 # Make sure the payload is a list of exactly one sub-Message, and that 2911 # that submessage has a type of text/plain 2912 payload = msg.get_payload() 2913 self.assertIsInstance(payload, list) 2914 eq(len(payload), 1) 2915 msg1 = payload[0] 2916 self.assertIsInstance(msg1, Message) 2917 eq(msg1.get_content_type(), 'text/plain') 2918 self.assertIsInstance(msg1.get_payload(), str) 2919 eq(msg1.get_payload(), self.linesep) 2920 2921 
2922 2923# Test various other bits of the package's functionality 2924class TestMiscellaneous(TestEmailBase): 2925 def test_message_from_string(self): 2926 with openfile('msg_01.txt', encoding="utf-8") as fp: 2927 text = fp.read() 2928 msg = email.message_from_string(text) 2929 s = StringIO() 2930 # Don't wrap/continue long headers since we're trying to test 2931 # idempotency. 2932 g = Generator(s, maxheaderlen=0) 2933 g.flatten(msg) 2934 self.assertEqual(text, s.getvalue()) 2935 2936 def test_message_from_file(self): 2937 with openfile('msg_01.txt', encoding="utf-8") as fp: 2938 text = fp.read() 2939 fp.seek(0) 2940 msg = email.message_from_file(fp) 2941 s = StringIO() 2942 # Don't wrap/continue long headers since we're trying to test 2943 # idempotency. 2944 g = Generator(s, maxheaderlen=0) 2945 g.flatten(msg) 2946 self.assertEqual(text, s.getvalue()) 2947 2948 def test_message_from_string_with_class(self): 2949 with openfile('msg_01.txt', encoding="utf-8") as fp: 2950 text = fp.read() 2951 2952 # Create a subclass 2953 class MyMessage(Message): 2954 pass 2955 2956 msg = email.message_from_string(text, MyMessage) 2957 self.assertIsInstance(msg, MyMessage) 2958 # Try something more complicated 2959 with openfile('msg_02.txt', encoding="utf-8") as fp: 2960 text = fp.read() 2961 msg = email.message_from_string(text, MyMessage) 2962 for subpart in msg.walk(): 2963 self.assertIsInstance(subpart, MyMessage) 2964 2965 def test_message_from_file_with_class(self): 2966 # Create a subclass 2967 class MyMessage(Message): 2968 pass 2969 2970 with openfile('msg_01.txt', encoding="utf-8") as fp: 2971 msg = email.message_from_file(fp, MyMessage) 2972 self.assertIsInstance(msg, MyMessage) 2973 # Try something more complicated 2974 with openfile('msg_02.txt', encoding="utf-8") as fp: 2975 msg = email.message_from_file(fp, MyMessage) 2976 for subpart in msg.walk(): 2977 self.assertIsInstance(subpart, MyMessage) 2978 2979 def test_custom_message_does_not_require_arguments(self): 
2980 class MyMessage(Message): 2981 def __init__(self): 2982 super().__init__() 2983 msg = self._str_msg("Subject: test\n\ntest", MyMessage) 2984 self.assertIsInstance(msg, MyMessage) 2985 2986 def test__all__(self): 2987 module = __import__('email') 2988 self.assertEqual(sorted(module.__all__), [ 2989 'base64mime', 'charset', 'encoders', 'errors', 'feedparser', 2990 'generator', 'header', 'iterators', 'message', 2991 'message_from_binary_file', 'message_from_bytes', 2992 'message_from_file', 'message_from_string', 'mime', 'parser', 2993 'quoprimime', 'utils', 2994 ]) 2995 2996 def test_formatdate(self): 2997 now = time.time() 2998 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], 2999 time.gmtime(now)[:6]) 3000 3001 def test_formatdate_localtime(self): 3002 now = time.time() 3003 self.assertEqual( 3004 utils.parsedate(utils.formatdate(now, localtime=True))[:6], 3005 time.localtime(now)[:6]) 3006 3007 def test_formatdate_usegmt(self): 3008 now = time.time() 3009 self.assertEqual( 3010 utils.formatdate(now, localtime=False), 3011 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) 3012 self.assertEqual( 3013 utils.formatdate(now, localtime=False, usegmt=True), 3014 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) 3015 3016 # parsedate and parsedate_tz will become deprecated interfaces someday 3017 def test_parsedate_returns_None_for_invalid_strings(self): 3018 self.assertIsNone(utils.parsedate('')) 3019 self.assertIsNone(utils.parsedate_tz('')) 3020 self.assertIsNone(utils.parsedate(' ')) 3021 self.assertIsNone(utils.parsedate_tz(' ')) 3022 self.assertIsNone(utils.parsedate('0')) 3023 self.assertIsNone(utils.parsedate_tz('0')) 3024 self.assertIsNone(utils.parsedate('A Complete Waste of Time')) 3025 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time')) 3026 self.assertIsNone(utils.parsedate_tz('Wed, 3 Apr 2002 12.34.56.78+0800')) 3027 # Not a part of the spec but, but this has historically worked: 3028 
self.assertIsNone(utils.parsedate(None)) 3029 self.assertIsNone(utils.parsedate_tz(None)) 3030 3031 def test_parsedate_compact(self): 3032 # The FWS after the comma is optional 3033 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), 3034 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) 3035 3036 def test_parsedate_no_dayofweek(self): 3037 eq = self.assertEqual 3038 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), 3039 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) 3040 3041 def test_parsedate_compact_no_dayofweek(self): 3042 eq = self.assertEqual 3043 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), 3044 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3045 3046 def test_parsedate_no_space_before_positive_offset(self): 3047 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), 3048 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3049 3050 def test_parsedate_no_space_before_negative_offset(self): 3051 # Issue 1155362: we already handled '+' for this case. 3052 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), 3053 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) 3054 3055 3056 def test_parsedate_accepts_time_with_dots(self): 3057 eq = self.assertEqual 3058 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'), 3059 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3060 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'), 3061 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)) 3062 3063 def test_parsedate_acceptable_to_time_functions(self): 3064 eq = self.assertEqual 3065 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') 3066 t = int(time.mktime(timetup)) 3067 eq(time.localtime(t)[:6], timetup[:6]) 3068 eq(int(time.strftime('%Y', timetup)), 2003) 3069 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') 3070 t = int(time.mktime(timetup[:9])) 3071 eq(time.localtime(t)[:6], timetup[:6]) 3072 eq(int(time.strftime('%Y', timetup[:9])), 2003) 3073 3074 def test_mktime_tz(self): 3075 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 
3076 -1, -1, -1, 0)), 0) 3077 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3078 -1, -1, -1, 1234)), -1234) 3079 3080 def test_parsedate_y2k(self): 3081 """Test for parsing a date with a two-digit year. 3082 3083 Parsing a date with a two-digit year should return the correct 3084 four-digit year. RFC822 allows two-digit years, but RFC2822 (which 3085 obsoletes RFC822) requires four-digit years. 3086 3087 """ 3088 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), 3089 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) 3090 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), 3091 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) 3092 3093 def test_parseaddr_empty(self): 3094 self.assertEqual(utils.parseaddr('<>'), ('', '')) 3095 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') 3096 3097 def test_parseaddr_multiple_domains(self): 3098 self.assertEqual( 3099 utils.parseaddr('a@b@c'), 3100 ('', '') 3101 ) 3102 self.assertEqual( 3103 utils.parseaddr('a@b.c@c'), 3104 ('', '') 3105 ) 3106 self.assertEqual( 3107 utils.parseaddr('a@172.17.0.1@c'), 3108 ('', '') 3109 ) 3110 3111 def test_noquote_dump(self): 3112 self.assertEqual( 3113 utils.formataddr(('A Silly Person', 'person@dom.ain')), 3114 'A Silly Person <person@dom.ain>') 3115 3116 def test_escape_dump(self): 3117 self.assertEqual( 3118 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), 3119 r'"A (Very) Silly Person" <person@dom.ain>') 3120 self.assertEqual( 3121 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'), 3122 ('A (Very) Silly Person', 'person@dom.ain')) 3123 a = r'A \(Special\) Person' 3124 b = 'person@dom.ain' 3125 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3126 3127 def test_escape_backslashes(self): 3128 self.assertEqual( 3129 utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')), 3130 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>') 3131 a = r'Arthur \Backslash\ Foobar' 3132 b = 
'person@dom.ain' 3133 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3134 3135 def test_quotes_unicode_names(self): 3136 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3137 name = "H\u00e4ns W\u00fcrst" 3138 addr = 'person@dom.ain' 3139 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3140 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>" 3141 self.assertEqual(utils.formataddr((name, addr)), utf8_base64) 3142 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), 3143 latin1_quopri) 3144 3145 def test_accepts_any_charset_like_object(self): 3146 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3147 name = "H\u00e4ns W\u00fcrst" 3148 addr = 'person@dom.ain' 3149 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>" 3150 foobar = "FOOBAR" 3151 class CharsetMock: 3152 def header_encode(self, string): 3153 return foobar 3154 mock = CharsetMock() 3155 mock_expected = "%s <%s>" % (foobar, addr) 3156 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) 3157 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), 3158 utf8_base64) 3159 3160 def test_invalid_charset_like_object_raises_error(self): 3161 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3162 name = "H\u00e4ns W\u00fcrst" 3163 addr = 'person@dom.ain' 3164 # An object without a header_encode method: 3165 bad_charset = object() 3166 self.assertRaises(AttributeError, utils.formataddr, (name, addr), 3167 bad_charset) 3168 3169 def test_unicode_address_raises_error(self): 3170 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3171 addr = 'pers\u00f6n@dom.in' 3172 self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) 3173 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) 3174 3175 def test_name_with_dot(self): 3176 x = 'John X. Doe <jxd@example.com>' 3177 y = '"John X. Doe" <jxd@example.com>' 3178 a, b = ('John X. 
Doe', 'jxd@example.com') 3179 self.assertEqual(utils.parseaddr(x), (a, b)) 3180 self.assertEqual(utils.parseaddr(y), (a, b)) 3181 # formataddr() quotes the name if there's a dot in it 3182 self.assertEqual(utils.formataddr((a, b)), y) 3183 3184 def test_parseaddr_preserves_quoted_pairs_in_addresses(self): 3185 # issue 10005. Note that in the third test the second pair of 3186 # backslashes is not actually a quoted pair because it is not inside a 3187 # comment or quoted string: the address being parsed has a quoted 3188 # string containing a quoted backslash, followed by 'example' and two 3189 # backslashes, followed by another quoted string containing a space and 3190 # the word 'example'. parseaddr copies those two backslashes 3191 # literally. Per rfc5322 this is not technically correct since a \ may 3192 # not appear in an address outside of a quoted string. It is probably 3193 # a sensible Postel interpretation, though. 3194 eq = self.assertEqual 3195 eq(utils.parseaddr('""example" example"@example.com'), 3196 ('', '""example" example"@example.com')) 3197 eq(utils.parseaddr('"\\"example\\" example"@example.com'), 3198 ('', '"\\"example\\" example"@example.com')) 3199 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), 3200 ('', '"\\\\"example\\\\" example"@example.com')) 3201 3202 def test_parseaddr_preserves_spaces_in_local_part(self): 3203 # issue 9286. A normal RFC5322 local part should not contain any 3204 # folding white space, but legacy local parts can (they are a sequence 3205 # of atoms, not dotatoms). On the other hand we strip whitespace from 3206 # before the @ and around dots, on the assumption that the whitespace 3207 # around the punctuation is a mistake in what would otherwise be 3208 # an RFC5322 local part. Leading whitespace is, usual, stripped as well. 
3209 self.assertEqual(('', "merwok wok@xample.com"), 3210 utils.parseaddr("merwok wok@xample.com")) 3211 self.assertEqual(('', "merwok wok@xample.com"), 3212 utils.parseaddr("merwok wok@xample.com")) 3213 self.assertEqual(('', "merwok wok@xample.com"), 3214 utils.parseaddr(" merwok wok @xample.com")) 3215 self.assertEqual(('', 'merwok"wok" wok@xample.com'), 3216 utils.parseaddr('merwok"wok" wok@xample.com')) 3217 self.assertEqual(('', 'merwok.wok.wok@xample.com'), 3218 utils.parseaddr('merwok. wok . wok@xample.com')) 3219 3220 def test_formataddr_does_not_quote_parens_in_quoted_string(self): 3221 addr = ("'foo@example.com' (foo@example.com)", 3222 'foo@example.com') 3223 addrstr = ('"\'foo@example.com\' ' 3224 '(foo@example.com)" <foo@example.com>') 3225 self.assertEqual(utils.parseaddr(addrstr), addr) 3226 self.assertEqual(utils.formataddr(addr), addrstr) 3227 3228 3229 def test_multiline_from_comment(self): 3230 x = """\ 3231Foo 3232\tBar <foo@example.com>""" 3233 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) 3234 3235 def test_quote_dump(self): 3236 self.assertEqual( 3237 utils.formataddr(('A Silly; Person', 'person@dom.ain')), 3238 r'"A Silly; Person" <person@dom.ain>') 3239 3240 def test_charset_richcomparisons(self): 3241 eq = self.assertEqual 3242 ne = self.assertNotEqual 3243 cset1 = Charset() 3244 cset2 = Charset() 3245 eq(cset1, 'us-ascii') 3246 eq(cset1, 'US-ASCII') 3247 eq(cset1, 'Us-AsCiI') 3248 eq('us-ascii', cset1) 3249 eq('US-ASCII', cset1) 3250 eq('Us-AsCiI', cset1) 3251 ne(cset1, 'usascii') 3252 ne(cset1, 'USASCII') 3253 ne(cset1, 'UsAsCiI') 3254 ne('usascii', cset1) 3255 ne('USASCII', cset1) 3256 ne('UsAsCiI', cset1) 3257 eq(cset1, cset2) 3258 eq(cset2, cset1) 3259 3260 def test_getaddresses(self): 3261 eq = self.assertEqual 3262 eq(utils.getaddresses(['aperson@dom.ain (Al Person)', 3263 'Bud Person <bperson@dom.ain>']), 3264 [('Al Person', 'aperson@dom.ain'), 3265 ('Bud Person', 'bperson@dom.ain')]) 3266 3267 def 
test_getaddresses_nasty(self): 3268 eq = self.assertEqual 3269 eq(utils.getaddresses(['foo: ;']), [('', '')]) 3270 eq(utils.getaddresses( 3271 ['[]*-- =~$']), 3272 [('', ''), ('', ''), ('', '*--')]) 3273 eq(utils.getaddresses( 3274 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), 3275 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) 3276 3277 def test_getaddresses_embedded_comment(self): 3278 """Test proper handling of a nested comment""" 3279 eq = self.assertEqual 3280 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>']) 3281 eq(addrs[0][1], 'foo@bar.com') 3282 3283 def test_getaddresses_header_obj(self): 3284 """Test the handling of a Header object.""" 3285 addrs = utils.getaddresses([Header('Al Person <aperson@dom.ain>')]) 3286 self.assertEqual(addrs[0][1], 'aperson@dom.ain') 3287 3288 def test_make_msgid_collisions(self): 3289 # Test make_msgid uniqueness, even with multiple threads 3290 class MsgidsThread(Thread): 3291 def run(self): 3292 # generate msgids for 3 seconds 3293 self.msgids = [] 3294 append = self.msgids.append 3295 make_msgid = utils.make_msgid 3296 clock = time.monotonic 3297 tfin = clock() + 3.0 3298 while clock() < tfin: 3299 append(make_msgid(domain='testdomain-string')) 3300 3301 threads = [MsgidsThread() for i in range(5)] 3302 with threading_helper.start_threads(threads): 3303 pass 3304 all_ids = sum([t.msgids for t in threads], []) 3305 self.assertEqual(len(set(all_ids)), len(all_ids)) 3306 3307 def test_utils_quote_unquote(self): 3308 eq = self.assertEqual 3309 msg = Message() 3310 msg.add_header('content-disposition', 'attachment', 3311 filename='foo\\wacky"name') 3312 eq(msg.get_filename(), 'foo\\wacky"name') 3313 3314 def test_get_body_encoding_with_bogus_charset(self): 3315 charset = Charset('not a charset') 3316 self.assertEqual(charset.get_body_encoding(), 'base64') 3317 3318 def test_get_body_encoding_with_uppercase_charset(self): 3319 eq = self.assertEqual 3320 msg = Message() 3321 msg['Content-Type'] = 
'text/plain; charset=UTF-8' 3322 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3323 charsets = msg.get_charsets() 3324 eq(len(charsets), 1) 3325 eq(charsets[0], 'utf-8') 3326 charset = Charset(charsets[0]) 3327 eq(charset.get_body_encoding(), 'base64') 3328 msg.set_payload(b'hello world', charset=charset) 3329 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3330 eq(msg.get_payload(decode=True), b'hello world') 3331 eq(msg['content-transfer-encoding'], 'base64') 3332 # Try another one 3333 msg = Message() 3334 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3335 charsets = msg.get_charsets() 3336 eq(len(charsets), 1) 3337 eq(charsets[0], 'us-ascii') 3338 charset = Charset(charsets[0]) 3339 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3340 msg.set_payload('hello world', charset=charset) 3341 eq(msg.get_payload(), 'hello world') 3342 eq(msg['content-transfer-encoding'], '7bit') 3343 3344 def test_charsets_case_insensitive(self): 3345 lc = Charset('us-ascii') 3346 uc = Charset('US-ASCII') 3347 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3348 3349 def test_partial_falls_inside_message_delivery_status(self): 3350 eq = self.ndiffAssertEqual 3351 # The Parser interface provides chunks of data to FeedParser in 8192 3352 # byte gulps. SF bug #1076485 found one of those chunks inside 3353 # message/delivery-status header block, which triggered an 3354 # unreadline() of NeedMoreData. 
3355 msg = self._msgobj('msg_43.txt') 3356 sfp = StringIO() 3357 iterators._structure(msg, sfp) 3358 eq(sfp.getvalue(), """\ 3359multipart/report 3360 text/plain 3361 message/delivery-status 3362 text/plain 3363 text/plain 3364 text/plain 3365 text/plain 3366 text/plain 3367 text/plain 3368 text/plain 3369 text/plain 3370 text/plain 3371 text/plain 3372 text/plain 3373 text/plain 3374 text/plain 3375 text/plain 3376 text/plain 3377 text/plain 3378 text/plain 3379 text/plain 3380 text/plain 3381 text/plain 3382 text/plain 3383 text/plain 3384 text/plain 3385 text/plain 3386 text/plain 3387 text/plain 3388 text/rfc822-headers 3389""") 3390 3391 def test_make_msgid_domain(self): 3392 self.assertEqual( 3393 email.utils.make_msgid(domain='testdomain-string')[-19:], 3394 '@testdomain-string>') 3395 3396 def test_make_msgid_idstring(self): 3397 self.assertEqual( 3398 email.utils.make_msgid(idstring='test-idstring', 3399 domain='testdomain-string')[-33:], 3400 '.test-idstring@testdomain-string>') 3401 3402 def test_make_msgid_default_domain(self): 3403 with patch('socket.getfqdn') as mock_getfqdn: 3404 mock_getfqdn.return_value = domain = 'pythontest.example.com' 3405 self.assertTrue( 3406 email.utils.make_msgid().endswith( 3407 '@' + domain + '>')) 3408 3409 def test_Generator_linend(self): 3410 # Issue 14645. 3411 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f: 3412 msgtxt = f.read() 3413 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3414 msg = email.message_from_string(msgtxt) 3415 s = StringIO() 3416 g = email.generator.Generator(s) 3417 g.flatten(msg) 3418 self.assertEqual(s.getvalue(), msgtxt_nl) 3419 3420 def test_BytesGenerator_linend(self): 3421 # Issue 14645. 
3422 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f: 3423 msgtxt = f.read() 3424 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3425 msg = email.message_from_string(msgtxt_nl) 3426 s = BytesIO() 3427 g = email.generator.BytesGenerator(s) 3428 g.flatten(msg, linesep='\r\n') 3429 self.assertEqual(s.getvalue().decode('ascii'), msgtxt) 3430 3431 def test_BytesGenerator_linend_with_non_ascii(self): 3432 # Issue 14645. 3433 with openfile('msg_26.txt', 'rb') as f: 3434 msgtxt = f.read() 3435 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6') 3436 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n') 3437 msg = email.message_from_bytes(msgtxt_nl) 3438 s = BytesIO() 3439 g = email.generator.BytesGenerator(s) 3440 g.flatten(msg, linesep='\r\n') 3441 self.assertEqual(s.getvalue(), msgtxt) 3442 3443 def test_mime_classes_policy_argument(self): 3444 with openfile('audiotest.au', 'rb') as fp: 3445 audiodata = fp.read() 3446 with openfile('PyBanner048.gif', 'rb') as fp: 3447 bindata = fp.read() 3448 classes = [ 3449 (MIMEApplication, ('',)), 3450 (MIMEAudio, (audiodata,)), 3451 (MIMEImage, (bindata,)), 3452 (MIMEMessage, (Message(),)), 3453 (MIMENonMultipart, ('multipart', 'mixed')), 3454 (MIMEText, ('',)), 3455 ] 3456 for cls, constructor in classes: 3457 with self.subTest(cls=cls.__name__, policy='compat32'): 3458 m = cls(*constructor) 3459 self.assertIs(m.policy, email.policy.compat32) 3460 with self.subTest(cls=cls.__name__, policy='default'): 3461 m = cls(*constructor, policy=email.policy.default) 3462 self.assertIs(m.policy, email.policy.default) 3463 3464 3465# Test the iterator/generators 3466class TestIterators(TestEmailBase): 3467 def test_body_line_iterator(self): 3468 eq = self.assertEqual 3469 neq = self.ndiffAssertEqual 3470 # First a simple non-multipart message 3471 msg = self._msgobj('msg_01.txt') 3472 it = iterators.body_line_iterator(msg) 3473 lines = list(it) 3474 eq(len(lines), 6) 3475 neq(EMPTYSTRING.join(lines), msg.get_payload()) 3476 # Now a more 
complicated multipart 3477 msg = self._msgobj('msg_02.txt') 3478 it = iterators.body_line_iterator(msg) 3479 lines = list(it) 3480 eq(len(lines), 43) 3481 with openfile('msg_19.txt', encoding="utf-8") as fp: 3482 neq(EMPTYSTRING.join(lines), fp.read()) 3483 3484 def test_typed_subpart_iterator(self): 3485 eq = self.assertEqual 3486 msg = self._msgobj('msg_04.txt') 3487 it = iterators.typed_subpart_iterator(msg, 'text') 3488 lines = [] 3489 subparts = 0 3490 for subpart in it: 3491 subparts += 1 3492 lines.append(subpart.get_payload()) 3493 eq(subparts, 2) 3494 eq(EMPTYSTRING.join(lines), """\ 3495a simple kind of mirror 3496to reflect upon our own 3497a simple kind of mirror 3498to reflect upon our own 3499""") 3500 3501 def test_typed_subpart_iterator_default_type(self): 3502 eq = self.assertEqual 3503 msg = self._msgobj('msg_03.txt') 3504 it = iterators.typed_subpart_iterator(msg, 'text', 'plain') 3505 lines = [] 3506 subparts = 0 3507 for subpart in it: 3508 subparts += 1 3509 lines.append(subpart.get_payload()) 3510 eq(subparts, 1) 3511 eq(EMPTYSTRING.join(lines), """\ 3512 3513Hi, 3514 3515Do you like this message? 3516 3517-Me 3518""") 3519 3520 def test_pushCR_LF(self): 3521 '''FeedParser BufferedSubFile.push() assumed it received complete 3522 line endings. A CR ending one push() followed by a LF starting 3523 the next push() added an empty line. 
3524 ''' 3525 imt = [ 3526 ("a\r \n", 2), 3527 ("b", 0), 3528 ("c\n", 1), 3529 ("", 0), 3530 ("d\r\n", 1), 3531 ("e\r", 0), 3532 ("\nf", 1), 3533 ("\r\n", 1), 3534 ] 3535 from email.feedparser import BufferedSubFile, NeedMoreData 3536 bsf = BufferedSubFile() 3537 om = [] 3538 nt = 0 3539 for il, n in imt: 3540 bsf.push(il) 3541 nt += n 3542 n1 = 0 3543 for ol in iter(bsf.readline, NeedMoreData): 3544 om.append(ol) 3545 n1 += 1 3546 self.assertEqual(n, n1) 3547 self.assertEqual(len(om), nt) 3548 self.assertEqual(''.join([il for il, n in imt]), ''.join(om)) 3549 3550 def test_push_random(self): 3551 from email.feedparser import BufferedSubFile, NeedMoreData 3552 3553 n = 10000 3554 chunksize = 5 3555 chars = 'abcd \t\r\n' 3556 3557 s = ''.join(choice(chars) for i in range(n)) + '\n' 3558 target = s.splitlines(True) 3559 3560 bsf = BufferedSubFile() 3561 lines = [] 3562 for i in range(0, len(s), chunksize): 3563 chunk = s[i:i+chunksize] 3564 bsf.push(chunk) 3565 lines.extend(iter(bsf.readline, NeedMoreData)) 3566 self.assertEqual(lines, target) 3567 3568 3569class TestFeedParsers(TestEmailBase): 3570 3571 def parse(self, chunks): 3572 feedparser = FeedParser() 3573 for chunk in chunks: 3574 feedparser.feed(chunk) 3575 return feedparser.close() 3576 3577 def test_empty_header_name_handled(self): 3578 # Issue 19996 3579 msg = self.parse("First: val\n: bad\nSecond: val") 3580 self.assertEqual(msg['First'], 'val') 3581 self.assertEqual(msg['Second'], 'val') 3582 3583 def test_newlines(self): 3584 m = self.parse(['a:\nb:\rc:\r\nd:\n']) 3585 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3586 m = self.parse(['a:\nb:\rc:\r\nd:']) 3587 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3588 m = self.parse(['a:\rb', 'c:\n']) 3589 self.assertEqual(m.keys(), ['a', 'bc']) 3590 m = self.parse(['a:\r', 'b:\n']) 3591 self.assertEqual(m.keys(), ['a', 'b']) 3592 m = self.parse(['a:\r', '\nb:\n']) 3593 self.assertEqual(m.keys(), ['a', 'b']) 3594 3595 # Only CR and LF should break 
def test_long_lines(self):
    # Expected peak memory use on 32-bit platform: 6*N*M bytes.
    M, N = 1000, 20000
    row = 'x' * M
    single_header = [('a', 'b')]

    # N chunks without any line ending follow the header block; they must
    # come back as one payload, whatever doubled line ending closed the
    # headers and even when each chunk ends in '\x85'.
    scenarios = (
        ('a:b\n\n', row),
        ('a:b\r\r', row),
        ('a:b\r\r', row + '\x85'),
    )
    for head, piece in scenarios:
        parsed = self.parse([head] + [piece] * N)
        self.assertEqual(parsed.items(), single_header)
        self.assertEqual(parsed.get_payload(), piece * N)

    # The same N chunks arriving as the value of a header.
    parsed = self.parse(['a:\r', 'b: '] + [row] * N)
    self.assertEqual(parsed.items(), [('a', ''), ('b', row * N)])
with openfile('msg_02.txt', 'rb') as fp: 3644 email.parser.BytesParser().parse(fp) 3645 self.assertFalse(fp.closed) 3646 3647 def test_bytes_parser_on_exception_does_not_close_file(self): 3648 with openfile('msg_15.txt', 'rb') as fp: 3649 bytesParser = email.parser.BytesParser 3650 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3651 bytesParser(policy=email.policy.strict).parse, 3652 fp) 3653 self.assertFalse(fp.closed) 3654 3655 def test_parser_does_not_close_file(self): 3656 with openfile('msg_02.txt', encoding="utf-8") as fp: 3657 email.parser.Parser().parse(fp) 3658 self.assertFalse(fp.closed) 3659 3660 def test_parser_on_exception_does_not_close_file(self): 3661 with openfile('msg_15.txt', encoding="utf-8") as fp: 3662 parser = email.parser.Parser 3663 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3664 parser(policy=email.policy.strict).parse, fp) 3665 self.assertFalse(fp.closed) 3666 3667 def test_whitespace_continuation(self): 3668 eq = self.assertEqual 3669 # This message contains a line after the Subject: header that has only 3670 # whitespace, but it is not empty! 3671 msg = email.message_from_string("""\ 3672From: aperson@dom.ain 3673To: bperson@dom.ain 3674Subject: the next line has a space on it 3675\x20 3676Date: Mon, 8 Apr 2002 15:09:19 -0400 3677Message-ID: spam 3678 3679Here's the message body 3680""") 3681 eq(msg['subject'], 'the next line has a space on it\n ') 3682 eq(msg['message-id'], 'spam') 3683 eq(msg.get_payload(), "Here's the message body\n") 3684 3685 def test_whitespace_continuation_last_header(self): 3686 eq = self.assertEqual 3687 # Like the previous test, but the subject line is the last 3688 # header. 
3689 msg = email.message_from_string("""\ 3690From: aperson@dom.ain 3691To: bperson@dom.ain 3692Date: Mon, 8 Apr 2002 15:09:19 -0400 3693Message-ID: spam 3694Subject: the next line has a space on it 3695\x20 3696 3697Here's the message body 3698""") 3699 eq(msg['subject'], 'the next line has a space on it\n ') 3700 eq(msg['message-id'], 'spam') 3701 eq(msg.get_payload(), "Here's the message body\n") 3702 3703 def test_crlf_separation(self): 3704 eq = self.assertEqual 3705 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp: 3706 msg = Parser().parse(fp) 3707 eq(len(msg.get_payload()), 2) 3708 part1 = msg.get_payload(0) 3709 eq(part1.get_content_type(), 'text/plain') 3710 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n') 3711 part2 = msg.get_payload(1) 3712 eq(part2.get_content_type(), 'application/riscos') 3713 3714 def test_crlf_flatten(self): 3715 # Using newline='\n' preserves the crlfs in this input file. 3716 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp: 3717 text = fp.read() 3718 msg = email.message_from_string(text) 3719 s = StringIO() 3720 g = Generator(s) 3721 g.flatten(msg, linesep='\r\n') 3722 self.assertEqual(s.getvalue(), text) 3723 3724 maxDiff = None 3725 3726 def test_multipart_digest_with_extra_mime_headers(self): 3727 eq = self.assertEqual 3728 neq = self.ndiffAssertEqual 3729 with openfile('msg_28.txt', encoding="utf-8") as fp: 3730 msg = email.message_from_file(fp) 3731 # Structure is: 3732 # multipart/digest 3733 # message/rfc822 3734 # text/plain 3735 # message/rfc822 3736 # text/plain 3737 eq(msg.is_multipart(), 1) 3738 eq(len(msg.get_payload()), 2) 3739 part1 = msg.get_payload(0) 3740 eq(part1.get_content_type(), 'message/rfc822') 3741 eq(part1.is_multipart(), 1) 3742 eq(len(part1.get_payload()), 1) 3743 part1a = part1.get_payload(0) 3744 eq(part1a.is_multipart(), 0) 3745 eq(part1a.get_content_type(), 'text/plain') 3746 neq(part1a.get_payload(), 'message 1\n') 3747 # next message/rfc822 3748 
part2 = msg.get_payload(1) 3749 eq(part2.get_content_type(), 'message/rfc822') 3750 eq(part2.is_multipart(), 1) 3751 eq(len(part2.get_payload()), 1) 3752 part2a = part2.get_payload(0) 3753 eq(part2a.is_multipart(), 0) 3754 eq(part2a.get_content_type(), 'text/plain') 3755 neq(part2a.get_payload(), 'message 2\n') 3756 3757 def test_three_lines(self): 3758 # A bug report by Andrew McNamara 3759 lines = ['From: Andrew Person <aperson@dom.ain', 3760 'Subject: Test', 3761 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] 3762 msg = email.message_from_string(NL.join(lines)) 3763 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') 3764 3765 def test_strip_line_feed_and_carriage_return_in_headers(self): 3766 eq = self.assertEqual 3767 # For [ 1002475 ] email message parser doesn't handle \r\n correctly 3768 value1 = 'text' 3769 value2 = 'more text' 3770 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( 3771 value1, value2) 3772 msg = email.message_from_string(m) 3773 eq(msg.get('Header'), value1) 3774 eq(msg.get('Next-Header'), value2) 3775 3776 def test_rfc2822_header_syntax(self): 3777 eq = self.assertEqual 3778 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3779 msg = email.message_from_string(m) 3780 eq(len(msg), 3) 3781 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) 3782 eq(msg.get_payload(), 'body') 3783 3784 def test_rfc2822_space_not_allowed_in_header(self): 3785 eq = self.assertEqual 3786 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3787 msg = email.message_from_string(m) 3788 eq(len(msg.keys()), 0) 3789 3790 def test_rfc2822_one_character_header(self): 3791 eq = self.assertEqual 3792 m = 'A: first header\nB: second header\nCC: third header\n\nbody' 3793 msg = email.message_from_string(m) 3794 headers = msg.keys() 3795 headers.sort() 3796 eq(headers, ['A', 'B', 'CC']) 3797 eq(msg.get_payload(), 'body') 3798 3799 def test_CRLFLF_at_end_of_part(self): 3800 # issue 5610: feedparser should not eat two chars 
from body part ending 3801 # with "\r\n\n". 3802 m = ( 3803 "From: foo@bar.com\n" 3804 "To: baz\n" 3805 "Mime-Version: 1.0\n" 3806 "Content-Type: multipart/mixed; boundary=BOUNDARY\n" 3807 "\n" 3808 "--BOUNDARY\n" 3809 "Content-Type: text/plain\n" 3810 "\n" 3811 "body ending with CRLF newline\r\n" 3812 "\n" 3813 "--BOUNDARY--\n" 3814 ) 3815 msg = email.message_from_string(m) 3816 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) 3817 3818 3819class Test8BitBytesHandling(TestEmailBase): 3820 # In Python3 all input is string, but that doesn't work if the actual input 3821 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we 3822 # decode byte streams using the surrogateescape error handler, and 3823 # reconvert to binary at appropriate places if we detect surrogates. This 3824 # doesn't allow us to transform headers with 8bit bytes (they get munged), 3825 # but it does allow us to parse and preserve them, and to decode body 3826 # parts that use an 8bit CTE. 
3827 3828 bodytest_msg = textwrap.dedent("""\ 3829 From: foo@bar.com 3830 To: baz 3831 Mime-Version: 1.0 3832 Content-Type: text/plain; charset={charset} 3833 Content-Transfer-Encoding: {cte} 3834 3835 {bodyline} 3836 """) 3837 3838 def test_known_8bit_CTE(self): 3839 m = self.bodytest_msg.format(charset='utf-8', 3840 cte='8bit', 3841 bodyline='pöstal').encode('utf-8') 3842 msg = email.message_from_bytes(m) 3843 self.assertEqual(msg.get_payload(), "pöstal\n") 3844 self.assertEqual(msg.get_payload(decode=True), 3845 "pöstal\n".encode('utf-8')) 3846 3847 def test_unknown_8bit_CTE(self): 3848 m = self.bodytest_msg.format(charset='notavalidcharset', 3849 cte='8bit', 3850 bodyline='pöstal').encode('utf-8') 3851 msg = email.message_from_bytes(m) 3852 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") 3853 self.assertEqual(msg.get_payload(decode=True), 3854 "pöstal\n".encode('utf-8')) 3855 3856 def test_8bit_in_quopri_body(self): 3857 # This is non-RFC compliant data...without 'decode' the library code 3858 # decodes the body using the charset from the headers, and because the 3859 # source byte really is utf-8 this works. This is likely to fail 3860 # against real dirty data (ie: produce mojibake), but the data is 3861 # invalid anyway so it is as good a guess as any. But this means that 3862 # this test just confirms the current behavior; that behavior is not 3863 # necessarily the best possible behavior. With 'decode' it is 3864 # returning the raw bytes, so that test should be of correct behavior, 3865 # or at least produce the same result that email4 did. 
3866 m = self.bodytest_msg.format(charset='utf-8', 3867 cte='quoted-printable', 3868 bodyline='p=C3=B6stál').encode('utf-8') 3869 msg = email.message_from_bytes(m) 3870 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') 3871 self.assertEqual(msg.get_payload(decode=True), 3872 'pöstál\n'.encode('utf-8')) 3873 3874 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): 3875 # This is similar to the previous test, but proves that if the 8bit 3876 # byte is undecodeable in the specified charset, it gets replaced 3877 # by the unicode 'unknown' character. Again, this may or may not 3878 # be the ideal behavior. Note that if decode=False none of the 3879 # decoders will get involved, so this is the only test we need 3880 # for this behavior. 3881 m = self.bodytest_msg.format(charset='ascii', 3882 cte='quoted-printable', 3883 bodyline='p=C3=B6stál').encode('utf-8') 3884 msg = email.message_from_bytes(m) 3885 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') 3886 self.assertEqual(msg.get_payload(decode=True), 3887 'pöstál\n'.encode('utf-8')) 3888 3889 # test_defect_handling:test_invalid_chars_in_base64_payload 3890 def test_8bit_in_base64_body(self): 3891 # If we get 8bit bytes in a base64 body, we can just ignore them 3892 # as being outside the base64 alphabet and decode anyway. But 3893 # we register a defect. 3894 m = self.bodytest_msg.format(charset='utf-8', 3895 cte='base64', 3896 bodyline='cMO2c3RhbAá=').encode('utf-8') 3897 msg = email.message_from_bytes(m) 3898 self.assertEqual(msg.get_payload(decode=True), 3899 'pöstal'.encode('utf-8')) 3900 self.assertIsInstance(msg.defects[0], 3901 errors.InvalidBase64CharactersDefect) 3902 3903 def test_8bit_in_uuencode_body(self): 3904 # Sticking an 8bit byte in a uuencode block makes it undecodable by 3905 # normal means, so the block is returned undecoded, but as bytes. 
3906 m = self.bodytest_msg.format(charset='utf-8', 3907 cte='uuencode', 3908 bodyline='<,.V<W1A; á ').encode('utf-8') 3909 msg = email.message_from_bytes(m) 3910 self.assertEqual(msg.get_payload(decode=True), 3911 '<,.V<W1A; á \n'.encode('utf-8')) 3912 3913 3914 headertest_headers = ( 3915 ('From: foo@bar.com', ('From', 'foo@bar.com')), 3916 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), 3917 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n' 3918 '\tJean de Baddie', 3919 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 3920 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' 3921 ' =?unknown-8bit?q?_Jean_de_Baddie?=')), 3922 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), 3923 ) 3924 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + 3925 '\nYes, they are flying.\n').encode('utf-8') 3926 3927 def test_get_8bit_header(self): 3928 msg = email.message_from_bytes(self.headertest_msg) 3929 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') 3930 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') 3931 3932 def test_print_8bit_headers(self): 3933 msg = email.message_from_bytes(self.headertest_msg) 3934 self.assertEqual(str(msg), 3935 textwrap.dedent("""\ 3936 From: {} 3937 To: {} 3938 Subject: {} 3939 From: {} 3940 3941 Yes, they are flying. 
3942 """).format(*[expected[1] for (_, expected) in 3943 self.headertest_headers])) 3944 3945 def test_values_with_8bit_headers(self): 3946 msg = email.message_from_bytes(self.headertest_msg) 3947 self.assertListEqual([str(x) for x in msg.values()], 3948 ['foo@bar.com', 3949 'b\uFFFD\uFFFDz', 3950 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' 3951 'coll\uFFFD\uFFFDgue, le pouf ' 3952 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3953 '\tJean de Baddie', 3954 "g\uFFFD\uFFFDst"]) 3955 3956 def test_items_with_8bit_headers(self): 3957 msg = email.message_from_bytes(self.headertest_msg) 3958 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], 3959 [('From', 'foo@bar.com'), 3960 ('To', 'b\uFFFD\uFFFDz'), 3961 ('Subject', 'Maintenant je vous ' 3962 'pr\uFFFD\uFFFDsente ' 3963 'mon coll\uFFFD\uFFFDgue, le pouf ' 3964 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 3965 '\tJean de Baddie'), 3966 ('From', 'g\uFFFD\uFFFDst')]) 3967 3968 def test_get_all_with_8bit_headers(self): 3969 msg = email.message_from_bytes(self.headertest_msg) 3970 self.assertListEqual([str(x) for x in msg.get_all('from')], 3971 ['foo@bar.com', 3972 'g\uFFFD\uFFFDst']) 3973 3974 def test_get_content_type_with_8bit(self): 3975 msg = email.message_from_bytes(textwrap.dedent("""\ 3976 Content-Type: text/pl\xA7in; charset=utf-8 3977 """).encode('latin-1')) 3978 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") 3979 self.assertEqual(msg.get_content_maintype(), "text") 3980 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") 3981 3982 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params 3983 def test_get_params_with_8bit(self): 3984 msg = email.message_from_bytes( 3985 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) 3986 self.assertEqual(msg.get_params(header='x-header'), 3987 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) 3988 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') 3989 # XXX: someday you might be able to get 'b\xa7r', for now 
you can't. 3990 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) 3991 3992 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value 3993 def test_get_rfc2231_params_with_8bit(self): 3994 msg = email.message_from_bytes(textwrap.dedent("""\ 3995 Content-Type: text/plain; charset=us-ascii; 3996 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 3997 ).encode('latin-1')) 3998 self.assertEqual(msg.get_param('title'), 3999 ('us-ascii', 'en', 'This is not f\uFFFDn')) 4000 4001 def test_set_rfc2231_params_with_8bit(self): 4002 msg = email.message_from_bytes(textwrap.dedent("""\ 4003 Content-Type: text/plain; charset=us-ascii; 4004 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4005 ).encode('latin-1')) 4006 msg.set_param('title', 'test') 4007 self.assertEqual(msg.get_param('title'), 'test') 4008 4009 def test_del_rfc2231_params_with_8bit(self): 4010 msg = email.message_from_bytes(textwrap.dedent("""\ 4011 Content-Type: text/plain; charset=us-ascii; 4012 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4013 ).encode('latin-1')) 4014 msg.del_param('title') 4015 self.assertEqual(msg.get_param('title'), None) 4016 self.assertEqual(msg.get_content_maintype(), 'text') 4017 4018 def test_get_payload_with_8bit_cte_header(self): 4019 msg = email.message_from_bytes(textwrap.dedent("""\ 4020 Content-Transfer-Encoding: b\xa7se64 4021 Content-Type: text/plain; charset=latin-1 4022 4023 payload 4024 """).encode('latin-1')) 4025 self.assertEqual(msg.get_payload(), 'payload\n') 4026 self.assertEqual(msg.get_payload(decode=True), b'payload\n') 4027 4028 non_latin_bin_msg = textwrap.dedent("""\ 4029 From: foo@bar.com 4030 To: báz 4031 Subject: Maintenant je vous présente mon collègue, le pouf célèbre 4032 \tJean de Baddie 4033 Mime-Version: 1.0 4034 Content-Type: text/plain; charset="utf-8" 4035 Content-Transfer-Encoding: 8bit 4036 4037 Да, они летят. 
4038 """).encode('utf-8') 4039 4040 def test_bytes_generator(self): 4041 msg = email.message_from_bytes(self.non_latin_bin_msg) 4042 out = BytesIO() 4043 email.generator.BytesGenerator(out).flatten(msg) 4044 self.assertEqual(out.getvalue(), self.non_latin_bin_msg) 4045 4046 def test_bytes_generator_handles_None_body(self): 4047 #Issue 11019 4048 msg = email.message.Message() 4049 out = BytesIO() 4050 email.generator.BytesGenerator(out).flatten(msg) 4051 self.assertEqual(out.getvalue(), b"\n") 4052 4053 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ 4054 From: foo@bar.com 4055 To: =?unknown-8bit?q?b=C3=A1z?= 4056 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= 4057 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?= 4058 =?unknown-8bit?q?_Jean_de_Baddie?= 4059 Mime-Version: 1.0 4060 Content-Type: text/plain; charset="utf-8" 4061 Content-Transfer-Encoding: base64 4062 4063 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== 4064 """) 4065 4066 def test_generator_handles_8bit(self): 4067 msg = email.message_from_bytes(self.non_latin_bin_msg) 4068 out = StringIO() 4069 email.generator.Generator(out).flatten(msg) 4070 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) 4071 4072 def test_str_generator_should_not_mutate_msg_when_handling_8bit(self): 4073 msg = email.message_from_bytes(self.non_latin_bin_msg) 4074 out = BytesIO() 4075 BytesGenerator(out).flatten(msg) 4076 orig_value = out.getvalue() 4077 Generator(StringIO()).flatten(msg) # Should not mutate msg! 4078 out = BytesIO() 4079 BytesGenerator(out).flatten(msg) 4080 self.assertEqual(out.getvalue(), orig_value) 4081 4082 def test_bytes_generator_with_unix_from(self): 4083 # The unixfrom contains a current date, so we can't check it 4084 # literally. Just make sure the first word is 'From' and the 4085 # rest of the message matches the input. 
4086 msg = email.message_from_bytes(self.non_latin_bin_msg) 4087 out = BytesIO() 4088 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True) 4089 lines = out.getvalue().split(b'\n') 4090 self.assertEqual(lines[0].split()[0], b'From') 4091 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg) 4092 4093 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n') 4094 non_latin_bin_msg_as7bit[2:4] = [ 4095 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 4096 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?='] 4097 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit) 4098 4099 def test_message_from_binary_file(self): 4100 fn = 'test.msg' 4101 self.addCleanup(unlink, fn) 4102 with open(fn, 'wb') as testfile: 4103 testfile.write(self.non_latin_bin_msg) 4104 with open(fn, 'rb') as testfile: 4105 m = email.parser.BytesParser().parse(testfile) 4106 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit) 4107 4108 latin_bin_msg = textwrap.dedent("""\ 4109 From: foo@bar.com 4110 To: Dinsdale 4111 Subject: Nudge nudge, wink, wink 4112 Mime-Version: 1.0 4113 Content-Type: text/plain; charset="latin-1" 4114 Content-Transfer-Encoding: 8bit 4115 4116 oh là là, know what I mean, know what I mean? 4117 """).encode('latin-1') 4118 4119 latin_bin_msg_as7bit = textwrap.dedent("""\ 4120 From: foo@bar.com 4121 To: Dinsdale 4122 Subject: Nudge nudge, wink, wink 4123 Mime-Version: 1.0 4124 Content-Type: text/plain; charset="iso-8859-1" 4125 Content-Transfer-Encoding: quoted-printable 4126 4127 oh l=E0 l=E0, know what I mean, know what I mean? 
4128 """) 4129 4130 def test_string_generator_reencodes_to_quopri_when_appropriate(self): 4131 m = email.message_from_bytes(self.latin_bin_msg) 4132 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4133 4134 def test_decoded_generator_emits_unicode_body(self): 4135 m = email.message_from_bytes(self.latin_bin_msg) 4136 out = StringIO() 4137 email.generator.DecodedGenerator(out).flatten(m) 4138 #DecodedHeader output contains an extra blank line compared 4139 #to the input message. RDM: not sure if this is a bug or not, 4140 #but it is not specific to the 8bit->7bit conversion. 4141 self.assertEqual(out.getvalue(), 4142 self.latin_bin_msg.decode('latin-1')+'\n') 4143 4144 def test_bytes_feedparser(self): 4145 bfp = email.feedparser.BytesFeedParser() 4146 for i in range(0, len(self.latin_bin_msg), 10): 4147 bfp.feed(self.latin_bin_msg[i:i+10]) 4148 m = bfp.close() 4149 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4150 4151 def test_crlf_flatten(self): 4152 with openfile('msg_26.txt', 'rb') as fp: 4153 text = fp.read() 4154 msg = email.message_from_bytes(text) 4155 s = BytesIO() 4156 g = email.generator.BytesGenerator(s) 4157 g.flatten(msg, linesep='\r\n') 4158 self.assertEqual(s.getvalue(), text) 4159 4160 def test_8bit_multipart(self): 4161 # Issue 11605 4162 source = textwrap.dedent("""\ 4163 Date: Fri, 18 Mar 2011 17:15:43 +0100 4164 To: foo@example.com 4165 From: foodwatch-Newsletter <bar@example.com> 4166 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System 4167 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain> 4168 MIME-Version: 1.0 4169 Content-Type: multipart/alternative; 4170 boundary="b1_76a486bee62b0d200f33dc2ca08220ad" 4171 4172 --b1_76a486bee62b0d200f33dc2ca08220ad 4173 Content-Type: text/plain; charset="utf-8" 4174 Content-Transfer-Encoding: 8bit 4175 4176 Guten Tag, , 4177 4178 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die 4179 Nachrichten aus Japan. 
4180 4181 4182 --b1_76a486bee62b0d200f33dc2ca08220ad 4183 Content-Type: text/html; charset="utf-8" 4184 Content-Transfer-Encoding: 8bit 4185 4186 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 4187 "http://www.w3.org/TR/html4/loose.dtd"> 4188 <html lang="de"> 4189 <head> 4190 <title>foodwatch - Newsletter</title> 4191 </head> 4192 <body> 4193 <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team 4194 die Nachrichten aus Japan.</p> 4195 </body> 4196 </html> 4197 --b1_76a486bee62b0d200f33dc2ca08220ad-- 4198 4199 """).encode('utf-8') 4200 msg = email.message_from_bytes(source) 4201 s = BytesIO() 4202 g = email.generator.BytesGenerator(s) 4203 g.flatten(msg) 4204 self.assertEqual(s.getvalue(), source) 4205 4206 def test_bytes_generator_b_encoding_linesep(self): 4207 # Issue 14062: b encoding was tacking on an extra \n. 4208 m = Message() 4209 # This has enough non-ascii that it should always end up b encoded. 4210 m['Subject'] = Header('žluťoučký kůň') 4211 s = BytesIO() 4212 g = email.generator.BytesGenerator(s) 4213 g.flatten(m, linesep='\r\n') 4214 self.assertEqual( 4215 s.getvalue(), 4216 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4217 4218 def test_generator_b_encoding_linesep(self): 4219 # Since this broke in ByteGenerator, test Generator for completeness. 4220 m = Message() 4221 # This has enough non-ascii that it should always end up b encoded. 
4222 m['Subject'] = Header('žluťoučký kůň') 4223 s = StringIO() 4224 g = email.generator.Generator(s) 4225 g.flatten(m, linesep='\r\n') 4226 self.assertEqual( 4227 s.getvalue(), 4228 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4229 4230 maxDiff = None 4231 4232 4233class BaseTestBytesGeneratorIdempotent: 4234 4235 maxDiff = None 4236 4237 def _msgobj(self, filename): 4238 with openfile(filename, 'rb') as fp: 4239 data = fp.read() 4240 data = self.normalize_linesep_regex.sub(self.blinesep, data) 4241 msg = email.message_from_bytes(data) 4242 return msg, data 4243 4244 def _idempotent(self, msg, data, unixfrom=False): 4245 b = BytesIO() 4246 g = email.generator.BytesGenerator(b, maxheaderlen=0) 4247 g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep) 4248 self.assertEqual(data, b.getvalue()) 4249 4250 4251class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent, 4252 TestIdempotent): 4253 linesep = '\n' 4254 blinesep = b'\n' 4255 normalize_linesep_regex = re.compile(br'\r\n') 4256 4257 4258class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent, 4259 TestIdempotent): 4260 linesep = '\r\n' 4261 blinesep = b'\r\n' 4262 normalize_linesep_regex = re.compile(br'(?<!\r)\n') 4263 4264 4265class TestBase64(unittest.TestCase): 4266 def test_len(self): 4267 eq = self.assertEqual 4268 eq(base64mime.header_length('hello'), 4269 len(base64mime.body_encode(b'hello', eol=''))) 4270 for size in range(15): 4271 if size == 0 : bsize = 0 4272 elif size <= 3 : bsize = 4 4273 elif size <= 6 : bsize = 8 4274 elif size <= 9 : bsize = 12 4275 elif size <= 12: bsize = 16 4276 else : bsize = 20 4277 eq(base64mime.header_length('x' * size), bsize) 4278 4279 def test_decode(self): 4280 eq = self.assertEqual 4281 eq(base64mime.decode(''), b'') 4282 eq(base64mime.decode('aGVsbG8='), b'hello') 4283 4284 def test_encode(self): 4285 eq = self.assertEqual 4286 eq(base64mime.body_encode(b''), '') 4287 eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n') 
4288 # Test the binary flag 4289 eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n') 4290 # Test the maxlinelen arg 4291 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\ 4292eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4293eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4294eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4295eHh4eCB4eHh4IA== 4296""") 4297 # Test the eol argument 4298 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4299 """\ 4300eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4301eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4302eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4303eHh4eCB4eHh4IA==\r 4304""") 4305 4306 def test_header_encode(self): 4307 eq = self.assertEqual 4308 he = base64mime.header_encode 4309 eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') 4310 eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') 4311 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4312 # Test the charset option 4313 eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') 4314 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4315 4316 4317class TestQuopri(unittest.TestCase): 4318 def setUp(self): 4319 # Set of characters (as byte integers) that don't need to be encoded 4320 # in headers. 4321 self.hlit = list(chain( 4322 range(ord('a'), ord('z') + 1), 4323 range(ord('A'), ord('Z') + 1), 4324 range(ord('0'), ord('9') + 1), 4325 (c for c in b'!*+-/'))) 4326 # Set of characters (as byte integers) that do need to be encoded in 4327 # headers. 4328 self.hnon = [c for c in range(256) if c not in self.hlit] 4329 assert len(self.hlit) + len(self.hnon) == 256 4330 # Set of characters (as byte integers) that don't need to be encoded 4331 # in bodies. 4332 self.blit = list(range(ord(' '), ord('~') + 1)) 4333 self.blit.append(ord('\t')) 4334 self.blit.remove(ord('=')) 4335 # Set of characters (as byte integers) that do need to be encoded in 4336 # bodies. 
        # Every octet that is not a body literal has to be quoted; the two
        # lists together must account for all 256 octet values.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    # self.hlit / self.hnon (and self.blit above) are built earlier in setUp,
    # outside this view.  The checks below treat the *lit lists as octets
    # that pass through unencoded and the *non lists as octets that must be
    # encoded.
    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    # Same check as above, for the body encoding tables.
    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    # header_length() counts only the encoded payload: 1 per literal octet,
    # 3 per quoted octet, and 1 for a space (which is encoded as "_").
    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    # body_length() is 1 for each body-literal octet and 3 for each octet
    # that has to be quoted.
    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    # unquote(quote(c)) must round-trip every code point below 256.
    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    # Helper: encode `header` with header_encode() and compare with the
    # expected encoded word.  `charset` is only passed through when given, so
    # the default-charset path of header_encode() is exercised as well.
    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    # Helper: decode `encoded_header` with header_decode() and compare with
    # the expected text.
    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    # Helper: decode `encoded` with quoprimime.decode() and compare with the
    # expected text.  `eol` is only passed through when given, so decode()'s
    # own default is used otherwise.
    def _test_decode(self, encoded, expected_decoded, eol=None):
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    # The expectations below show trailing whitespace on an encoded line
    # being dropped by decode().
    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    # "=" not followed by two hex digits is expected to pass through as-is.
    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    # Helper: encode `body` with body_encode() and compare with the expected
    # text.  maxlinelen/eol are only forwarded when given; the local defaults
    # (76, '\n') mirror body_encode()'s and are used solely for the
    # line-length check at the end.
    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_line_eol_after_non_ascii(self):
        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
                          'hello=CF=85\r\n', eol='\r\n')

    # Whitespace at the end of a line must be quoted.
    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4564 4565 def test_encode_two_lines_one_space(self): 4566 self._test_encode(' \n \n', '=20\n=20\n') 4567 4568 def test_encode_one_word_trailing_spaces(self): 4569 self._test_encode('hello ', 'hello =20') 4570 4571 def test_encode_one_line_trailing_spaces(self): 4572 self._test_encode('hello \n', 'hello =20\n') 4573 4574 def test_encode_one_word_trailing_tab(self): 4575 self._test_encode('hello \t', 'hello =09') 4576 4577 def test_encode_one_line_trailing_tab(self): 4578 self._test_encode('hello \t\n', 'hello =09\n') 4579 4580 def test_encode_trailing_space_before_maxlinelen(self): 4581 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6) 4582 4583 def test_encode_trailing_space_at_maxlinelen(self): 4584 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5) 4585 4586 def test_encode_trailing_space_beyond_maxlinelen(self): 4587 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4) 4588 4589 def test_encode_whitespace_lines(self): 4590 self._test_encode(' \n' * 5, '=20\n' * 5) 4591 4592 def test_encode_quoted_equals(self): 4593 self._test_encode('a = b', 'a =3D b') 4594 4595 def test_encode_one_long_string(self): 4596 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25) 4597 4598 def test_encode_one_long_line(self): 4599 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n') 4600 4601 def test_encode_one_very_long_line(self): 4602 self._test_encode('x' * 200 + '\n', 4603 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n') 4604 4605 def test_encode_shortest_maxlinelen(self): 4606 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4) 4607 4608 def test_encode_maxlinelen_too_small(self): 4609 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3) 4610 4611 def test_encode(self): 4612 eq = self.assertEqual 4613 eq(quoprimime.body_encode(''), '') 4614 eq(quoprimime.body_encode('hello'), 'hello') 4615 # Test the binary flag 4616 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld') 4617 # Test 
the maxlinelen arg 4618 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\ 4619xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= 4620 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= 4621x xxxx xxxx xxxx xxxx=20""") 4622 # Test the eol argument 4623 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4624 """\ 4625xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r 4626 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r 4627x xxxx xxxx xxxx xxxx=20""") 4628 eq(quoprimime.body_encode("""\ 4629one line 4630 4631two line"""), """\ 4632one line 4633 4634two line""") 4635 4636 4637 4638# Test the Charset class 4639class TestCharset(unittest.TestCase): 4640 def tearDown(self): 4641 from email import charset as CharsetModule 4642 try: 4643 del CharsetModule.CHARSETS['fake'] 4644 except KeyError: 4645 pass 4646 4647 def test_codec_encodeable(self): 4648 eq = self.assertEqual 4649 # Make sure us-ascii = no Unicode conversion 4650 c = Charset('us-ascii') 4651 eq(c.header_encode('Hello World!'), 'Hello World!') 4652 # Test 8-bit idempotency with us-ascii 4653 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' 4654 self.assertRaises(UnicodeError, c.header_encode, s) 4655 c = Charset('utf-8') 4656 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=') 4657 4658 def test_body_encode(self): 4659 eq = self.assertEqual 4660 # Try a charset with QP body encoding 4661 c = Charset('iso-8859-1') 4662 eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) 4663 # Try a charset with Base64 body encoding 4664 c = Charset('utf-8') 4665 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world')) 4666 # Try a charset with None body encoding 4667 c = Charset('us-ascii') 4668 eq('hello world', c.body_encode('hello world')) 4669 # Try the convert argument, where input codec != output codec 4670 c = Charset('euc-jp') 4671 # With apologies to Tokio Kikuchi ;) 4672 # XXX FIXME 4673## try: 4674## eq('\x1b$B5FCO;~IW\x1b(B', 4675## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) 4676## 
eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', 4677## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) 4678## except LookupError: 4679## # We probably don't have the Japanese codecs installed 4680## pass 4681 # Testing SF bug #625509, which we have to fake, since there are no 4682 # built-in encodings where the header encoding is QP but the body 4683 # encoding is not. 4684 from email import charset as CharsetModule 4685 CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8') 4686 c = Charset('fake') 4687 eq('hello world', c.body_encode('hello world')) 4688 4689 def test_unicode_charset_name(self): 4690 charset = Charset('us-ascii') 4691 self.assertEqual(str(charset), 'us-ascii') 4692 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') 4693 4694 4695 4696# Test multilingual MIME headers. 4697class TestHeader(TestEmailBase): 4698 def test_simple(self): 4699 eq = self.ndiffAssertEqual 4700 h = Header('Hello World!') 4701 eq(h.encode(), 'Hello World!') 4702 h.append(' Goodbye World!') 4703 eq(h.encode(), 'Hello World! Goodbye World!') 4704 4705 def test_simple_surprise(self): 4706 eq = self.ndiffAssertEqual 4707 h = Header('Hello World!') 4708 eq(h.encode(), 'Hello World!') 4709 h.append('Goodbye World!') 4710 eq(h.encode(), 'Hello World! 
Goodbye World!') 4711 4712 def test_header_needs_no_decoding(self): 4713 h = 'no decoding needed' 4714 self.assertEqual(decode_header(h), [(h, None)]) 4715 4716 def test_long(self): 4717 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", 4718 maxlinelen=76) 4719 for l in h.encode(splitchars=' ').split('\n '): 4720 self.assertLessEqual(len(l), 76) 4721 4722 def test_multilingual(self): 4723 eq = self.ndiffAssertEqual 4724 g = Charset("iso-8859-1") 4725 cz = Charset("iso-8859-2") 4726 utf8 = Charset("utf-8") 4727 g_head = (b'Die Mieter treten hier ein werden mit einem ' 4728 b'Foerderband komfortabel den Korridor entlang, ' 4729 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, ' 4730 b'gegen die rotierenden Klingen bef\xf6rdert. ') 4731 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 4732 b'd\xf9vtipu.. ') 4733 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 4734 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 4735 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 4736 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 4737 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 4738 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 4739 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 4740 '\u3044\u307e\u3059\u3002') 4741 h = Header(g_head, g) 4742 h.append(cz_head, cz) 4743 h.append(utf8_head, utf8) 4744 enc = h.encode(maxlinelen=76) 4745 eq(enc, """\ 4746=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?= 4747 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?= 4748 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?= 4749 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= 4750 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 4751 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= 4752 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= 4753 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= 4754 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?= 4755 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?= 4756 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""") 4757 decoded = decode_header(enc) 4758 eq(len(decoded), 3) 4759 eq(decoded[0], (g_head, 'iso-8859-1')) 4760 eq(decoded[1], (cz_head, 'iso-8859-2')) 4761 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8')) 4762 ustr = str(h) 4763 eq(ustr, 4764 (b'Die Mieter treten hier ein werden mit einem Foerderband ' 4765 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' 4766 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' 4767 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' 4768 b'tlakem jejich d\xc5\xafvtipu.. 
\xe6\xad\xa3\xe7\xa2\xba\xe3\x81' 4769 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' 4770 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' 4771 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' 4772 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' 4773 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' 4774 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' 4775 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' 4776 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' 4777 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' 4778 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' 4779 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82' 4780 ).decode('utf-8')) 4781 # Test make_header() 4782 newh = make_header(decode_header(enc)) 4783 eq(newh, h) 4784 4785 def test_empty_header_encode(self): 4786 h = Header() 4787 self.assertEqual(h.encode(), '') 4788 4789 def test_header_ctor_default_args(self): 4790 eq = self.ndiffAssertEqual 4791 h = Header() 4792 eq(h, '') 4793 h.append('foo', Charset('iso-8859-1')) 4794 eq(h, 'foo') 4795 4796 def test_explicit_maxlinelen(self): 4797 eq = self.ndiffAssertEqual 4798 hstr = ('A very long line that must get split to something other ' 4799 'than at the 76th character boundary to test the non-default ' 4800 'behavior') 4801 h = Header(hstr) 4802 eq(h.encode(), '''\ 4803A very long line that must get split to something other than at the 76th 4804 character boundary to test the non-default behavior''') 4805 eq(str(h), hstr) 4806 h = Header(hstr, header_name='Subject') 4807 eq(h.encode(), '''\ 4808A very long line that must get split to something other than at the 4809 76th character boundary to test the non-default behavior''') 4810 eq(str(h), hstr) 4811 h = Header(hstr, maxlinelen=1024, header_name='Subject') 4812 eq(h.encode(), hstr) 4813 eq(str(h), hstr) 4814 4815 
    # A quoted-printable header must be splittable at arbitrary character
    # positions: with maxlinelen=20 each encoded word carries only one to
    # three payload characters, and the folded result must still decode back
    # to the original text.
    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        # Same text with a wider limit: fewer, longer encoded words.
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    # Same as above for a charset whose header encoding is base64 (the
    # expected output uses the "b" encoding): each encoded word holds whole
    # base64 quanta, and the folded header must round-trip.
    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    # A plain ASCII header decodes to a single (text, None) pair and
    # make_header() rebuilds a header that encodes back to the same text.
    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    # append() accepts the charset as a plain string.
    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]',
'us-ascii') 4959## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') 4960 4961 def test_utf8_shortest(self): 4962 eq = self.assertEqual 4963 h = Header('p\xf6stal', 'utf-8') 4964 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') 4965 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8') 4966 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') 4967 4968 def test_bad_8bit_header(self): 4969 raises = self.assertRaises 4970 eq = self.assertEqual 4971 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4972 raises(UnicodeError, Header, x) 4973 h = Header() 4974 raises(UnicodeError, h.append, x) 4975 e = x.decode('utf-8', 'replace') 4976 eq(str(Header(x, errors='replace')), e) 4977 h.append(x, errors='replace') 4978 eq(str(h), e) 4979 4980 def test_escaped_8bit_header(self): 4981 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4982 e = x.decode('ascii', 'surrogateescape') 4983 h = Header(e, charset=email.charset.UNKNOWN8BIT) 4984 self.assertEqual(str(h), 4985 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4986 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4987 4988 def test_header_handles_binary_unknown8bit(self): 4989 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4990 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4991 self.assertEqual(str(h), 4992 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 4993 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 4994 4995 def test_make_header_handles_binary_unknown8bit(self): 4996 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 4997 h = Header(x, charset=email.charset.UNKNOWN8BIT) 4998 h2 = email.header.make_header(email.header.decode_header(h)) 4999 self.assertEqual(str(h2), 5000 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 5001 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) 5002 5003 def test_modify_returned_list_does_not_change_header(self): 5004 h = Header('test') 5005 chunks = 
email.header.decode_header(h) 5006 chunks.append(('ascii', 'test2')) 5007 self.assertEqual(str(h), 'test') 5008 5009 def test_encoded_adjacent_nonencoded(self): 5010 eq = self.assertEqual 5011 h = Header() 5012 h.append('hello', 'iso-8859-1') 5013 h.append('world') 5014 s = h.encode() 5015 eq(s, '=?iso-8859-1?q?hello?= world') 5016 h = make_header(decode_header(s)) 5017 eq(h.encode(), s) 5018 5019 def test_whitespace_keeper(self): 5020 eq = self.assertEqual 5021 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' 5022 parts = decode_header(s) 5023 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)]) 5024 hdr = make_header(parts) 5025 eq(hdr.encode(), 5026 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 5027 5028 def test_broken_base64_header(self): 5029 raises = self.assertRaises 5030 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' 5031 raises(errors.HeaderParseError, decode_header, s) 5032 5033 def test_shift_jis_charset(self): 5034 h = Header('文', charset='shift_jis') 5035 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=') 5036 5037 def test_flatten_header_with_no_value(self): 5038 # Issue 11401 (regression from email 4.x) Note that the space after 5039 # the header doesn't reflect the input, but this is also the way 5040 # email 4.x behaved. At some point it would be nice to fix that. 
5041 msg = email.message_from_string("EmptyHeader:") 5042 self.assertEqual(str(msg), "EmptyHeader: \n\n") 5043 5044 def test_encode_preserves_leading_ws_on_value(self): 5045 msg = Message() 5046 msg['SomeHeader'] = ' value with leading ws' 5047 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n") 5048 5049 def test_whitespace_header(self): 5050 self.assertEqual(Header(' ').encode(), ' ') 5051 5052 5053 5054# Test RFC 2231 header parameters (en/de)coding 5055class TestRFC2231(TestEmailBase): 5056 5057 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5058 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5059 def test_get_param(self): 5060 eq = self.assertEqual 5061 msg = self._msgobj('msg_29.txt') 5062 eq(msg.get_param('title'), 5063 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5064 eq(msg.get_param('title', unquote=False), 5065 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) 5066 5067 def test_set_param(self): 5068 eq = self.ndiffAssertEqual 5069 msg = Message() 5070 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5071 charset='us-ascii') 5072 eq(msg.get_param('title'), 5073 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) 5074 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5075 charset='us-ascii', language='en') 5076 eq(msg.get_param('title'), 5077 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5078 msg = self._msgobj('msg_01.txt') 5079 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5080 charset='us-ascii', language='en') 5081 eq(msg.as_string(maxheaderlen=78), """\ 5082Return-Path: <bbb@zzz.org> 5083Delivered-To: bbb@zzz.org 5084Received: by mail.zzz.org (Postfix, from userid 889) 5085\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5086MIME-Version: 1.0 5087Content-Transfer-Encoding: 7bit 5088Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5089From: bbb@ddd.com 
(John X. Doe) 5090To: bbb@zzz.org 5091Subject: This is a test message 5092Date: Fri, 4 May 2001 14:05:44 -0400 5093Content-Type: text/plain; charset=us-ascii; 5094 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5095 5096 5097Hi, 5098 5099Do you like this message? 5100 5101-Me 5102""") 5103 5104 def test_set_param_requote(self): 5105 msg = Message() 5106 msg.set_param('title', 'foo') 5107 self.assertEqual(msg['content-type'], 'text/plain; title="foo"') 5108 msg.set_param('title', 'bar', requote=False) 5109 self.assertEqual(msg['content-type'], 'text/plain; title=bar') 5110 # tspecial is still quoted. 5111 msg.set_param('title', "(bar)bell", requote=False) 5112 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"') 5113 5114 def test_del_param(self): 5115 eq = self.ndiffAssertEqual 5116 msg = self._msgobj('msg_01.txt') 5117 msg.set_param('foo', 'bar', charset='us-ascii', language='en') 5118 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5119 charset='us-ascii', language='en') 5120 msg.del_param('foo', header='Content-Type') 5121 eq(msg.as_string(maxheaderlen=78), """\ 5122Return-Path: <bbb@zzz.org> 5123Delivered-To: bbb@zzz.org 5124Received: by mail.zzz.org (Postfix, from userid 889) 5125\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5126MIME-Version: 1.0 5127Content-Transfer-Encoding: 7bit 5128Message-ID: <15090.61304.110929.45684@aaa.zzz.org> 5129From: bbb@ddd.com (John X. Doe) 5130To: bbb@zzz.org 5131Subject: This is a test message 5132Date: Fri, 4 May 2001 14:05:44 -0400 5133Content-Type: text/plain; charset="us-ascii"; 5134 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5135 5136 5137Hi, 5138 5139Do you like this message? 5140 5141-Me 5142""") 5143 5144 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset 5145 # I changed the charset name, though, because the one in the file isn't 5146 # a legal charset name. 
Should add a test for an illegal charset. 5147 def test_rfc2231_get_content_charset(self): 5148 eq = self.assertEqual 5149 msg = self._msgobj('msg_32.txt') 5150 eq(msg.get_content_charset(), 'us-ascii') 5151 5152 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes 5153 def test_rfc2231_parse_rfc_quoting(self): 5154 m = textwrap.dedent('''\ 5155 Content-Disposition: inline; 5156 \tfilename*0*=''This%20is%20even%20more%20; 5157 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; 5158 \tfilename*2="is it not.pdf" 5159 5160 ''') 5161 msg = email.message_from_string(m) 5162 self.assertEqual(msg.get_filename(), 5163 'This is even more ***fun*** is it not.pdf') 5164 self.assertEqual(m, msg.as_string()) 5165 5166 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5167 def test_rfc2231_parse_extra_quoting(self): 5168 m = textwrap.dedent('''\ 5169 Content-Disposition: inline; 5170 \tfilename*0*="''This%20is%20even%20more%20"; 5171 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5172 \tfilename*2="is it not.pdf" 5173 5174 ''') 5175 msg = email.message_from_string(m) 5176 self.assertEqual(msg.get_filename(), 5177 'This is even more ***fun*** is it not.pdf') 5178 self.assertEqual(m, msg.as_string()) 5179 5180 # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset 5181 # but new test uses *0* because otherwise lang/charset is not valid. 
5182 # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values 5183 def test_rfc2231_no_language_or_charset(self): 5184 m = '''\ 5185Content-Transfer-Encoding: 8bit 5186Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" 5187Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm 5188 5189''' 5190 msg = email.message_from_string(m) 5191 param = msg.get_param('NAME') 5192 self.assertNotIsInstance(param, tuple) 5193 self.assertEqual( 5194 param, 5195 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') 5196 5197 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset 5198 def test_rfc2231_no_language_or_charset_in_filename(self): 5199 m = '''\ 5200Content-Disposition: inline; 5201\tfilename*0*="''This%20is%20even%20more%20"; 5202\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5203\tfilename*2="is it not.pdf" 5204 5205''' 5206 msg = email.message_from_string(m) 5207 self.assertEqual(msg.get_filename(), 5208 'This is even more ***fun*** is it not.pdf') 5209 5210 # Duplicate of previous test? 5211 def test_rfc2231_no_language_or_charset_in_filename_encoded(self): 5212 m = '''\ 5213Content-Disposition: inline; 5214\tfilename*0*="''This%20is%20even%20more%20"; 5215\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5216\tfilename*2="is it not.pdf" 5217 5218''' 5219 msg = email.message_from_string(m) 5220 self.assertEqual(msg.get_filename(), 5221 'This is even more ***fun*** is it not.pdf') 5222 5223 # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded, 5224 # but the test below is wrong (the first part should be decoded). 
    def test_rfc2231_partly_encoded(self):
        # The first segment is not marked as extended (no trailing "*"), so
        # its %XX escapes are expected to stay as-is while the second,
        # extended segment is decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as extended, so nothing is percent-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        # Continuation handling also applies to get_boundary().
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        # (note that the expected value is lower-cased).
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        # An unknown charset name ("bogus") must not prevent decoding.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # A byte that cannot be decoded in the declared charset is expected
        # to show up as U+FFFD in the filename.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an extended value must not be taken for the
        # charset'language'value delimiters: charset and language come back
        # as None and the tick stays in the text.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
5345 m = """\ 5346Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" 5347 5348""" 5349 msg = email.message_from_string(m) 5350 param = msg.get_param('name') 5351 self.assertNotIsInstance(param, tuple) 5352 self.assertEqual(param, "Frank's Document") 5353 5354 def test_rfc2231_missing_tick(self): 5355 m = '''\ 5356Content-Disposition: inline; 5357\tfilename*0*="'This%20is%20broken"; 5358''' 5359 msg = email.message_from_string(m) 5360 self.assertEqual( 5361 msg.get_filename(), 5362 "'This is broken") 5363 5364 def test_rfc2231_missing_tick_with_encoded_non_ascii(self): 5365 m = '''\ 5366Content-Disposition: inline; 5367\tfilename*0*="'This%20is%E2broken"; 5368''' 5369 msg = email.message_from_string(m) 5370 self.assertEqual( 5371 msg.get_filename(), 5372 "'This is\ufffdbroken") 5373 5374 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang 5375 def test_rfc2231_tick_attack_extended(self): 5376 eq = self.assertEqual 5377 m = """\ 5378Content-Type: application/x-foo; 5379\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" 5380 5381""" 5382 msg = email.message_from_string(m) 5383 charset, language, s = msg.get_param('name') 5384 eq(charset, 'us-ascii') 5385 eq(language, 'en-us') 5386 eq(s, "Frank's Document") 5387 5388 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value 5389 def test_rfc2231_tick_attack(self): 5390 m = """\ 5391Content-Type: application/x-foo; 5392\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" 5393 5394""" 5395 msg = email.message_from_string(m) 5396 param = msg.get_param('name') 5397 self.assertNotIsInstance(param, tuple) 5398 self.assertEqual(param, "us-ascii'en-us'Frank's Document") 5399 5400 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes 5401 def test_rfc2231_no_extended_values(self): 5402 eq = self.assertEqual 5403 m = """\ 5404Content-Type: application/x-foo; name=\"Frank's Document\" 5405 5406""" 
5407 msg = email.message_from_string(m) 5408 eq(msg.get_param('name'), "Frank's Document") 5409 5410 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments 5411 def test_rfc2231_encoded_then_unencoded_segments(self): 5412 eq = self.assertEqual 5413 m = """\ 5414Content-Type: application/x-foo; 5415\tname*0*=\"us-ascii'en-us'My\"; 5416\tname*1=\" Document\"; 5417\tname*2*=\" For You\" 5418 5419""" 5420 msg = email.message_from_string(m) 5421 charset, language, s = msg.get_param('name') 5422 eq(charset, 'us-ascii') 5423 eq(language, 'en-us') 5424 eq(s, 'My Document For You') 5425 5426 # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments 5427 # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments 5428 def test_rfc2231_unencoded_then_encoded_segments(self): 5429 eq = self.assertEqual 5430 m = """\ 5431Content-Type: application/x-foo; 5432\tname*0=\"us-ascii'en-us'My\"; 5433\tname*1*=\" Document\"; 5434\tname*2*=\" For You\" 5435 5436""" 5437 msg = email.message_from_string(m) 5438 charset, language, s = msg.get_param('name') 5439 eq(charset, 'us-ascii') 5440 eq(language, 'en-us') 5441 eq(s, 'My Document For You') 5442 5443 def test_should_not_hang_on_invalid_ew_messages(self): 5444 messages = ["""From: user@host.com 5445To: user@host.com 5446Bad-Header: 5447 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?= 5448 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?= 5449 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?= 5450 5451Hello! 
5452""", """From: ����� �������� <xxx@xxx> 5453To: "xxx" <xxx@xxx> 5454Subject: ��� ���������� ����� ����� � ��������� �� ���� 5455MIME-Version: 1.0 5456Content-Type: text/plain; charset="windows-1251"; 5457Content-Transfer-Encoding: 8bit 5458 5459�� ����� � ���� ������ ��� �������� 5460"""] 5461 for m in messages: 5462 with self.subTest(m=m): 5463 msg = email.message_from_string(m) 5464 5465 5466# Tests to ensure that signed parts of an email are completely preserved, as 5467# required by RFC1847 section 2.1. Note that these are incomplete, because the 5468# email package does not currently always preserve the body. See issue 1670765. 5469class TestSigned(TestEmailBase): 5470 5471 def _msg_and_obj(self, filename): 5472 with openfile(filename, encoding="utf-8") as fp: 5473 original = fp.read() 5474 msg = email.message_from_string(original) 5475 return original, msg 5476 5477 def _signed_parts_eq(self, original, result): 5478 # Extract the first mime part of each message 5479 import re 5480 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) 5481 inpart = repart.search(original).group(2) 5482 outpart = repart.search(result).group(2) 5483 self.assertEqual(outpart, inpart) 5484 5485 def test_long_headers_as_string(self): 5486 original, msg = self._msg_and_obj('msg_45.txt') 5487 result = msg.as_string() 5488 self._signed_parts_eq(original, result) 5489 5490 def test_long_headers_as_string_maxheaderlen(self): 5491 original, msg = self._msg_and_obj('msg_45.txt') 5492 result = msg.as_string(maxheaderlen=60) 5493 self._signed_parts_eq(original, result) 5494 5495 def test_long_headers_flatten(self): 5496 original, msg = self._msg_and_obj('msg_45.txt') 5497 fp = StringIO() 5498 Generator(fp).flatten(msg) 5499 result = fp.getvalue() 5500 self._signed_parts_eq(original, result) 5501 5502 5503 5504if __name__ == '__main__': 5505 unittest.main() 5506