#
# Test suite for the textwrap module.
#
# Original tests written by Greg Ward <gward@python.net>.
# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
# Currently maintained by Greg Ward.
#
# $Id$
#

import unittest
from test import test_support

from textwrap import TextWrapper, wrap, fill, dedent


class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.'''

    def show(self, textin):
        """Return an indented rendering of 'textin' for failure messages.

        A list is rendered one element per line as "  <index>: <repr>";
        a string is rendered as its repr followed by a newline.  Any
        other object is rendered by repr as well, so that an unexpected
        return type is reported through the assertion message instead of
        raising UnboundLocalError here.
        """
        if isinstance(textin, list):
            return '\n'.join(["  %d: %r" % (i, line)
                              for i, line in enumerate(textin)])
        if isinstance(textin, basestring):
            return "  %s\n" % repr(textin)
        # Neither a list nor a string: still produce something printable.
        return "  %r\n" % (textin,)

    def check(self, result, expect):
        """Assert that 'result' equals 'expect', showing both on failure."""
        self.assertEqual(result, expect,
            'expected:\n%s\nbut got:\n%s' % (
                self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        """Wrap 'text' to 'width' with wrap() and compare with 'expect'.

        Extra keyword arguments are passed straight through to wrap().
        """
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        """Assert that self.wrapper._split(text) returns 'expect'.

        Relies on the subclass's setUp() having created self.wrapper.
        """
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got  %r" % (expect, result))


class WrapTestCase(BaseTestCase):
    """Tests for wrap(), TextWrapper.wrap()/fill() and TextWrapper._split()."""

    def setUp(self):
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Simple case: just words, spaces, and a bit of punctuation

        text = "Hello there, how are you this fine day?  I'm glad to hear it!"

        self.check_wrap(text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day?  I'm",
                         "glad to hear",
                         "it!"])
        self.check_wrap(text, 42,
                        ["Hello there, how are you this fine day?",
                         "I'm glad to hear it!"])
        self.check_wrap(text, 80, [text])

    def test_empty_string(self):
        # Check that wrapping the empty string returns an empty list.
        self.check_wrap("", 6, [])
        self.check_wrap("", 6, [], drop_whitespace=False)

    def test_empty_string_with_initial_indent(self):
        # Check that the empty string is not indented.
        self.check_wrap("", 6, [], initial_indent="++")
        self.check_wrap("", 6, [], initial_indent="++", drop_whitespace=False)

    def test_whitespace(self):
        # Whitespace munging and end-of-sentence detection

        text = """\
This is a paragraph that already has
line breaks.  But some of its lines are much longer than the others,
so it needs to be wrapped.
Some lines are \ttabbed too.
What a mess!
"""

        expect = ["This is a paragraph that already has line",
                  "breaks.  But some of its lines are much",
                  "longer than the others, so it needs to be",
                  "wrapped.  Some lines are  tabbed too.  What a",
                  "mess!"]

        wrapper = TextWrapper(45, fix_sentence_endings=True)
        result = wrapper.wrap(text)
        self.check(result, expect)

        result = wrapper.fill(text)
        self.check(result, '\n'.join(expect))

    def test_fix_sentence_endings(self):
        wrapper = TextWrapper(60, fix_sentence_endings=True)

        # SF #847346: ensure that fix_sentence_endings=True does the
        # right thing even on input short enough that it doesn't need to
        # be wrapped.
        text = "A short line. Note the single space."
        expect = ["A short line.  Note the single space."]
        self.check(wrapper.wrap(text), expect)

        # Test some of the hairy end cases that _fix_sentence_endings()
        # is supposed to handle (the easy stuff is tested in
        # test_whitespace() above).
        text = "Well, Doctor? What do you think?"
        expect = ["Well, Doctor?  What do you think?"]
        self.check(wrapper.wrap(text), expect)

        text = "Well, Doctor?\nWhat do you think?"
        self.check(wrapper.wrap(text), expect)

        text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
        expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 20
        expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
        self.check(wrapper.wrap(text), expect)

        text = 'And she said, "Go to hell!"\nCan you believe that?'
        expect = ['And she said, "Go to',
                  'hell!"  Can you',
                  'believe that?']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 60
        expect = ['And she said, "Go to hell!"  Can you believe that?']
        self.check(wrapper.wrap(text), expect)

        text = 'File stdio.h is nice.'
        expect = ['File stdio.h is nice.']
        self.check(wrapper.wrap(text), expect)

    def test_wrap_short(self):
        # Wrapping to make short lines longer

        text = "This is a\nshort paragraph."

        self.check_wrap(text, 20, ["This is a short",
                                   "paragraph."])
        self.check_wrap(text, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # Test endcases

        text = "This is a short line."

        self.check_wrap(text, 30, ["This is a short line."])
        self.check_wrap(text, 30, ["(1) This is a short line."],
                        initial_indent="(1) ")

    def test_hyphenated(self):
        # Test breaking hyphenated words

        text = ("this-is-a-useful-feature-for-"
                "reformatting-posts-from-tim-peters'ly")

        self.check_wrap(text, 40,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 41,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 42,
                        ["this-is-a-useful-feature-for-reformatting-",
                         "posts-from-tim-peters'ly"])

    def test_hyphenated_numbers(self):
        # Test that hyphenated numbers (eg. dates) are not broken like words.
        text = ("Python 1.0.0 was released on 1994-01-26.  Python 1.0.1 was\n"
                "released on 1994-02-15.")

        self.check_wrap(text, 35, ['Python 1.0.0 was released on',
                                   '1994-01-26.  Python 1.0.1 was',
                                   'released on 1994-02-15.'])
        self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
                                   'Python 1.0.1 was released on 1994-02-15.'])

        text = "I do all my shopping at 7-11."
        self.check_wrap(text, 25, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 27, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 29, ["I do all my shopping at 7-11."])

    def test_em_dash(self):
        # Test text with em-dashes
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be",
                         "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written",
                         "-- thus."])
        expect = ["Em-dashes should be written --",
                  "thus."]
        self.check_wrap(text, 30, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled too, because
        # it's adjacent to non-whitespace on both sides.
        text = "You can also do--this or even---this."
        expect = ["You can also do",
                  "--this or even",
                  "---this."]
        self.check_wrap(text, 15, expect)
        self.check_wrap(text, 16, expect)
        expect = ["You can also do--",
                  "this or even---",
                  "this."]
        self.check_wrap(text, 17, expect)
        self.check_wrap(text, 19, expect)
        expect = ["You can also do--this or even",
                  "---this."]
        self.check_wrap(text, 29, expect)
        self.check_wrap(text, 31, expect)
        expect = ["You can also do--this or even---",
                  "this."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 35, expect)

        # All of the above behaviour could be deduced by probing the
        # _split() method.
        text = "Here's an -- em-dash and--here's another---and another!"
        expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
                  "and", "--", "here's", " ", "another", "---",
                  "and", " ", "another!"]
        self.check_split(text, expect)

        text = "and then--bam!--he was gone"
        expect = ["and", " ", "then", "--", "bam!", "--",
                  "he", " ", "was", " ", "gone"]
        self.check_split(text, expect)

    def test_unix_options(self):
        # Test that Unix-style command-line options are wrapped correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!

        text = "You should use the -n option, or --dry-run in its long form."
        self.check_wrap(text, 20,
                        ["You should use the",
                         "-n option, or --dry-",
                         "run in its long",
                         "form."])
        self.check_wrap(text, 21,
                        ["You should use the -n",
                         "option, or --dry-run",
                         "in its long form."])
        expect = ["You should use the -n option, or",
                  "--dry-run in its long form."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 34, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 38, expect)
        expect = ["You should use the -n option, or --dry-",
                  "run in its long form."]
        self.check_wrap(text, 39, expect)
        self.check_wrap(text, 41, expect)
        expect = ["You should use the -n option, or --dry-run",
                  "in its long form."]
        self.check_wrap(text, 42, expect)

        # Again, all of the above can be deduced from _split().
        text = "the -n option, or --dry-run or --dryrun"
        expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
                  "--dry-", "run", " ", "or", " ", "--dryrun"]
        self.check_split(text, expect)

    def test_funky_hyphens(self):
        # Screwy edge cases cooked up by David Goodger.  All reported
        # in SF bug #596434.
        self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
        self.check_split("what the--", ["what", " ", "the--"])
        self.check_split("what the--.", ["what", " ", "the--."])
        self.check_split("--text--.", ["--text--."])

        # When I first read bug #596434, this is what I thought David
        # was talking about.  I was wrong; these have always worked
        # fine.  The real problem is tested in test_funky_parens()
        # below...
        self.check_split("--option", ["--option"])
        self.check_split("--option-opt", ["--option-", "opt"])
        self.check_split("foo --option-opt bar",
                         ["foo", " ", "--option-", "opt", " ", "bar"])

    def test_punct_hyphens(self):
        # Oh bother, SF #965425 found another problem with hyphens --
        # hyphenated words in single quotes weren't handled correctly.
        # In fact, the bug is that *any* punctuation around a hyphenated
        # word was handled incorrectly, except for a leading "--", which
        # was special-cased for Optik and Docutils.  So test a variety
        # of styles of punctuation around a hyphenated word.
        # (Actually this is based on an Optik bug report, #813077).
        self.check_split("the 'wibble-wobble' widget",
                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
        self.check_split('the "wibble-wobble" widget',
                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
        self.check_split("the (wibble-wobble) widget",
                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
        self.check_split("the ['wibble-wobble'] widget",
                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])

    def test_funky_parens(self):
        # Second part of SF bug #596434: long option strings inside
        # parentheses.
        self.check_split("foo (--option) bar",
                         ["foo", " ", "(--option)", " ", "bar"])

        # Related stuff -- make sure parens work in simpler contexts.
        self.check_split("foo (bar) baz",
                         ["foo", " ", "(bar)", " ", "baz"])
        self.check_split("blah (ding dong), wubba",
                         ["blah", " ", "(ding", " ", "dong),",
                          " ", "wubba"])

    def test_drop_whitespace_false(self):
        # Check that drop_whitespace=False preserves whitespace.
        # SF patch #1581073
        text = " This is a    sentence with     much whitespace."
        self.check_wrap(text, 10,
                        [" This is a", "    ", "sentence ",
                         "with     ", "much white", "space."],
                        drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only(self):
        # Check that drop_whitespace=False preserves a whitespace-only string.
        self.check_wrap("   ", 6, ["   "], drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only_with_indent(self):
        # Check that a whitespace-only string gets indented (when
        # drop_whitespace is False).
        self.check_wrap("   ", 6, ["     "], drop_whitespace=False,
                        initial_indent="  ")

    def test_drop_whitespace_whitespace_only(self):
        # Check drop_whitespace on a whitespace-only string.
        self.check_wrap("  ", 6, [])

    def test_drop_whitespace_leading_whitespace(self):
        # Check that drop_whitespace does not drop leading whitespace (if
        # followed by non-whitespace).
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace; let's test that a bit, shall we?
        text = " This is a sentence with leading whitespace."
        self.check_wrap(text, 50,
                        [" This is a sentence with leading whitespace."])
        self.check_wrap(text, 30,
                        [" This is a sentence with", "leading whitespace."])

    def test_drop_whitespace_whitespace_line(self):
        # Check that drop_whitespace skips the whole line if a non-leading
        # line consists only of whitespace.
        text = "abcd    efgh"
        # Include the result for drop_whitespace=False for comparison.
        self.check_wrap(text, 6, ["abcd", "    ", "efgh"],
                        drop_whitespace=False)
        self.check_wrap(text, 6, ["abcd", "efgh"])

    def test_drop_whitespace_whitespace_only_with_indent(self):
        # Check that initial_indent is not applied to a whitespace-only
        # string.  This checks a special case of the fact that dropping
        # whitespace occurs before indenting.
        self.check_wrap("  ", 6, [], initial_indent="++")

    def test_drop_whitespace_whitespace_indent(self):
        # Check that drop_whitespace does not drop whitespace indents.
        # This checks a special case of the fact that dropping whitespace
        # occurs before indenting.
        self.check_wrap("abcd efgh", 6, ["  abcd", "  efgh"],
                        initial_indent="  ", subsequent_indent="  ")

    if test_support.have_unicode:
        def test_unicode(self):
            # *Very* simple test of wrapping Unicode strings.  I'm sure
            # there's more to it than this, but let's at least make
            # sure textwrap doesn't crash on Unicode input!
            text = u"Hello there, how are you today?"
            self.check_wrap(text, 50, [u"Hello there, how are you today?"])
            self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
            olines = self.wrapper.wrap(text)
            self.assertIsInstance(olines, list)
            self.assertIsInstance(olines[0], unicode)
            otext = self.wrapper.fill(text)
            self.assertIsInstance(otext, unicode)

        def test_no_split_at_umlaut(self):
            text = u"Die Empf\xe4nger-Auswahl"
            self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])

        def test_umlaut_followed_by_dash(self):
            text = u"aa \xe4\xe4-\xe4\xe4"
            self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])

    def test_split(self):
        # Ensure that the standard _split() method works as advertised
        # in the comments

        text = "Hello there -- you goof-ball, use the -b option!"

        result = self.wrapper._split(text)
        self.check(result,
             ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
              "ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])

    def test_break_on_hyphens(self):
        # Ensure that the break_on_hyphens attributes work
        text = "yaba daba-doo"
        self.check_wrap(text, 10, ["yaba daba-", "doo"],
                        break_on_hyphens=True)
        self.check_wrap(text, 10, ["yaba", "daba-doo"],
                        break_on_hyphens=False)

    def test_bad_width(self):
        # Ensure that width <= 0 is caught.
        text = "Whatever, it doesn't matter."
        self.assertRaises(ValueError, wrap, text, 0)
        self.assertRaises(ValueError, wrap, text, -1)


class LongWordTestCase(BaseTestCase):
    """Tests for words longer than the wrap width (break_long_words)."""

    def setUp(self):
        self.wrapper = TextWrapper()
        self.text = '''\
Did you say "supercalifragilisticexpialidocious?"
How *do* you spell that odd word, anyways?
'''

    def test_break_long(self):
        # Wrap text with long words and lots of punctuation

        self.check_wrap(self.text, 30,
                        ['Did you say "supercalifragilis',
                         'ticexpialidocious?" How *do*',
                         'you spell that odd word,',
                         'anyways?'])
        self.check_wrap(self.text, 50,
                        ['Did you say "supercalifragilisticexpialidocious?"',
                         'How *do* you spell that odd word, anyways?'])

        # SF bug 797650.  Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.
        self.check_wrap('-'*10+'hello', 10,
                        ['----------',
                         '               h',
                         '               e',
                         '               l',
                         '               l',
                         '               o'],
                        subsequent_indent=' '*15)

        # bug 1146.  Prevent a long word to be wrongly wrapped when the
        # preceding word is exactly one character shorter than the width
        self.check_wrap(self.text, 12,
                        ['Did you say ',
                         '"supercalifr',
                         'agilisticexp',
                         'ialidocious?',
                         '" How *do*',
                         'you spell',
                         'that odd',
                         'word,',
                         'anyways?'])

    def test_nobreak_long(self):
        # Test with break_long_words disabled
        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        expect = ['Did you say',
                  '"supercalifragilisticexpialidocious?"',
                  'How *do* you spell that odd',
                  'word, anyways?'
                  ]
        result = self.wrapper.wrap(self.text)
        self.check(result, expect)

        # Same thing with kwargs passed to standalone wrap() function.
        result = wrap(self.text, width=30, break_long_words=0)
        self.check(result, expect)


class IndentTestCases(BaseTestCase):
    """Tests for the initial_indent and subsequent_indent options."""

    # called before each test method
    def setUp(self):
        self.text = '''\
This paragraph will be filled, first without any indentation,
and then with some (including a hanging indent).'''

    def test_fill(self):
        # Test the fill() method

        expect = '''\
This paragraph will be filled, first
without any indentation, and then with
some (including a hanging indent).'''

        result = fill(self.text, 40)
        self.check(result, expect)

    def test_initial_indent(self):
        # Test initial_indent parameter

        expect = ["     This paragraph will be filled,",
                  "first without any indentation, and then",
                  "with some (including a hanging indent)."]
        result = wrap(self.text, 40, initial_indent="     ")
        self.check(result, expect)

        expect = "\n".join(expect)
        result = fill(self.text, 40, initial_indent="     ")
        self.check(result, expect)

    def test_subsequent_indent(self):
        # Test subsequent_indent parameter

        expect = '''\
  * This paragraph will be filled, first
    without any indentation, and then
    with some (including a hanging
    indent).'''

        result = fill(self.text, 40,
                      initial_indent="  * ", subsequent_indent="    ")
        self.check(result, expect)


# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCase!
class DedentTestCase(unittest.TestCase):
    """Tests for textwrap.dedent()."""

    def assertUnchanged(self, text):
        """assert that dedent() has no effect on 'text'"""
        self.assertEqual(text, dedent(text))

    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
        self.assertUnchanged(text)

        # Similar, with a blank line.
        text = "Hello there.\n\nBoo!"
        self.assertUnchanged(text)

        # Some lines indented, but overall margin is still zero.
        text = "Hello there.\n  This is indented."
        self.assertUnchanged(text)

        # Again, add a blank line.
        text = "Hello there.\n\n  Boo!\n"
        self.assertUnchanged(text)

    def test_dedent_even(self):
        # All lines indented by two spaces.
        text = "  Hello there.\n  How are ya?\n  Oh good."
        expect = "Hello there.\nHow are ya?\nOh good."
        self.assertEqual(expect, dedent(text))

        # Same, with blank lines.
        text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

        # Now indent one of the blank lines.
        text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

    def test_dedent_uneven(self):
        # Lines indented unevenly.
        text = '''\
        def foo():
            while 1:
                return foo
        '''
        expect = '''\
def foo():
    while 1:
        return foo
'''
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a blank line.
        text = "  Foo\n    Bar\n\n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a whitespace-only line.
        text = "  Foo\n    Bar\n \n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

    # dedent() should not mangle internal tabs
    def test_dedent_preserve_internal_tabs(self):
        text = "  hello\tthere\n  how are\tyou?"
        expect = "hello\tthere\nhow are\tyou?"
        self.assertEqual(expect, dedent(text))

        # make sure that it preserves tabs when it's not making any
        # changes at all
        self.assertEqual(expect, dedent(expect))

    # dedent() should not mangle tabs in the margin (i.e.
    # tabs and spaces both count as margin, but are *not*
    # considered equivalent)
    def test_dedent_preserve_margin_tabs(self):
        text = "  hello there\n\thow are you?"
        self.assertUnchanged(text)

        # same effect even if we have 8 spaces
        text = "        hello there\n\thow are you?"
        self.assertUnchanged(text)

        # dedent() only removes whitespace that can be uniformly removed!
        text = "\thello there\n\thow are you?"
        expect = "hello there\nhow are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \thello there\n  \thow are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \t  hello there\n  \t  how are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \thello there\n  \t  how are you?"
        expect = "hello there\n  how are you?"
        self.assertEqual(expect, dedent(text))

        # test margin is smaller than smallest indent
        text = "  \thello there\n   \thow are you?\n \tI'm fine, thanks"
        expect = " \thello there\n  \thow are you?\n\tI'm fine, thanks"
        self.assertEqual(expect, dedent(text))


def test_main():
    """Run every test case class in this module via test_support."""
    test_support.run_unittest(WrapTestCase,
                              LongWordTestCase,
                              IndentTestCases,
                              DedentTestCase)

if __name__ == '__main__':
    test_main()