• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import difflib
2from test.support import findfile
3import unittest
4import doctest
5import sys
6
7
class TestWithAscii(unittest.TestCase):
    """Exercise SequenceMatcher on short, plain-ASCII strings."""

    def test_one_insert(self):
        # A single extra 'a' in the second sequence must show up as exactly
        # one 'insert' opcode, and nothing may be flagged as popular.
        original = 'b' * 100

        # Insertion at the very beginning.
        matcher = difflib.SequenceMatcher(None, original, 'a' + original)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        leading_insert_ops = [
            ('insert', 0, 0, 0, 1),
            ('equal', 0, 100, 1, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), leading_insert_ops)
        self.assertEqual(matcher.bpopular, set())

        # Insertion in the middle.
        half = 'b' * 50
        matcher = difflib.SequenceMatcher(None, original, half + 'a' + half)
        self.assertAlmostEqual(matcher.ratio(), 0.995, places=3)
        middle_insert_ops = [
            ('equal', 0, 50, 0, 50),
            ('insert', 50, 50, 50, 51),
            ('equal', 50, 100, 51, 101),
        ]
        self.assertEqual(list(matcher.get_opcodes()), middle_insert_ops)
        self.assertEqual(matcher.bpopular, set())

    def test_one_delete(self):
        # Dropping the lone 'c' between the two runs yields one 'delete'.
        head = 'a' * 40
        tail = 'b' * 40
        matcher = difflib.SequenceMatcher(None, head + 'c' + tail, head + tail)
        self.assertAlmostEqual(matcher.ratio(), 0.994, places=3)
        expected_ops = [
            ('equal', 0, 40, 0, 40),
            ('delete', 40, 41, 40, 40),
            ('equal', 41, 81, 40, 80),
        ]
        self.assertEqual(list(matcher.get_opcodes()), expected_ops)

    def test_bjunk(self):
        # bjunk must hold exactly those elements of b that isjunk accepts.
        first = 'a' * 40 + 'b' * 40
        second = 'a' * 44 + 'b' * 40
        second_with_blanks = second + ' ' * 20

        # No junk element occurs in b at all.
        matcher = difflib.SequenceMatcher(isjunk=lambda ch: ch == ' ',
                                         a=first, b=second)
        self.assertEqual(matcher.bjunk, set())

        # The blanks appended to b are junk.
        matcher = difflib.SequenceMatcher(isjunk=lambda ch: ch == ' ',
                                         a=first, b=second_with_blanks)
        self.assertEqual(matcher.bjunk, {' '})

        # Both blanks and 'b' are declared junk.
        matcher = difflib.SequenceMatcher(isjunk=lambda ch: ch in [' ', 'b'],
                                         a=first, b=second_with_blanks)
        self.assertEqual(matcher.bjunk, {' ', 'b'})
44
45
class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        # autojunk defaults to True, and its heuristic applies once a
        # sequence reaches 200 or more items.
        shorter = 'b' * 200
        longer = 'a' + shorter

        # Heuristic active: 'b' is treated as popular and the ratio collapses.
        with_heuristic = difflib.SequenceMatcher(None, shorter, longer)
        self.assertAlmostEqual(with_heuristic.ratio(), 0, places=3)
        self.assertEqual(with_heuristic.bpopular, {'b'})

        # Heuristic disabled: the sequences compare as nearly identical.
        without_heuristic = difflib.SequenceMatcher(None, shorter, longer,
                                                    autojunk=False)
        self.assertAlmostEqual(without_heuristic.ratio(), 0.9975, places=3)
        self.assertEqual(without_heuristic.bpopular, set())
62
63
class TestSFbugs(unittest.TestCase):
    """Regression tests for individually reported difflib bugs."""

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        matcher = difflib.SequenceMatcher(None, [], [])
        for ratio_method in (matcher.ratio,
                             matcher.quick_ratio,
                             matcher.real_quick_ratio):
            self.assertEqual(ratio_method(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        grouped = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        with self.assertRaises(StopIteration):
            next(grouped)
        unified = difflib.unified_diff([], [])
        with self.assertRaises(StopIteration):
            next(unified)

    def test_matching_blocks_cache(self):
        # Issue #21635
        matcher = difflib.SequenceMatcher(None, "abxcd", "abcd")
        # The first call populates the cache; only the second result is
        # inspected.
        matcher.get_matching_blocks()
        cached = matcher.get_matching_blocks()
        for index, expected_size in enumerate((2, 2, 0)):
            self.assertEqual(cached[index].size, expected_size)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        before = ["\tI am a buggy"]
        after = ["\t\tI am a bug"]
        produced = list(difflib.Differ().compare(before, after))
        expected_lines = (
            "- \tI am a buggy",
            "? \t          --\n",
            "+ \t\tI am a bug",
            "? +\n",
        )
        for index, expected in enumerate(expected_lines):
            self.assertEqual(expected, produced[index])

    def test_hint_indented_properly_with_tabs(self):
        # The '?' hint line has to reproduce the tab/space indentation.
        before = ["\t \t \t^"]
        after = ["\t \t \t^\n"]
        produced = list(difflib.Differ().compare(before, after))
        expected_lines = (
            "- \t \t \t^",
            "+ \t \t \t^\n",
            "? \t \t \t +\n",
        )
        for index, expected in enumerate(expected_lines):
            self.assertEqual(expected, produced[index])

    def test_mdiff_catch_stop_iteration(self):
        # Issue #33224
        produced = list(difflib._mdiff(["2"], ["3"], 1))
        expected = [((1, '\x00-2\x01'), (1, '\x00+3\x01'), True)]
        self.assertEqual(produced, expected)
108
109
# Fixture texts for the HtmlDiff tests in TestSFpatches (SF patch 914575).
# Each *_from*/*_to* pair is the "before"/"after" side of one comparison.
# The exact whitespace inside these literals is part of the test data; do
# not reformat them.

# Pair 1: a short numbered list with one changed, one removed, one
# re-indented and one added line.
patch914575_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than implicit.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_to1 = """
   1. Beautiful is better than ugly.
   3.   Simple is better than complex.
   4. Complicated is better than complex.
   5. Flat is better than nested.
"""

# Same list as pair 1 but containing non-ASCII letters; used by
# test_make_file_usascii_charset_with_nonascii_input.
patch914575_nonascii_from1 = """
   1. Beautiful is beTTer than ugly.
   2. Explicit is better than ımplıcıt.
   3. Simple is better than complex.
   4. Complex is better than complicated.
"""

patch914575_nonascii_to1 = """
   1. Beautiful is better than ügly.
   3.   Sımple is better than complex.
   4. Complicated is better than cömplex.
   5. Flat is better than nested.
"""

# Pair 2: tab/space mixtures, rendered by test_html_diff with tabsize=2 and
# with the default tab size.
# NOTE(review): the bracketed legends presumably abbreviate the whitespace
# ('t' = tab, 's' = space) -- confirm.
patch914575_from2 = """
\t\tLine 1: preceded by from:[tt] to:[ssss]
  \t\tLine 2: preceded by from:[sstt] to:[sssst]
  \t \tLine 3: preceded by from:[sstst] to:[ssssss]
Line 4:  \thas from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end\t
"""

patch914575_to2 = """
    Line 1: preceded by from:[tt] to:[ssss]
    \tLine 2: preceded by from:[sstt] to:[sssst]
      Line 3: preceded by from:[sstst] to:[ssssss]
Line 4:   has from:[sst] to:[sss] after :
Line 5: has from:[t] to:[ss] at end
"""

# Pair 3: lines of varying length, rendered by test_html_diff with
# wrapcolumn=14 to exercise line wrapping.
patch914575_from3 = """line 0
1234567890123456789012345689012345
line 1
line 2
line 3
line 4   changed
line 5   changed
line 6   changed
line 7
line 8  subtracted
line 9
1234567890123456789012345689012345
short line
just fits in!!
just fits in two lines yup!!
the end"""

patch914575_to3 = """line 0
1234567890123456789012345689012345
line 1
line 2    added
line 3
line 4   chanGEd
line 5a  chanGed
line 6a  changEd
line 7
line 8
line 9
1234567890
another long line that needs to be wrapped
just fitS in!!
just fits in two lineS yup!!
the end"""
187
class TestSFpatches(unittest.TestCase):
    """Regression tests for contributed patches, mostly around HtmlDiff."""

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        filler = '123\n' * 10
        preamble = '456\n' * 10
        from_text = (patch914575_from1 + filler) * 3
        to_text = (patch914575_to1 + filler) * 3

        f1a = from_text.splitlines()
        t1a = to_text.splitlines()
        f1b = (preamble + from_text).splitlines()
        t1b = (preamble + to_text).splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3

        default_diff = difflib.HtmlDiff()
        tab2_diff = difflib.HtmlDiff(tabsize=2)
        wrap14_diff = difflib.HtmlDiff(wrapcolumn=14)

        # NOTE(review): the make_file/make_table calls below run in the same
        # order as the stored baseline was produced; the generated markup
        # presumably depends on that order -- do not reorder.
        full = default_diff.make_file(f1a, t1a, 'from', 'to',
                                      context=False, numlines=5)
        sections = [
            ('<h2>Context (first diff within numlines=5(default))</h2>',
             default_diff.make_table(f1a, t1a, 'from', 'to', context=True)),
            ('<h2>Context (first diff after numlines=5(default))</h2>',
             default_diff.make_table(f1b, t1b, 'from', 'to', context=True)),
            ('<h2>Context (numlines=6)</h2>',
             default_diff.make_table(f1a, t1a, 'from', 'to',
                                     context=True, numlines=6)),
            ('<h2>Context (numlines=0)</h2>',
             default_diff.make_table(f1a, t1a, 'from', 'to',
                                     context=True, numlines=0)),
            ('<h2>Same Context</h2>',
             default_diff.make_table(f1a, f1a, 'from', 'to', context=True)),
            ('<h2>Same Full</h2>',
             default_diff.make_table(f1a, f1a, 'from', 'to', context=False)),
            ('<h2>Empty Context</h2>',
             default_diff.make_table([], [], 'from', 'to', context=True)),
            ('<h2>Empty Full</h2>',
             default_diff.make_table([], [], 'from', 'to', context=False)),
            ('<h2>tabsize=2</h2>',
             tab2_diff.make_table(f2, t2)),
            ('<h2>tabsize=default</h2>',
             default_diff.make_table(f2, t2)),
            ('<h2>Context (wrapcolumn=14,numlines=0)</h2>',
             wrap14_diff.make_table(f3.splitlines(), t3.splitlines(),
                                    context=True, numlines=0)),
            ('<h2>wrapcolumn=14,splitlines()</h2>',
             wrap14_diff.make_table(f3.splitlines(), t3.splitlines())),
            ('<h2>wrapcolumn=14,splitlines(True)</h2>',
             wrap14_diff.make_table(f3.splitlines(True), t3.splitlines(True))),
        ]
        # Flatten to heading, table, heading, table, ... and splice the
        # result in just before the closing body tag of the full page.
        tables = '\n'.join(part for section in sections for part in section)
        actual = full.replace('</body>', '\n' + tables + '\n</body>')

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html','w') as fp:
        #    fp.write(actual)

        baseline_path = findfile('test_difflib_expect.html')
        with open(baseline_path, encoding="utf-8") as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        count = sys.getrecursionlimit() * 2
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(count)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(count)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()

    def test_make_file_default_charset(self):
        # Without an explicit charset the generated page declares utf-8.
        from_lines = patch914575_from1.splitlines()
        to_lines = patch914575_to1.splitlines()
        output = difflib.HtmlDiff().make_file(from_lines, to_lines)
        self.assertIn('content="text/html; charset=utf-8"', output)

    def test_make_file_iso88591_charset(self):
        # An explicit charset is echoed into the content-type declaration.
        from_lines = patch914575_from1.splitlines()
        to_lines = patch914575_to1.splitlines()
        output = difflib.HtmlDiff().make_file(from_lines, to_lines,
                                              charset='iso-8859-1')
        self.assertIn('content="text/html; charset=iso-8859-1"', output)

    def test_make_file_usascii_charset_with_nonascii_input(self):
        # With us-ascii, non-ASCII input appears as numeric character
        # references in the output.
        from_lines = patch914575_nonascii_from1.splitlines()
        to_lines = patch914575_nonascii_to1.splitlines()
        output = difflib.HtmlDiff().make_file(from_lines, to_lines,
                                              charset='us-ascii')
        self.assertIn('content="text/html; charset=us-ascii"', output)
        self.assertIn('&#305;mpl&#305;c&#305;t', output)
274
275
class TestOutputFormat(unittest.TestCase):
    """Checks on the header and range formatting of unified/context diffs."""

    def test_tab_delimiter(self):
        # File name and file date in a header line are separated by a tab.
        args = ('one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52')

        unified_header = list(difflib.unified_diff(*args, lineterm=''))[0:2]
        self.assertEqual(unified_header, [
            "--- Original\t2005-01-26 23:30:50",
            "+++ Current\t2010-04-02 10:20:52",
        ])

        context_header = list(difflib.context_diff(*args, lineterm=''))[0:2]
        self.assertEqual(context_header, [
            "*** Original\t2005-01-26 23:30:50",
            "--- Current\t2010-04-02 10:20:52",
        ])

    def test_no_trailing_tab_on_empty_filedate(self):
        # With no file dates supplied, the header lines carry no tab at all.
        args = ('one', 'two', 'Original', 'Current')

        unified_header = list(difflib.unified_diff(*args, lineterm=''))[0:2]
        self.assertEqual(unified_header, ["--- Original", "+++ Current"])

        context_header = list(difflib.context_diff(*args, lineterm=''))[0:2]
        self.assertEqual(context_header, ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        # (the spec text is kept only as in-test documentation).
        spec = '''\
           Each <range> field shall be of the form:
             %1d", <beginning line number>  if the range contains exactly one line,
           and:
            "%1d,%1d", <beginning line number>, <number of lines> otherwise.
           If a range is empty, its beginning line number shall be the number of
           the line just before the range, or 0 if the empty range starts the file.
        '''
        format_range = difflib._format_range_unified
        cases = (
            ((3, 3), '3,0'),
            ((3, 4), '4'),
            ((3, 5), '4,2'),
            ((3, 6), '4,3'),
            ((0, 0), '0,0'),
        )
        for (start, stop), expected in cases:
            self.assertEqual(format_range(start, stop), expected)

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        # (the spec text is kept only as in-test documentation).
        spec = '''\
           The range of lines in file1 shall be written in the following format
           if the range contains two or more lines:
               "*** %d,%d ****\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "*** %d ****\n", <ending line number>
           The ending line number of an empty range shall be the number of the preceding line,
           or 0 if the range is at the start of the file.

           Next, the range of lines in file2 shall be written in the following format
           if the range contains two or more lines:
               "--- %d,%d ----\n", <beginning line number>, <ending line number>
           and the following format otherwise:
               "--- %d ----\n", <ending line number>
        '''
        format_range = difflib._format_range_context
        cases = (
            ((3, 3), '3'),
            ((3, 4), '4'),
            ((3, 5), '4,5'),
            ((3, 6), '4,6'),
            ((0, 0), '0'),
        )
        for (start, stop), expected in cases:
            self.assertEqual(format_range(start, stop), expected)
337
338
class TestBytes(unittest.TestCase):
    """Tests for difflib.diff_bytes and for str/bytes type checking."""

    # don't really care about the content of the output, just the fact
    # that it's bytes and we don't crash
    def check(self, diff):
        # Exhaust `diff` and assert that every produced line is bytes.
        diff = list(diff)   # trigger exceptions first
        for line in diff:
            self.assertIsInstance(
                line, bytes,
                "all lines of diff should be bytes, but got: %r" % line)

    def test_byte_content(self):
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']     # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9'] # utf-8 bytes

        unified = difflib.unified_diff
        context = difflib.context_diff

        check = self.check
        check(difflib.diff_bytes(unified, a, a))
        check(difflib.diff_bytes(unified, a, b))

        # now with filenames (content and filenames are all bytes!)
        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))

        # and with filenames and dates
        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))

        # same all over again, with context diff
        check(difflib.diff_bytes(context, a, a))
        check(difflib.diff_bytes(context, a, b))
        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))

    def test_byte_filenames(self):
        # somebody renamed a file from ISO-8859-2 to UTF-8
        fna = b'\xb3odz.txt'    # "łodz.txt"
        fnb = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        a = [b'\xa3odz is a city in Poland.']
        b = [b'\xc5\x81odz is a city in Poland.']

        check = self.check
        unified = difflib.unified_diff
        context = difflib.context_diff
        check(difflib.diff_bytes(unified, a, b, fna, fnb))
        check(difflib.diff_bytes(context, a, b, fna, fnb))

        def assertDiff(expect, actual):
            # do not compare expect and actual as lists, because unittest
            # uses difflib to report difference between lists
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for e, a in zip(expect, actual):
                self.assertEqual(e, a)

        expect = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
        assertDiff(expect, actual)

        # with dates (plain ASCII)
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))

        expect = [
            # note the mixed encodings here: this is deeply wrong by every
            # tenet of Unicode, but it doesn't crash, it's parseable by
            # patch, and it's how UNIX(tm) diff behaves
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
                                    lineterm=b'')
        assertDiff(expect, actual)

    def test_mixed_types_content(self):
        # type of input content must be consistent: all str or all bytes
        a = [b'hello']
        b = ['hello']

        unified = difflib.unified_diff
        context = difflib.context_diff

        # The plain diff functions reject the bytes element...
        expect = "lines to compare must be str, not bytes (b'hello')"
        self._assert_type_error(expect, unified, a, b)
        self._assert_type_error(expect, unified, b, a)
        self._assert_type_error(expect, context, a, b)
        self._assert_type_error(expect, context, b, a)

        # ...while diff_bytes rejects the str element.
        expect = "all arguments must be bytes, not str ('hello')"
        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)

    def test_mixed_types_filenames(self):
        # cannot pass filenames as bytes if content is str (this may not be
        # the right behaviour, but at least the test demonstrates how
        # things work)
        a = ['hello\n']
        b = ['ohell\n']
        fna = b'ol\xe9.txt'     # filename transcoded from ISO-8859-1
        # Fixed literal: the second byte escape was missing its backslash
        # (b'ol\xc3a9.txt'), so the name was not the UTF-8 form of fna.
        fnb = b'ol\xc3\xa9.txt' # to UTF-8
        self._assert_type_error(
            "all arguments must be str, not: b'ol\\xe9.txt'",
            difflib.unified_diff, a, b, fna, fnb)

    def test_mixed_types_dates(self):
        # type of dates must be consistent with type of contents
        a = [b'foo\n']
        b = [b'bar\n']
        datea = '1 fév'
        dateb = '3 fév'
        self._assert_type_error(
            "all arguments must be bytes, not str ('1 fév')",
            difflib.diff_bytes, difflib.unified_diff,
            a, b, b'a', b'b', datea, dateb)

        # if input is str, non-ASCII dates are fine
        a = ['foo\n']
        b = ['bar\n']
        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))

    def _assert_type_error(self, msg, generator, *args):
        # Exhaust generator(*args) and require a TypeError whose message is
        # exactly `msg`.  The list() call matters: the diff functions are
        # lazy, so nothing is raised until they are iterated.
        with self.assertRaises(TypeError) as ctx:
            list(generator(*args))
        self.assertEqual(msg, str(ctx.exception))
482
class TestJunkAPIs(unittest.TestCase):
    """Checks for the IS_LINE_JUNK and IS_CHARACTER_JUNK predicates."""

    def test_is_line_junk_true(self):
        # Each of these lines must be classified as junk.
        junk_lines = ('#', '  ', ' #', '# ', ' # ', '')
        for candidate in junk_lines:
            self.assertTrue(difflib.IS_LINE_JUNK(candidate), repr(candidate))

    def test_is_line_junk_false(self):
        # None of these lines may be classified as junk.
        real_lines = ('##', ' ##', '## ', 'abc ', 'abc #', 'Mr. Moose is up!')
        for candidate in real_lines:
            self.assertFalse(difflib.IS_LINE_JUNK(candidate), repr(candidate))

    def test_is_line_junk_REDOS(self):
        # A very long whitespace run followed by '##' must still be answered
        # (as "not junk").
        padding = '\t' * 1000000
        evil_input = padding + '##'
        self.assertFalse(difflib.IS_LINE_JUNK(evil_input))

    def test_is_character_junk_true(self):
        # Space and tab are the junk characters.
        for candidate in (' ', '\t'):
            self.assertTrue(difflib.IS_CHARACTER_JUNK(candidate),
                            repr(candidate))

    def test_is_character_junk_false(self):
        # Other characters, including other whitespace, are not junk.
        for candidate in ('a', '#', '\n', '\f', '\r', '\v'):
            self.assertFalse(difflib.IS_CHARACTER_JUNK(candidate),
                             repr(candidate))
503
class TestFindLongest(unittest.TestCase):
    """Tests for SequenceMatcher.find_longest_match."""

    def longer_match_exists(self, a, b, n):
        # Return True if any run of n + 1 consecutive items of `b` also
        # occurs in `a`, i.e. if a match longer than `n` was possible.
        #
        # There are len(b) - n such windows.  The previous bound of
        # len(b) - n - 1 skipped the last one, so a longer match at the
        # very end of `b` went unnoticed.  A non-positive count simply
        # yields an empty range (no window fits).
        width = n + 1
        return any(b[start:start + width] in a
                   for start in range(len(b) - width + 1))

    def test_default_args(self):
        a = 'foo bar'
        b = 'foo baz bar'
        sm = difflib.SequenceMatcher(a=a, b=b)

        # No bounds given: the whole of both sequences is searched.
        match = sm.find_longest_match()
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 0)
        self.assertEqual(match.size, 6)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))

        # Only lower bounds given: upper bounds default to the ends.
        match = sm.find_longest_match(alo=2, blo=4)
        self.assertEqual(match.a, 3)
        self.assertEqual(match.b, 7)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))

        # Only b is restricted, to b[1:5].
        match = sm.find_longest_match(bhi=5, blo=1)
        self.assertEqual(match.a, 1)
        self.assertEqual(match.b, 1)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))

    def test_longest_match_with_popular_chars(self):
        a = 'dabcd'
        b = 'd'*100 + 'abc' + 'd'*100  # length over 200 so popular used
        sm = difflib.SequenceMatcher(a=a, b=b)
        match = sm.find_longest_match(0, len(a), 0, len(b))
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 99)
        self.assertEqual(match.size, 5)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))
548
549
def setUpModule():
    # Module-level fixture: reset HtmlDiff's class-wide default prefix to 0
    # before the tests in this module run.
    # NOTE(review): presumably this keeps generated HTML identifiers stable
    # for the baseline comparison in test_html_diff -- confirm.
    setattr(difflib.HtmlDiff, '_default_prefix', 0)
552
553
def load_tests(loader, tests, pattern):
    # unittest load_tests hook: add difflib's own doctests to the suite that
    # was collected from this module, then hand the same suite back.
    difflib_doctests = doctest.DocTestSuite(difflib)
    tests.addTest(difflib_doctests)
    return tests
557
558
if __name__ == "__main__":
    # Run the whole suite when this file is executed as a script.
    unittest.main()
561