import unittest
from test.test_tools import skip_if_missing, imports_under_tool
from test import support
from test.support.hypothesis_helper import hypothesis

# Short aliases for the hypothesis entry points used below.
st = hypothesis.strategies
given = hypothesis.given
example = hypothesis.example


# The dawg module is imported from the "unicode" tool directory; the
# skip_if_missing call comes first so the import is only attempted afterwards.
skip_if_missing("unicode")
with imports_under_tool("unicode"):
    from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup


@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    """Draw a list of (name, character) pairs.

    A size is drawn between *min_length* and *max_length*; then a set of
    exactly that many distinct names (1-10 letters taken from "abcd") and a
    set of exactly that many distinct characters are drawn and zipped
    together.
    """
    size = draw(st.integers(min_value=min_length, max_value=max_length))
    name_strategy = st.text("abcd", min_size=1, max_size=10)
    names = draw(st.sets(name_strategy, min_size=size, max_size=size))
    characters = draw(st.sets(st.characters(), min_size=size, max_size=size))
    pairs = zip(names, characters)
    return list(pairs)


class TestDawg(unittest.TestCase):
    """Tests for the directed acyclic word graph data structure that is used
    to store the unicode character names in unicodedata. Tests ported from PyPy
    """

    def test_dawg_direct_simple(self):
        # (word, value) pairs, listed in the order they are inserted.
        entries = [
            ("a", -4),
            ("c", -2),
            ("cat", -1),
            ("catarr", 0),
            ("catnip", 1),
            ("zcatnip", 5),
        ]
        dawg = Dawg()
        for word, value in entries:
            dawg.insert(word, value)
        packed, data, inverse = dawg.finish()

        # Forward direction: lookups take the word as bytes.
        for word, value in entries:
            key = word.encode("ascii")
            self.assertEqual(lookup(packed, data, key), value)
        # A word that was never inserted, a strict prefix of an inserted
        # word, and an extension of an inserted word must all be rejected.
        for absent in (b"b", b"catni", b"catnipp"):
            with self.assertRaises(KeyError):
                lookup(packed, data, absent)

        # Reverse direction: values map back to the word as bytes.
        for word, value in entries:
            expected = word.encode("ascii")
            self.assertEqual(inverse_lookup(packed, inverse, value), expected)
        with self.assertRaises(KeyError):
            inverse_lookup(packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        # Finishing a Dawg with no inserted entries raises ValueError.
        dawg = Dawg()
        with self.assertRaises(ValueError):
            dawg.finish()

    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        # Capture stdout so the builder's debug prints stay out of the test
        # output.  Building is sufficient here: build_compression_dawg also
        # checks its own result.
        with support.captured_stdout():
            build_compression_dawg(data)