• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import unittest
2from test.test_tools import skip_if_missing, imports_under_tool
3from test import support
4from test.support.hypothesis_helper import hypothesis
5
6st = hypothesis.strategies
7given = hypothesis.given
8example = hypothesis.example
9
10
11skip_if_missing("unicode")
12with imports_under_tool("unicode"):
13    from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup
14
15
@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    """Hypothesis strategy producing a list of ``(name, character)`` pairs.

    The number of pairs is drawn between *min_length* and *max_length*
    (both inclusive).  Names are distinct, non-empty strings of at most ten
    letters taken from ``"abcd"``; characters are distinct single characters.
    Both collections are drawn as sets of the same size, so the pairing
    follows the sets' iteration order.
    """
    size = draw(st.integers(min_value=min_length, max_value=max_length))
    # Drawing sets (rather than lists) guarantees uniqueness on both sides.
    name_strategy = st.text("abcd", min_size=1, max_size=10)
    names = draw(st.sets(name_strategy, min_size=size, max_size=size))
    characters = draw(st.sets(st.characters(), min_size=size, max_size=size))
    return list(zip(names, characters))
24
25
class TestDawg(unittest.TestCase):
    """Exercise the directed acyclic word graph (DAWG) helpers.

    The DAWG is the data structure used to store the unicode character
    names in unicodedata.  Tests ported from PyPy.
    """

    def test_dawg_direct_simple(self):
        """Insert a few words by hand and check lookups in both directions."""
        expected = {
            "a": -4,
            "c": -2,
            "cat": -1,
            "catarr": 0,
            "catnip": 1,
            "zcatnip": 5,
        }
        graph = Dawg()
        for word, value in expected.items():
            graph.insert(word, value)
        packed, data, inverse = graph.finish()

        # Forward direction: encoded word -> stored value.
        for word, value in expected.items():
            self.assertEqual(
                lookup(packed, data, word.encode("ascii")), value
            )
        # Words that were never inserted, including a proper prefix
        # ("catni") and an extension ("catnipp") of a stored word.
        for absent in (b"b", b"catni", b"catnipp"):
            with self.assertRaises(KeyError):
                lookup(packed, data, absent)

        # Reverse direction: stored value -> encoded word.
        for word, value in expected.items():
            self.assertEqual(
                inverse_lookup(packed, inverse, value), word.encode("ascii")
            )
        # 12 was never used as a value.
        with self.assertRaises(KeyError):
            inverse_lookup(packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        """Finishing a DAWG with no inserted words raises ValueError."""
        empty = Dawg()
        with self.assertRaises(ValueError):
            empty.finish()

    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        """Build a compression DAWG from generated and pinned name data."""
        # Building is sufficient: the build step also checks its own result.
        # Stdout is captured only to suppress the debug prints.
        with support.captured_stdout():
            build_compression_dawg(data)
123