1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Test character class manipulations.
6
7 #include <stdio.h>
8
9 #include "util/test.h"
10 #include "util/utf.h"
11 #include "re2/regexp.h"
12
13 namespace re2 {
14
15 struct CCTest {
16 struct {
17 Rune lo;
18 Rune hi;
19 } add[10];
20 int remove;
21 struct {
22 Rune lo;
23 Rune hi;
24 } final[10];
25 };
26
27 static CCTest tests[] = {
28 { { { 10, 20 }, {-1} }, -1,
29 { { 10, 20 }, {-1} } },
30
31 { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
32 { { 10, 30 }, {-1} } },
33
34 { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
35 { { 10, 40 }, {-1} } },
36
37 { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
38 { { 0, 50 }, {-1} } },
39
40 { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
41 { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
42
43 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
44 { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
45
46 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
47 { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
48
49 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
50 { { 5, 25 }, {-1} } },
51
52 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
53 { { 10, 23 }, {-1} } },
54
55 // These check boundary cases during negation.
56 { { { 0, Runemax }, {-1} }, -1,
57 { { 0, Runemax }, {-1} } },
58
59 { { { 0, 50 }, {-1} }, -1,
60 { { 0, 50 }, {-1} } },
61
62 { { { 50, Runemax }, {-1} }, -1,
63 { { 50, Runemax }, {-1} } },
64
65 // Check RemoveAbove.
66 { { { 50, Runemax }, {-1} }, 255,
67 { { 50, 255 }, {-1} } },
68
69 { { { 50, Runemax }, {-1} }, 65535,
70 { { 50, 65535 }, {-1} } },
71
72 { { { 50, Runemax }, {-1} }, Runemax,
73 { { 50, Runemax }, {-1} } },
74
75 { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
76 { { 50, 60 }, { 250, 255 }, {-1} } },
77
78 { { { 50, 60 }, {-1} }, 255,
79 { { 50, 60 }, {-1} } },
80
81 { { { 350, 360 }, {-1} }, 255,
82 { {-1} } },
83
84 { { {-1} }, 255,
85 { {-1} } },
86 };
87
88 template<class CharClass>
Broke(const char * desc,const CCTest * t,CharClass * cc)89 static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
90 if (t == NULL) {
91 printf("\t%s:", desc);
92 } else {
93 printf("\n");
94 printf("CharClass added: [%s]", desc);
95 for (int k = 0; t->add[k].lo >= 0; k++)
96 printf(" %d-%d", t->add[k].lo, t->add[k].hi);
97 printf("\n");
98 if (t->remove >= 0)
99 printf("Removed > %d\n", t->remove);
100 printf("\twant:");
101 for (int k = 0; t->final[k].lo >= 0; k++)
102 printf(" %d-%d", t->final[k].lo, t->final[k].hi);
103 printf("\n");
104 printf("\thave:");
105 }
106
107 for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
108 printf(" %d-%d", it->lo, it->hi);
109 printf("\n");
110 }
111
ShouldContain(CCTest * t,int x)112 bool ShouldContain(CCTest *t, int x) {
113 for (int j = 0; t->final[j].lo >= 0; j++)
114 if (t->final[j].lo <= x && x <= t->final[j].hi)
115 return true;
116 return false;
117 }
118
119 // Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
120
Negate(CharClass * cc)121 CharClass* Negate(CharClass *cc) {
122 return cc->Negate();
123 }
124
Delete(CharClass * cc)125 void Delete(CharClass* cc) {
126 cc->Delete();
127 }
128
Negate(CharClassBuilder * cc)129 CharClassBuilder* Negate(CharClassBuilder* cc) {
130 CharClassBuilder* ncc = cc->Copy();
131 ncc->Negate();
132 return ncc;
133 }
134
Delete(CharClassBuilder * cc)135 void Delete(CharClassBuilder* cc) {
136 delete cc;
137 }
138
139 template<class CharClass>
CorrectCC(CharClass * cc,CCTest * t,const char * desc)140 bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
141 typename CharClass::iterator it = cc->begin();
142 int size = 0;
143 for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
144 if (it == cc->end() ||
145 it->lo != t->final[j].lo ||
146 it->hi != t->final[j].hi) {
147 Broke(desc, t, cc);
148 return false;
149 }
150 size += it->hi - it->lo + 1;
151 }
152 if (it != cc->end()) {
153 Broke(desc, t, cc);
154 return false;
155 }
156 if (cc->size() != size) {
157 Broke(desc, t, cc);
158 printf("wrong size: want %d have %d\n", size, cc->size());
159 return false;
160 }
161
162 for (int j = 0; j < 101; j++) {
163 if (j == 100)
164 j = Runemax;
165 if (ShouldContain(t, j) != cc->Contains(j)) {
166 Broke(desc, t, cc);
167 printf("want contains(%d)=%d, got %d\n",
168 j, ShouldContain(t, j), cc->Contains(j));
169 return false;
170 }
171 }
172
173 CharClass* ncc = Negate(cc);
174 for (int j = 0; j < 101; j++) {
175 if (j == 100)
176 j = Runemax;
177 if (ShouldContain(t, j) == ncc->Contains(j)) {
178 Broke(desc, t, cc);
179 Broke("ncc", NULL, ncc);
180 printf("want ncc contains(%d)!=%d, got %d\n",
181 j, ShouldContain(t, j), ncc->Contains(j));
182 Delete(ncc);
183 return false;
184 }
185 if (ncc->size() != Runemax+1 - cc->size()) {
186 Broke(desc, t, cc);
187 Broke("ncc", NULL, ncc);
188 printf("ncc size should be %d is %d\n",
189 Runemax+1 - cc->size(), ncc->size());
190 Delete(ncc);
191 return false;
192 }
193 }
194 Delete(ncc);
195 return true;
196 }
197
TEST(TestCharClassBuilder,Adds)198 TEST(TestCharClassBuilder, Adds) {
199 int nfail = 0;
200 for (size_t i = 0; i < arraysize(tests); i++) {
201 CharClassBuilder ccb;
202 CCTest* t = &tests[i];
203 for (int j = 0; t->add[j].lo >= 0; j++)
204 ccb.AddRange(t->add[j].lo, t->add[j].hi);
205 if (t->remove >= 0)
206 ccb.RemoveAbove(t->remove);
207 if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
208 nfail++;
209 CharClass* cc = ccb.GetCharClass();
210 if (!CorrectCC(cc, t, "before copy (CharClass)"))
211 nfail++;
212 cc->Delete();
213
214 CharClassBuilder *ccb1 = ccb.Copy();
215 if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
216 nfail++;
217 cc = ccb.GetCharClass();
218 if (!CorrectCC(cc, t, "after copy (CharClass)"))
219 nfail++;
220 cc->Delete();
221 delete ccb1;
222 }
223 EXPECT_EQ(nfail, 0);
224 }
225
226 } // namespace re2
227