• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Test character class manipulations.
6 
7 #include <stdio.h>
8 
9 #include "util/test.h"
10 #include "util/utf.h"
11 #include "re2/regexp.h"
12 
13 namespace re2 {
14 
15 struct CCTest {
16   struct {
17     Rune lo;
18     Rune hi;
19   } add[10];
20   int remove;
21   struct {
22     Rune lo;
23     Rune hi;
24   } final[10];
25 };
26 
27 static CCTest tests[] = {
28   { { { 10, 20 }, {-1} }, -1,
29     { { 10, 20 }, {-1} } },
30 
31   { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
32     { { 10, 30 }, {-1} } },
33 
34   { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
35     { { 10, 40 }, {-1} } },
36 
37   { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
38     { { 0, 50 }, {-1} } },
39 
40   { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
41     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
42 
43   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
44     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
45 
46   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
47     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
48 
49   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
50     { { 5, 25 }, {-1} } },
51 
52   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
53     { { 10, 23 }, {-1} } },
54 
55   // These check boundary cases during negation.
56   { { { 0, Runemax }, {-1} }, -1,
57     { { 0, Runemax }, {-1} } },
58 
59   { { { 0, 50 }, {-1} }, -1,
60     { { 0, 50 }, {-1} } },
61 
62   { { { 50, Runemax }, {-1} }, -1,
63     { { 50, Runemax }, {-1} } },
64 
65   // Check RemoveAbove.
66   { { { 50, Runemax }, {-1} }, 255,
67     { { 50, 255 }, {-1} } },
68 
69   { { { 50, Runemax }, {-1} }, 65535,
70     { { 50, 65535 }, {-1} } },
71 
72   { { { 50, Runemax }, {-1} }, Runemax,
73     { { 50, Runemax }, {-1} } },
74 
75   { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
76     { { 50, 60 }, { 250, 255 }, {-1} } },
77 
78   { { { 50, 60 }, {-1} }, 255,
79     { { 50, 60 }, {-1} } },
80 
81   { { { 350, 360 }, {-1} }, 255,
82     { {-1} } },
83 
84   { { {-1} }, 255,
85     { {-1} } },
86 };
87 
88 template<class CharClass>
Broke(const char * desc,const CCTest * t,CharClass * cc)89 static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
90   if (t == NULL) {
91     printf("\t%s:", desc);
92   } else {
93     printf("\n");
94     printf("CharClass added: [%s]", desc);
95     for (int k = 0; t->add[k].lo >= 0; k++)
96       printf(" %d-%d", t->add[k].lo, t->add[k].hi);
97     printf("\n");
98     if (t->remove >= 0)
99       printf("Removed > %d\n", t->remove);
100     printf("\twant:");
101     for (int k = 0; t->final[k].lo >= 0; k++)
102       printf(" %d-%d", t->final[k].lo, t->final[k].hi);
103     printf("\n");
104     printf("\thave:");
105   }
106 
107   for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
108     printf(" %d-%d", it->lo, it->hi);
109   printf("\n");
110 }
111 
ShouldContain(CCTest * t,int x)112 bool ShouldContain(CCTest *t, int x) {
113   for (int j = 0; t->final[j].lo >= 0; j++)
114     if (t->final[j].lo <= x && x <= t->final[j].hi)
115       return true;
116   return false;
117 }
118 
119 // Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
120 
Negate(CharClass * cc)121 CharClass* Negate(CharClass *cc) {
122   return cc->Negate();
123 }
124 
Delete(CharClass * cc)125 void Delete(CharClass* cc) {
126   cc->Delete();
127 }
128 
Negate(CharClassBuilder * cc)129 CharClassBuilder* Negate(CharClassBuilder* cc) {
130   CharClassBuilder* ncc = cc->Copy();
131   ncc->Negate();
132   return ncc;
133 }
134 
Delete(CharClassBuilder * cc)135 void Delete(CharClassBuilder* cc) {
136   delete cc;
137 }
138 
139 template<class CharClass>
CorrectCC(CharClass * cc,CCTest * t,const char * desc)140 bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
141   typename CharClass::iterator it = cc->begin();
142   int size = 0;
143   for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
144     if (it == cc->end() ||
145         it->lo != t->final[j].lo ||
146         it->hi != t->final[j].hi) {
147       Broke(desc, t, cc);
148       return false;
149     }
150     size += it->hi - it->lo + 1;
151   }
152   if (it != cc->end()) {
153     Broke(desc, t, cc);
154     return false;
155   }
156   if (cc->size() != size) {
157     Broke(desc, t, cc);
158     printf("wrong size: want %d have %d\n", size, cc->size());
159     return false;
160   }
161 
162   for (int j = 0; j < 101; j++) {
163     if (j == 100)
164       j = Runemax;
165     if (ShouldContain(t, j) != cc->Contains(j)) {
166       Broke(desc, t, cc);
167       printf("want contains(%d)=%d, got %d\n",
168              j, ShouldContain(t, j), cc->Contains(j));
169       return false;
170     }
171   }
172 
173   CharClass* ncc = Negate(cc);
174   for (int j = 0; j < 101; j++) {
175     if (j == 100)
176       j = Runemax;
177     if (ShouldContain(t, j) == ncc->Contains(j)) {
178       Broke(desc, t, cc);
179       Broke("ncc", NULL, ncc);
180       printf("want ncc contains(%d)!=%d, got %d\n",
181              j, ShouldContain(t, j), ncc->Contains(j));
182       Delete(ncc);
183       return false;
184     }
185     if (ncc->size() != Runemax+1 - cc->size()) {
186       Broke(desc, t, cc);
187       Broke("ncc", NULL, ncc);
188       printf("ncc size should be %d is %d\n",
189              Runemax+1 - cc->size(), ncc->size());
190       Delete(ncc);
191       return false;
192     }
193   }
194   Delete(ncc);
195   return true;
196 }
197 
TEST(TestCharClassBuilder,Adds)198 TEST(TestCharClassBuilder, Adds) {
199   int nfail = 0;
200   for (size_t i = 0; i < arraysize(tests); i++) {
201     CharClassBuilder ccb;
202     CCTest* t = &tests[i];
203     for (int j = 0; t->add[j].lo >= 0; j++)
204       ccb.AddRange(t->add[j].lo, t->add[j].hi);
205     if (t->remove >= 0)
206       ccb.RemoveAbove(t->remove);
207     if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
208       nfail++;
209     CharClass* cc = ccb.GetCharClass();
210     if (!CorrectCC(cc, t, "before copy (CharClass)"))
211       nfail++;
212     cc->Delete();
213 
214     CharClassBuilder *ccb1 = ccb.Copy();
215     if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
216       nfail++;
217     cc = ccb.GetCharClass();
218     if (!CorrectCC(cc, t, "after copy (CharClass)"))
219       nfail++;
220     cc->Delete();
221     delete ccb1;
222   }
223   EXPECT_EQ(nfail, 0);
224 }
225 
226 }  // namespace re2
227