• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 
29 #include <stdlib.h>
30 
31 #include "v8.h"
32 
33 #include "string-stream.h"
34 #include "cctest.h"
35 #include "zone-inl.h"
36 #include "parser.h"
37 #include "ast.h"
38 #include "jsregexp.h"
39 #include "regexp-macro-assembler.h"
40 #include "regexp-macro-assembler-irregexp.h"
41 #ifdef V8_NATIVE_REGEXP
42 #ifdef V8_TARGET_ARCH_ARM
43 #include "arm/macro-assembler-arm.h"
44 #include "arm/regexp-macro-assembler-arm.h"
45 #endif
46 #ifdef V8_TARGET_ARCH_X64
47 #include "x64/macro-assembler-x64.h"
48 #include "x64/regexp-macro-assembler-x64.h"
49 #endif
50 #ifdef V8_TARGET_ARCH_IA32
51 #include "ia32/macro-assembler-ia32.h"
52 #include "ia32/regexp-macro-assembler-ia32.h"
53 #endif
54 #else
55 #include "interpreter-irregexp.h"
56 #endif
57 
58 using namespace v8::internal;
59 
60 
Parse(const char * input)61 static SmartPointer<const char> Parse(const char* input) {
62   V8::Initialize(NULL);
63   v8::HandleScope scope;
64   ZoneScope zone_scope(DELETE_ON_EXIT);
65   FlatStringReader reader(CStrVector(input));
66   RegExpCompileData result;
67   CHECK(v8::internal::ParseRegExp(&reader, false, &result));
68   CHECK(result.tree != NULL);
69   CHECK(result.error.is_null());
70   SmartPointer<const char> output = result.tree->ToString();
71   return output;
72 }
73 
CheckSimple(const char * input)74 static bool CheckSimple(const char* input) {
75   V8::Initialize(NULL);
76   v8::HandleScope scope;
77   unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
78   ZoneScope zone_scope(DELETE_ON_EXIT);
79   FlatStringReader reader(CStrVector(input));
80   RegExpCompileData result;
81   CHECK(v8::internal::ParseRegExp(&reader, false, &result));
82   CHECK(result.tree != NULL);
83   CHECK(result.error.is_null());
84   return result.simple;
85 }
86 
87 struct MinMaxPair {
88   int min_match;
89   int max_match;
90 };
91 
CheckMinMaxMatch(const char * input)92 static MinMaxPair CheckMinMaxMatch(const char* input) {
93   V8::Initialize(NULL);
94   v8::HandleScope scope;
95   unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
96   ZoneScope zone_scope(DELETE_ON_EXIT);
97   FlatStringReader reader(CStrVector(input));
98   RegExpCompileData result;
99   CHECK(v8::internal::ParseRegExp(&reader, false, &result));
100   CHECK(result.tree != NULL);
101   CHECK(result.error.is_null());
102   int min_match = result.tree->min_match();
103   int max_match = result.tree->max_match();
104   MinMaxPair pair = { min_match, max_match };
105   return pair;
106 }
107 
108 
109 
// Asserts that |input| parses and that its tree prints as |expected|.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
// Asserts that CheckSimple(input) equals |simple|.  The caller supplies the
// terminating semicolon, as with the other macros here.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))
// Asserts that the tree parsed from |input| reports the given min_match and
// max_match.  Wrapped in do/while(false) so the expansion is a single
// statement that is safe inside unbraced if/else.
#define CHECK_MIN_MAX(input, min, max)                                         \
  do {                                                                         \
    MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    CHECK_EQ(min, min_max.min_match);                                          \
    CHECK_EQ(max, min_max.max_match);                                          \
  } while (false)
117 
// Exercises the regexp parser through the three helper macros above:
//  - CHECK_PARSE_EQ compares the string form of the parsed tree,
//  - CHECK_SIMPLE compares the parser's "simple" flag,
//  - CHECK_MIN_MAX compares the tree's min_match()/max_match().
TEST(Parser)118 TEST(Parser) {
119   V8::Initialize(NULL);
  // Expected string form of the parse tree for each pattern.
120   CHECK_PARSE_EQ("abc", "'abc'");
121   CHECK_PARSE_EQ("", "%");
122   CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
123   CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
124   CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
125   CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
126   CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
127   CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
128   CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
129   CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
130   CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
131   CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
132   CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
133   CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
134   CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
135   CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
136   CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
137   CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
138   CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
139   CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
140   CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
141   CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
142   CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
143   CHECK_PARSE_EQ("(?:foo)", "'foo'");
144   CHECK_PARSE_EQ("(?: foo )", "' foo '");
145   CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
146   CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
147   CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
148   CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
149   CHECK_PARSE_EQ("()", "(^ %)");
150   CHECK_PARSE_EQ("(?=)", "(-> + %)");
151   CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]");   // Doesn't compile on windows
152   CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]");   // \uffff isn't in codepage 1252
153   CHECK_PARSE_EQ("[x]", "[x]");
154   CHECK_PARSE_EQ("[xyz]", "[x y z]");
155   CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
156   CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
157   CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
158   CHECK_PARSE_EQ("]", "']'");
159   CHECK_PARSE_EQ("}", "'}'");
160   CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
161   CHECK_PARSE_EQ("[\\d]", "[0-9]");
162   CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
163   CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
164   CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
165   CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
166   CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
167                  "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
168   CHECK_PARSE_EQ("\\c!", "'c!'");
169   CHECK_PARSE_EQ("\\c_", "'c_'");
170   CHECK_PARSE_EQ("\\c~", "'c~'");
171   CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
172   CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
173   CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
  // Numeric escapes: octal characters versus back references.
174   CHECK_PARSE_EQ("\\0", "'\\x00'");
175   CHECK_PARSE_EQ("\\8", "'8'");
176   CHECK_PARSE_EQ("\\9", "'9'");
177   CHECK_PARSE_EQ("\\11", "'\\x09'");
178   CHECK_PARSE_EQ("\\11a", "'\\x09a'");
179   CHECK_PARSE_EQ("\\011", "'\\x09'");
180   CHECK_PARSE_EQ("\\00011", "'\\x0011'");
181   CHECK_PARSE_EQ("\\118", "'\\x098'");
182   CHECK_PARSE_EQ("\\111", "'I'");
183   CHECK_PARSE_EQ("\\1111", "'I1'");
184   CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
185   CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
186   CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
187   CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
188   CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
189                                " (# 0 - g (<- 1)))");
190   CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
191                                " (# 0 - g (<- 2)))");
192   CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
193                                " (# 0 - g (<- 3)))");
194   CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
195                                " (# 0 - g '\\x04'))");
196   CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
197               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
198               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
199   CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
200               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
201               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
202   CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
203   CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
204   CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
205   CHECK_PARSE_EQ("(?=a)?a", "'a'");
206   CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
207   CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
208   CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
209   CHECK_PARSE_EQ("(?!a)?a", "'a'");
210   CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
211   CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
212   CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
213   CHECK_PARSE_EQ("[\\0]", "[\\x00]");
214   CHECK_PARSE_EQ("[\\11]", "[\\x09]");
215   CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
216   CHECK_PARSE_EQ("[\\011]", "[\\x09]");
217   CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
218   CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
219   CHECK_PARSE_EQ("[\\111]", "[I]");
220   CHECK_PARSE_EQ("[\\1111]", "[I 1]");
221   CHECK_PARSE_EQ("\\x34", "'\x34'");
222   CHECK_PARSE_EQ("\\x60", "'\x60'");
223   CHECK_PARSE_EQ("\\x3z", "'x3z'");
224   CHECK_PARSE_EQ("\\c", "'c'");
225   CHECK_PARSE_EQ("\\u0034", "'\x34'");
226   CHECK_PARSE_EQ("\\u003z", "'u003z'");
227   CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
228 
  // Expected value of the parser's "simple" flag for each pattern.
229   CHECK_SIMPLE("a", true);
230   CHECK_SIMPLE("a|b", false);
231   CHECK_SIMPLE("a\\n", false);
232   CHECK_SIMPLE("^a", false);
233   CHECK_SIMPLE("a$", false);
234   CHECK_SIMPLE("a\\b!", false);
235   CHECK_SIMPLE("a\\Bb", false);
236   CHECK_SIMPLE("a*", false);
237   CHECK_SIMPLE("a*?", false);
238   CHECK_SIMPLE("a?", false);
239   CHECK_SIMPLE("a??", false);
240   CHECK_SIMPLE("a{0,1}?", false);
241   CHECK_SIMPLE("a{1,1}?", false);
242   CHECK_SIMPLE("a{1,2}?", false);
243   CHECK_SIMPLE("a+?", false);
244   CHECK_SIMPLE("(a)", false);
245   CHECK_SIMPLE("(a)\\1", false);
246   CHECK_SIMPLE("(\\1a)", false);
247   CHECK_SIMPLE("\\1(a)", false);
248   CHECK_SIMPLE("a\\s", false);
249   CHECK_SIMPLE("a\\S", false);
250   CHECK_SIMPLE("a\\d", false);
251   CHECK_SIMPLE("a\\D", false);
252   CHECK_SIMPLE("a\\w", false);
253   CHECK_SIMPLE("a\\W", false);
254   CHECK_SIMPLE("a.", false);
255   CHECK_SIMPLE("a\\q", false);
256   CHECK_SIMPLE("a[a]", false);
257   CHECK_SIMPLE("a[^a]", false);
258   CHECK_SIMPLE("a[a-z]", false);
259   CHECK_SIMPLE("a[\\q]", false);
260   CHECK_SIMPLE("a(?:b)", false);
261   CHECK_SIMPLE("a(?=b)", false);
262   CHECK_SIMPLE("a(?!b)", false);
263   CHECK_SIMPLE("\\x60", false);
264   CHECK_SIMPLE("\\u0060", false);
265   CHECK_SIMPLE("\\cA", false);
266   CHECK_SIMPLE("\\q", false);
267   CHECK_SIMPLE("\\1112", false);
268   CHECK_SIMPLE("\\0", false);
269   CHECK_SIMPLE("(a)\\1", false);
270   CHECK_SIMPLE("(?=a)?a", false);
271   CHECK_SIMPLE("(?!a)?a\\1", false);
272   CHECK_SIMPLE("(?:(?=a))a\\1", false);
273 
  // Incomplete or malformed brace quantifiers are expected to print as
  // literal text.
274   CHECK_PARSE_EQ("a{}", "'a{}'");
275   CHECK_PARSE_EQ("a{,}", "'a{,}'");
276   CHECK_PARSE_EQ("a{", "'a{'");
277   CHECK_PARSE_EQ("a{z}", "'a{z}'");
278   CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
279   CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
280   CHECK_PARSE_EQ("a{12,", "'a{12,'");
281   CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
282   CHECK_PARSE_EQ("{}", "'{}'");
283   CHECK_PARSE_EQ("{,}", "'{,}'");
284   CHECK_PARSE_EQ("{", "'{'");
285   CHECK_PARSE_EQ("{z}", "'{z}'");
286   CHECK_PARSE_EQ("{1z}", "'{1z}'");
287   CHECK_PARSE_EQ("{12z}", "'{12z}'");
288   CHECK_PARSE_EQ("{12,", "'{12,'");
289   CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
290 
  // Expected min_match()/max_match() of the parsed tree for each pattern.
291   CHECK_MIN_MAX("a", 1, 1);
292   CHECK_MIN_MAX("abc", 3, 3);
293   CHECK_MIN_MAX("a[bc]d", 3, 3);
294   CHECK_MIN_MAX("a|bc", 1, 2);
295   CHECK_MIN_MAX("ab|c", 1, 2);
296   CHECK_MIN_MAX("a||bc", 0, 2);
297   CHECK_MIN_MAX("|", 0, 0);
298   CHECK_MIN_MAX("(?:ab)", 2, 2);
299   CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
300   CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
301   CHECK_MIN_MAX("(ab)", 2, 2);
302   CHECK_MIN_MAX("(ab|cde)", 2, 3);
303   CHECK_MIN_MAX("(ab)\\1", 2, 4);
304   CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
305   CHECK_MIN_MAX("(?:ab)?", 0, 2);
306   CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
307   CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
308   CHECK_MIN_MAX("a?", 0, 1);
309   CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
310   CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
311   CHECK_MIN_MAX("a??", 0, 1);
312   CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
313   CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
314   CHECK_MIN_MAX("(?:a?)?", 0, 1);
315   CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
316   CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
317   CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
318   CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
319   CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
320   CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
321   CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
322   CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
323   CHECK_MIN_MAX("a{0}", 0, 0);
324   CHECK_MIN_MAX("(?:a+){0}", 0, 0);
325   CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
326   CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
327   CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
328   CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
329   CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
330   CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
331   CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
332   CHECK_MIN_MAX("a\\bc", 2, 2);
333   CHECK_MIN_MAX("a\\Bc", 2, 2);
334   CHECK_MIN_MAX("a\\sc", 3, 3);
335   CHECK_MIN_MAX("a\\Sc", 3, 3);
336   CHECK_MIN_MAX("a(?=b)c", 2, 2);
337   CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
338   CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
339 }
340 
// Additional parser expectations, checked the same way as in TEST(Parser):
// each pattern must parse and its tree must print as the given string.
TEST(ParserRegression)341 TEST(ParserRegression) {
342   CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
343   CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
344   CHECK_PARSE_EQ("{", "'{'");
345   CHECK_PARSE_EQ("a|", "(| 'a' %)");
346 }
347 
ExpectError(const char * input,const char * expected)348 static void ExpectError(const char* input,
349                         const char* expected) {
350   V8::Initialize(NULL);
351   v8::HandleScope scope;
352   ZoneScope zone_scope(DELETE_ON_EXIT);
353   FlatStringReader reader(CStrVector(input));
354   RegExpCompileData result;
355   CHECK_EQ(false, v8::internal::ParseRegExp(&reader, false, &result));
356   CHECK(result.tree == NULL);
357   CHECK(!result.error.is_null());
358   SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
359   CHECK_EQ(expected, *str);
360 }
361 
362 
// Feeds malformed patterns to the parser and checks the reported messages.
TEST(Errors) {
  V8::Initialize(NULL);

  // Messages the parser is expected to produce.
  const char* kEndBackslash = "\\ at end of pattern";
  const char* kUnterminatedGroup = "Unterminated group";
  const char* kInvalidGroup = "Invalid group";
  const char* kUnterminatedCharacterClass = "Unterminated character class";
  const char* kNothingToRepeat = "Nothing to repeat";
  const char* kTooManyCaptures = "Too many captures";

  ExpectError("\\", kEndBackslash);
  ExpectError("(foo", kUnterminatedGroup);
  ExpectError("(?", kInvalidGroup);
  ExpectError("[", kUnterminatedCharacterClass);
  ExpectError("[a-", kUnterminatedCharacterClass);

  // Quantifiers with nothing in front of them.
  const char* const kBareQuantifiers[] = {
    "*", "?", "+", "{1}", "{1,2}", "{1,}"
  };
  const int kBareQuantifierCount =
      static_cast<int>(sizeof(kBareQuantifiers) / sizeof(kBareQuantifiers[0]));
  for (int n = 0; n < kBareQuantifierCount; n++) {
    ExpectError(kBareQuantifiers[n], kNothingToRepeat);
  }

  // Check that we don't allow more than kMaxCaptures captures: build a
  // pattern with kMaxCaptures + 1 empty groups.
  const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
  HeapStringAllocator allocator;
  StringStream accumulator(&allocator);
  for (int remaining = kMaxCaptures + 1; remaining > 0; remaining--) {
    accumulator.Add("()");
  }
  SmartPointer<const char> many_captures(accumulator.ToCString());
  ExpectError(*many_captures, kTooManyCaptures);
}
393 
394 
IsDigit(uc16 c)395 static bool IsDigit(uc16 c) {
396   return ('0' <= c && c <= '9');
397 }
398 
399 
NotDigit(uc16 c)400 static bool NotDigit(uc16 c) {
401   return !IsDigit(c);
402 }
403 
404 
IsWhiteSpace(uc16 c)405 static bool IsWhiteSpace(uc16 c) {
406   switch (c) {
407     case 0x09:
408     case 0x0A:
409     case 0x0B:
410     case 0x0C:
411     case 0x0d:
412     case 0x20:
413     case 0xA0:
414     case 0x2028:
415     case 0x2029:
416       return true;
417     default:
418       return unibrow::Space::Is(c);
419   }
420 }
421 
422 
NotWhiteSpace(uc16 c)423 static bool NotWhiteSpace(uc16 c) {
424   return !IsWhiteSpace(c);
425 }
426 
427 
NotWord(uc16 c)428 static bool NotWord(uc16 c) {
429   return !IsRegExpWord(c);
430 }
431 
432 
TestCharacterClassEscapes(uc16 c,bool (pred)(uc16 c))433 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
434   ZoneScope scope(DELETE_ON_EXIT);
435   ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
436   CharacterRange::AddClassEscape(c, ranges);
437   for (unsigned i = 0; i < (1 << 16); i++) {
438     bool in_class = false;
439     for (int j = 0; !in_class && j < ranges->length(); j++) {
440       CharacterRange& range = ranges->at(j);
441       in_class = (range.from() <= i && i <= range.to());
442     }
443     CHECK_EQ(pred(i), in_class);
444   }
445 }
446 
447 
// Runs TestCharacterClassEscapes for each escape character, paired with the
// predicate its ranges are checked against.
TEST(CharacterClassEscapes) {
  struct EscapeCase {
    uc16 escape;
    bool (*predicate)(uc16 c);
  };
  static const EscapeCase kCases[] = {
    { '.', IsRegExpNewline },
    { 'd', IsDigit },
    { 'D', NotDigit },
    { 's', IsWhiteSpace },
    { 'S', NotWhiteSpace },
    { 'w', IsRegExpWord },
    { 'W', NotWord }
  };
  const int kCaseCount = static_cast<int>(sizeof(kCases) / sizeof(kCases[0]));
  for (int n = 0; n < kCaseCount; n++) {
    TestCharacterClassEscapes(kCases[n].escape, kCases[n].predicate);
  }
}
457 
458 
Compile(const char * input,bool multiline,bool is_ascii)459 static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
460   V8::Initialize(NULL);
461   FlatStringReader reader(CStrVector(input));
462   RegExpCompileData compile_data;
463   if (!v8::internal::ParseRegExp(&reader, multiline, &compile_data))
464     return NULL;
465   Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
466   RegExpEngine::Compile(&compile_data, false, multiline, pattern, is_ascii);
467   return compile_data.node;
468 }
469 
470 
Execute(const char * input,bool multiline,bool is_ascii,bool dot_output=false)471 static void Execute(const char* input,
472                     bool multiline,
473                     bool is_ascii,
474                     bool dot_output = false) {
475   v8::HandleScope scope;
476   ZoneScope zone_scope(DELETE_ON_EXIT);
477   RegExpNode* node = Compile(input, multiline, is_ascii);
478   USE(node);
479 #ifdef DEBUG
480   if (dot_output) {
481     RegExpEngine::DotPrint(input, node, false);
482     exit(0);
483   }
484 #endif  // DEBUG
485 }
486 
487 
// Configuration used to instantiate ZoneSplayTree in the tests below: int
// keys and values with a three-way integer comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static const int kNoValue;
  // Returns -1, 0 or 1 when |a| is less than, equal to or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
const int TestConfig::kNoValue = 0;
507 
508 
// Deterministic mixing of |i| and |j|: XOR of the two scaled inputs,
// returned as unsigned.
static unsigned PseudoRandom(int i, int j) {
  const int mixed = (i * 781) ^ (j * 329);
  return static_cast<unsigned>(mixed);
}
512 
513 
// Drives a ZoneSplayTree<TestConfig> with a deterministic pseudo-random
// sequence of inserts and removes, mirroring the expected contents in the
// |seen| array and comparing the two after every mutation.
TEST(SplayTreeSimple) {
  static const unsigned kLimit = 1000;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneSplayTree<TestConfig> tree;
  bool seen[kLimit];
  for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
  // Checks that tree membership matches |seen| for every key below kLimit.
  // Requires a Locator named |loc| to be in scope at the point of use.
#define CHECK_MAPS_EQUAL() do {                                      \
    for (unsigned k = 0; k < kLimit; k++)                            \
      CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
  } while (false)
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      unsigned next = PseudoRandom(i, j) % kLimit;
      if (seen[next]) {
        // We've already seen this one.  Check the value and remove
        // it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.Find(next, &loc));
        CHECK_EQ(next, loc.key());
        CHECK_EQ(3 * next, loc.value());
        tree.Remove(next);
        seen[next] = false;
        CHECK_MAPS_EQUAL();
      } else {
        // Check that it wasn't there already and then add it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(!tree.Find(next, &loc));
        CHECK(tree.Insert(next, &loc));
        CHECK_EQ(next, loc.key());
        loc.set_value(3 * next);
        seen[next] = true;
        CHECK_MAPS_EQUAL();
      }
      // For a key known to be present, both bounded lookups are expected to
      // land exactly on that key.  Either hit ends the inner loop.
      int val = PseudoRandom(j, i) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindGreatestLessThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
      val = PseudoRandom(i + j, i - j) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindLeastGreaterThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
    }
  }
  // The helper is only meaningful inside this test; do not leak it into the
  // rest of the translation unit.
#undef CHECK_MAPS_EQUAL
}
564 
565 
// Fills a DispatchTable with pseudo-random character ranges and checks that
// a lookup of every value below kLimit reports exactly the range sets that
// contain it.
TEST(DispatchTableConstruction) {
  // Initialize test data: each row holds kRangeSize (from, to) pairs stored
  // as a sorted list of endpoints.
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  static const int kEndpointCount = 2 * kRangeSize;
  uc16 ranges[kRangeCount][kEndpointCount];
  for (int i = 0; i < kRangeCount; i++) {
    Vector<uc16> range(ranges[i], kEndpointCount);
    for (int j = 0; j < kEndpointCount; j++) {
      range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
    }
    range.Sort();
    for (int j = 1; j < kEndpointCount; j++) {
      CHECK(range[j-1] <= range[j]);
    }
  }

  // Enter test data into dispatch table; row index doubles as the out-set
  // value.
  ZoneScope zone(DELETE_ON_EXIT);
  DispatchTable table;
  for (int i = 0; i < kRangeCount; i++) {
    const uc16* endpoints = ranges[i];
    for (int j = 0; j < kEndpointCount; j += 2) {
      CharacterRange interval(endpoints[j], endpoints[j + 1]);
      table.AddRange(interval, i);
    }
  }

  // Check that the table looks as we would expect.
  for (int p = 0; p < kLimit; p++) {
    OutSet* outs = table.Get(p);
    for (int j = 0; j < kRangeCount; j++) {
      const uc16* endpoints = ranges[j];
      bool is_on = false;
      for (int k = 0; k < kEndpointCount; k += 2) {
        if (endpoints[k] <= p && p <= endpoints[k + 1]) {
          is_on = true;
          break;
        }
      }
      CHECK_EQ(is_on, outs->Get(j));
    }
  }
}
602 
603 
604 // Tests of interpreter.
605 
606 
607 #ifdef V8_NATIVE_REGEXP
608 
// Alias the regexp macro assembler of the target architecture as
// ArchRegExpMacroAssembler so the native tests below are written once.
// No alias is defined when none of the three architecture macros is set.
609 #if V8_TARGET_ARCH_IA32
610 typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
611 #elif V8_TARGET_ARCH_X64
612 typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
613 #elif V8_TARGET_ARCH_ARM
614 typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
615 #endif
616 
617 class ContextInitializer {
618  public:
ContextInitializer()619   ContextInitializer()
620       : env_(), scope_(), zone_(DELETE_ON_EXIT), stack_guard_() {
621     env_ = v8::Context::New();
622     env_->Enter();
623   }
~ContextInitializer()624   ~ContextInitializer() {
625     env_->Exit();
626     env_.Dispose();
627   }
628  private:
629   v8::Persistent<v8::Context> env_;
630   v8::HandleScope scope_;
631   v8::internal::ZoneScope zone_;
632   v8::internal::StackGuard stack_guard_;
633 };
634 
635 
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * captures,bool at_start)636 static ArchRegExpMacroAssembler::Result Execute(Code* code,
637                                                 String* input,
638                                                 int start_offset,
639                                                 const byte* input_start,
640                                                 const byte* input_end,
641                                                 int* captures,
642                                                 bool at_start) {
643   return NativeRegExpMacroAssembler::Execute(
644       code,
645       input,
646       start_offset,
647       input_start,
648       input_end,
649       captures,
650       at_start);
651 }
652 
653 
// Generated code consisting only of Succeed() must report SUCCESS and leave
// all four capture registers at -1.
TEST(MacroAssemblerNativeSuccess) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 4);
  masm.Succeed();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  // Seed the output array with sentinels so untouched entries would be
  // detected by the checks below.
  int captures[4] = {42, 37, 87, 117};
  Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();
  Address end_adr = start_adr + seq_input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, start_adr, end_adr, captures, true);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(-1, captures[0]);
  CHECK_EQ(-1, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);
}
687 
688 
// Hand-assembles a matcher for "foo" at the current position (ASCII mode)
// and runs it on a matching and a non-matching input.
TEST(MacroAssemblerNativeSimple) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 4);

  const uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);

  // On a match, registers 0 and 1 receive the positions before and after
  // the three characters.
  Label no_match;
  masm.CheckCharacters(foo, 0, &no_match, true);
  masm.WriteCurrentPositionToRegister(0, 0);
  masm.AdvanceCurrentPosition(3);
  masm.WriteCurrentPositionToRegister(1, 0);
  masm.Succeed();
  masm.Bind(&no_match);
  masm.Fail();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  // First run: input starts with "foo".
  int captures[4] = {42, 37, 87, 117};
  Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();
  Address end_adr = start_adr + input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, start_adr, end_adr, captures, true);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  // Second run: input does not start with "foo".
  input = Factory::NewStringFromAscii(CStrVector("barbarbar"));
  seq_input = Handle<SeqAsciiString>::cast(input);
  start_adr = seq_input->GetCharsAddress();
  end_adr = start_adr + input->length();

  result = Execute(*code, *input, 0, start_adr, end_adr, captures, true);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
745 
746 
// Hand-assembles a matcher for "foo" at the current position (UC16 mode)
// and runs it on a matching and a non-matching two-byte input.
TEST(MacroAssemblerNativeSimpleUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4);

  uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);

  // On a match, registers 0 and 1 receive the positions before and after
  // the three characters.
  Label fail;
  m.CheckCharacters(foo, 0, &fail, true);
  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();
  m.Bind(&fail);
  m.Fail();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  int captures[4] = {42, 37, 87, 117};
  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', '\xa0'};
  Handle<String> input =
      Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  // The end address is a byte address, and each uc16 character occupies two
  // bytes, so the length has to be doubled (as in the second run below).
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              captures,
              true);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  // Second run: input does not start with "foo".
  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', '\xa0'};
  input = Factory::NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
  seq_input = Handle<SeqTwoByteString>::cast(input);
  start_adr = seq_input->GetCharsAddress();

  result = Execute(*code,
                   *input,
                   0,
                   start_adr,
                   start_adr + input->length() * 2,
                   captures,
                   true);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
806 
807 
// Loads a character at offset 10 of a six-character input.  The first load
// branches to its failure label; the second load has no label, and the test
// expects execution to reach the pushed backtrack target and report FAILURE.
TEST(MacroAssemblerNativeBacktrack) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 0);

  Label out_of_range;
  Label backtrack_target;
  masm.LoadCurrentCharacter(10, &out_of_range);
  masm.Succeed();
  masm.Bind(&out_of_range);
  masm.PushBacktrack(&backtrack_target);
  masm.LoadCurrentCharacter(10, NULL);
  masm.Succeed();
  masm.Bind(&backtrack_target);
  masm.Fail();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();
  Address end_adr = start_adr + input->length();

  // No capture registers were requested, so no output array is passed.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, start_adr, end_adr, NULL, true);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
844 
845 
// Hand-assembles native ASCII code modelled on the regexp ^(..)..\1 and runs
// it on "fooofo". The run must succeed with registers {0, 2, 6, -1}.
TEST(MacroAssemblerNativeBackReferenceASCII) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);

  // Registers 0 and 1 bracket the first two characters of the subject.
  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  // Right after the capture the subject continues with "oo", which differs
  // from the captured "fo", so this check is expected to branch to nomatch.
  Label nomatch;
  m.CheckNotBackReference(0, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  // Two characters later the subject reads "fo" again, so this check is
  // expected to fall through; register 2 then records the position reached.
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  m.CheckNotBackReference(0, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is escaped so the string really contains the two characters
  // "\1"; an unescaped "\1" in a C++ literal is an octal escape for the single
  // character with value 1.
  Handle<String> source =
      Factory::NewStringFromAscii(CStrVector("^(..)..\\1"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output,
              true);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  // Register 3 is never written by the program above.
  CHECK_EQ(-1, output[3]);
}
891 
892 
// Two-byte (UC16) counterpart of the ASCII back-reference test: the same
// hand-assembled program is run on a six-character two-byte subject that
// contains U+2028. The run must succeed with registers {0, 2, 6, -1}.
TEST(MacroAssemblerNativeBackReferenceUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4);

  // Registers 0 and 1 bracket the first two characters of the subject.
  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  // The two characters following the capture ("oo") differ from it, so this
  // check is expected to branch to nomatch.
  Label nomatch;
  m.CheckNotBackReference(0, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  // The last two characters repeat the capture, so this check is expected to
  // fall through; register 2 then records the position reached.
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  m.CheckNotBackReference(0, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is escaped so the string really contains the two characters
  // "\1"; an unescaped "\1" in a C++ literal is an octal escape for the single
  // character with value 1.
  Handle<String> source =
      Factory::NewStringFromAscii(CStrVector("^(..)..\\1"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
  Handle<String> input =
      Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  // The end address is computed with two bytes per character.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              output,
              true);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  // Register 3 is never written by the program above.
  CHECK_EQ(-1, output[3]);
}
940 
941 
942 
// Hand-assembles native ASCII code modelled on (^f|ob) and runs it twice on
// "foobar": once from offset 0 flagged as start-of-input, and once from
// offset 3 flagged as not-at-start. Both runs must succeed.
TEST(MacroAssemblernativeAtStart) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler assembler(NativeRegExpMacroAssembler::ASCII, 0);

  Label not_at_start;
  Label newline;
  Label fail;
  assembler.CheckNotAtStart(&not_at_start);
  // At-start path: previous character must be '\n', current must be 'f'.
  assembler.CheckCharacter('\n', &newline);
  assembler.Bind(&fail);
  assembler.Fail();
  assembler.Bind(&newline);
  assembler.LoadCurrentCharacter(0, &fail);
  assembler.CheckNotCharacter('f', &fail);
  assembler.Succeed();

  assembler.Bind(&not_at_start);
  // Not-at-start path: previous character must be 'o', current must be 'b'.
  Label previous_is_o;
  assembler.CheckCharacter('o', &previous_is_o);
  assembler.Fail();
  assembler.Bind(&previous_is_o);
  assembler.LoadCurrentCharacter(0, &fail);
  assembler.CheckNotCharacter('b', &fail);
  assembler.Succeed();

  Handle<String> pattern = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
  Handle<Code> code = Handle<Code>::cast(assembler.GetCode(pattern));

  Handle<String> subject = Factory::NewStringFromAscii(CStrVector("foobar"));
  Address subject_start =
      Handle<SeqAsciiString>::cast(subject)->GetCharsAddress();

  // Run from the very beginning of the subject.
  const NativeRegExpMacroAssembler::Result from_start =
      Execute(*code,
              *subject,
              0,
              subject_start,
              subject_start + subject->length(),
              NULL,
              true);
  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, from_start);

  // Run again starting at offset 3 ("bar"), no longer at the start.
  const NativeRegExpMacroAssembler::Result from_middle =
      Execute(*code,
              *subject,
              3,
              subject_start + 3,
              subject_start + subject->length(),
              NULL,
              false);
  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, from_middle);
}
999 
1000 
// Hand-assembles native ASCII code that exercises case-insensitive back
// references against "aBcAbCABCxYzab". The run must succeed with registers
// {0, 12, 0, 3}.
TEST(MacroAssemblerNativeBackRefNoCase) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);

  Label fail, succ;

  // Registers 2 and 3 bracket the first three characters ("aBc"); that pair is
  // what the back-reference checks below refer to.
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(2, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(3, 0);
  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "AbC".
  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "ABC".
  // The third attempt faces "xYz" and is expected to branch away.
  Label expected_fail;
  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
  m.Bind(&fail);
  m.Fail();

  m.Bind(&expected_fail);
  m.AdvanceCurrentPosition(3);  // Skip "xYz"
  // Only "ab" is left in the subject, so the check is expected to branch to
  // succ rather than fall through.
  m.CheckNotBackReferenceIgnoreCase(2, &succ);
  m.Fail();

  m.Bind(&succ);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();

  // Backslashes are escaped so the string really contains "\1"; an unescaped
  // "\1" in a C++ literal is an octal escape for the single character with
  // value 1.
  Handle<String> source =
      Factory::NewStringFromAscii(
          CStrVector("^(abc)\\1\\1(?!\\1)...(?!\\1)"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input =
      Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output,
              true);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(12, output[1]);
  CHECK_EQ(0, output[2]);
  CHECK_EQ(3, output[3]);
}
1055 
1056 
1057 
TEST(MacroAssemblerNativeRegisters)1058 TEST(MacroAssemblerNativeRegisters) {
1059   v8::V8::Initialize();
1060   ContextInitializer initializer;
1061 
1062   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6);
1063 
1064   uc16 foo_chars[3] = {'f', 'o', 'o'};
1065   Vector<const uc16> foo(foo_chars, 3);
1066 
1067   enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1068   Label fail;
1069   Label backtrack;
1070   m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
1071   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1072   m.PushBacktrack(&backtrack);
1073   m.WriteStackPointerToRegister(sp);
1074   // Fill stack and registers
1075   m.AdvanceCurrentPosition(2);
1076   m.WriteCurrentPositionToRegister(out1, 0);
1077   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1078   m.PushBacktrack(&fail);
1079   // Drop backtrack stack frames.
1080   m.ReadStackPointerFromRegister(sp);
1081   // And take the first backtrack (to &backtrack)
1082   m.Backtrack();
1083 
1084   m.PushCurrentPosition();
1085   m.AdvanceCurrentPosition(2);
1086   m.PopCurrentPosition();
1087 
1088   m.Bind(&backtrack);
1089   m.PopRegister(out1);
1090   m.ReadCurrentPositionFromRegister(out1);
1091   m.AdvanceCurrentPosition(3);
1092   m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]
1093 
1094   Label loop;
1095   m.SetRegister(loop_cnt, 0);  // loop counter
1096   m.Bind(&loop);
1097   m.AdvanceRegister(loop_cnt, 1);
1098   m.AdvanceCurrentPosition(1);
1099   m.IfRegisterLT(loop_cnt, 3, &loop);
1100   m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]
1101 
1102   Label loop2;
1103   m.SetRegister(loop_cnt, 2);  // loop counter
1104   m.Bind(&loop2);
1105   m.AdvanceRegister(loop_cnt, -1);
1106   m.AdvanceCurrentPosition(1);
1107   m.IfRegisterGE(loop_cnt, 0, &loop2);
1108   m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]
1109 
1110   Label loop3;
1111   Label exit_loop3;
1112   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1113   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1114   m.ReadCurrentPositionFromRegister(out3);
1115   m.Bind(&loop3);
1116   m.AdvanceCurrentPosition(1);
1117   m.CheckGreedyLoop(&exit_loop3);
1118   m.GoTo(&loop3);
1119   m.Bind(&exit_loop3);
1120   m.PopCurrentPosition();
1121   m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]
1122 
1123   m.Succeed();
1124 
1125   m.Bind(&fail);
1126   m.Fail();
1127 
1128   Handle<String> source =
1129       Factory::NewStringFromAscii(CStrVector("<loop test>"));
1130   Handle<Object> code_object = m.GetCode(source);
1131   Handle<Code> code = Handle<Code>::cast(code_object);
1132 
1133   // String long enough for test (content doesn't matter).
1134   Handle<String> input =
1135       Factory::NewStringFromAscii(CStrVector("foofoofoofoofoo"));
1136   Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1137   Address start_adr = seq_input->GetCharsAddress();
1138 
1139   int output[6];
1140   NativeRegExpMacroAssembler::Result result =
1141       Execute(*code,
1142               *input,
1143               0,
1144               start_adr,
1145               start_adr + input->length(),
1146               output,
1147               true);
1148 
1149   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1150   CHECK_EQ(0, output[0]);
1151   CHECK_EQ(3, output[1]);
1152   CHECK_EQ(6, output[2]);
1153   CHECK_EQ(9, output[3]);
1154   CHECK_EQ(9, output[4]);
1155   CHECK_EQ(-1, output[5]);
1156 }
1157 
1158 
// Assembles a program that pushes a backtrack entry in an endless loop and
// expects the run to end with EXCEPTION and a pending exception on Top, which
// the test then clears.
TEST(MacroAssemblerStackOverflow) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler assembler(NativeRegExpMacroAssembler::ASCII, 0);

  // Every pass pushes one more backtrack entry and jumps straight back.
  Label push_forever;
  assembler.Bind(&push_forever);
  assembler.PushBacktrack(&push_forever);
  assembler.GoTo(&push_forever);

  Handle<String> description =
      Factory::NewStringFromAscii(CStrVector("<stack overflow test>"));
  Handle<Code> code = Handle<Code>::cast(assembler.GetCode(description));

  // The subject's content is irrelevant to this program.
  Handle<String> subject = Factory::NewStringFromAscii(CStrVector("dummy"));
  Address subject_start =
      Handle<SeqAsciiString>::cast(subject)->GetCharsAddress();
  Address subject_end = subject_start + subject->length();

  const NativeRegExpMacroAssembler::Result outcome =
      Execute(*code, *subject, 0, subject_start, subject_end, NULL, true);

  CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, outcome);
  CHECK(Top::has_pending_exception());
  Top::clear_pending_exception();
}
1194 
1195 
// Uses a very high register index (8000) alongside the two output registers
// and expects the run to succeed with captures {0, 42}.
TEST(MacroAssemblerNativeLotsOfRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  ArchRegExpMacroAssembler assembler(NativeRegExpMacroAssembler::ASCII, 2);

  // At least 2048, to ensure the space allocated for registers spans one
  // full page.
  const int kHighRegister = 8000;
  assembler.WriteCurrentPositionToRegister(kHighRegister, 42);
  assembler.WriteCurrentPositionToRegister(0, 0);
  assembler.WriteCurrentPositionToRegister(1, 1);
  Label after_backref;
  // Performs a system-stack push.
  assembler.CheckNotBackReference(0, &after_backref);
  assembler.Bind(&after_backref);
  // Move the high register's value into output register 1 via the stack.
  assembler.PushRegister(kHighRegister,
                         RegExpMacroAssembler::kNoStackLimitCheck);
  assembler.PopRegister(1);
  assembler.Succeed();

  Handle<String> description =
      Factory::NewStringFromAscii(CStrVector("<huge register space test>"));
  Handle<Code> code = Handle<Code>::cast(assembler.GetCode(description));

  // The subject's content is irrelevant; it only has to be long enough.
  Handle<String> subject =
      Factory::NewStringFromAscii(CStrVector("sample text"));
  Address subject_start =
      Handle<SeqAsciiString>::cast(subject)->GetCharsAddress();
  Address subject_end = subject_start + subject->length();

  int captures[2];
  const NativeRegExpMacroAssembler::Result outcome =
      Execute(*code, *subject, 0, subject_start, subject_end, captures, true);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, outcome);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(42, captures[1]);

  Top::clear_pending_exception();
}
1242 
1243 #else  // ! V8_REGEX_NATIVE
1244 
// Assembles bytecode for ^f(o)o with RegExpMacroAssemblerIrregexp and runs
// it through IrregexpInterpreter::Match on one matching and one non-matching
// two-byte subject, checking the capture registers after each run.
TEST(MacroAssembler) {
  V8::Initialize(NULL);
  const int kCodeBufferSize = 1024;
  byte code_buffer[kCodeBufferSize];
  RegExpMacroAssemblerIrregexp assembler(
      Vector<byte>(code_buffer, kCodeBufferSize));
  // ^f(o)o.
  Label fail, fail2, start;
  uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);
  // Register 4: set to 42, saved on the stack, then bumped by another 42.
  assembler.SetRegister(4, 42);
  assembler.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
  assembler.AdvanceRegister(4, 42);
  assembler.GoTo(&start);
  assembler.Fail();
  assembler.Bind(&start);
  assembler.PushBacktrack(&fail2);
  assembler.CheckCharacters(foo, 0, &fail, true);
  // Record the capture boundaries in registers 0..3.
  assembler.WriteCurrentPositionToRegister(0, 0);
  assembler.PushCurrentPosition();
  assembler.AdvanceCurrentPosition(3);
  assembler.WriteCurrentPositionToRegister(1, 0);
  assembler.PopCurrentPosition();
  assembler.AdvanceCurrentPosition(1);
  assembler.WriteCurrentPositionToRegister(2, 0);
  assembler.AdvanceCurrentPosition(1);
  assembler.WriteCurrentPositionToRegister(3, 0);
  assembler.Succeed();

  assembler.Bind(&fail);
  assembler.Backtrack();
  assembler.Succeed();

  // Backtrack target: pops the value saved earlier into register 0, fails.
  assembler.Bind(&fail2);
  assembler.PopRegister(0);
  assembler.Fail();

  v8::HandleScope scope;

  Handle<String> pattern = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
  Handle<ByteArray> bytecode =
      Handle<ByteArray>::cast(assembler.GetCode(pattern));
  int captures[5];

  // Subject that starts with "foo": the match must succeed.
  const uc16 foobar_chars[] = {'f', 'o', 'o', 'b', 'a', 'r'};
  Handle<String> foobar =
      Factory::NewStringFromTwoByte(Vector<const uc16>(foobar_chars, 6));

  CHECK(IrregexpInterpreter::Match(bytecode, foobar, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(1, captures[2]);
  CHECK_EQ(2, captures[3]);
  CHECK_EQ(84, captures[4]);

  // Subject that does not start with "foo": the match must fail, and
  // register 0 is expected to hold the popped 42.
  const uc16 barfoo_chars[] = {'b', 'a', 'r', 'f', 'o', 'o'};
  Handle<String> barfoo =
      Factory::NewStringFromTwoByte(Vector<const uc16>(barfoo_chars, 6));

  CHECK(!IrregexpInterpreter::Match(bytecode, barfoo, captures, 0));
  CHECK_EQ(42, captures[0]);
}
1307 
1308 #endif  // ! V8_REGEXP_NATIVE
1309 
1310 
// Checks DispatchTableConstructor::AddInverse: for pseudo-random range lists,
// choice 0 must be set in the table exactly for the characters that no range
// contains. Ends with a fixed check near the top of the 16-bit range.
TEST(AddInverseToTable) {
  static const int kLimit = 1000;
  static const int kRangeCount = 16;
  for (int round = 0; round < 10; round++) {
    ZoneScope zone_scope(DELETE_ON_EXIT);
    ZoneList<CharacterRange>* ranges =
        new ZoneList<CharacterRange>(kRangeCount);
    // Build kRangeCount pseudo-random ranges, clamped to kLimit.
    for (int r = 0; r < kRangeCount; r++) {
      int from = PseudoRandom(round + 87, r + 25) % kLimit;
      int to = from + (PseudoRandom(r + 87, round + 25) % (kLimit / 20));
      to = (to > kLimit) ? kLimit : to;
      ranges->Add(CharacterRange(from, to));
    }
    DispatchTable table;
    DispatchTableConstructor constructor(&table, false);
    constructor.set_choice_index(0);
    constructor.AddInverse(ranges);
    // Compare every character below kLimit against a brute-force scan.
    for (int c = 0; c < kLimit; c++) {
      bool covered = false;
      for (int r = 0; r < kRangeCount; r++) {
        if (ranges->at(r).Contains(c)) {
          covered = true;
          break;
        }
      }
      OutSet* out_set = table.Get(c);
      CHECK_EQ(covered, !out_set->Get(0));
    }
  }

  // Single range [0xFFF0, 0xFFFE]: 0xFFFE is inside the range (choice not
  // set), 0xFFFF is outside (choice set).
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* high_ranges = new ZoneList<CharacterRange>(1);
  high_ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
  DispatchTable high_table;
  DispatchTableConstructor high_constructor(&high_table, false);
  high_constructor.set_choice_index(0);
  high_constructor.AddInverse(high_ranges);
  CHECK(!high_table.Get(0xFFFE)->Get(0));
  CHECK(high_table.Get(0xFFFF)->Get(0));
}
1347 
1348 
canonicalize(uc32 c)1349 static uc32 canonicalize(uc32 c) {
1350   unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1351   int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1352   if (count == 0) {
1353     return c;
1354   } else {
1355     CHECK_EQ(1, count);
1356     return canon[0];
1357   }
1358 }
1359 
1360 
// Sanity checks for canonicalize():
//  - 'a'..'z' and their upper-case partners canonicalize identically, and
//    un-canonicalizing a lower-case letter yields {upper, lower};
//  - characters >= 128 never canonicalize to a value below 128;
//  - canonicalize() agrees with unibrow::ToUppercase except where the
//    upper-case form has more than one character or would map a character
//    >= 128 to one below 128; in those cases the character maps to itself.
TEST(LatinCanonicalize) {
  const uc32 kCodePointLimit = 1 << 21;

  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  for (char small = 'a'; small <= 'z'; small++) {
    char capital = small + ('A' - 'a');
    CHECK_EQ(canonicalize(small), canonicalize(capital));
    unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int equivalent_count = un_canonicalize.get(small, '\0', equivalents);
    CHECK_EQ(2, equivalent_count);
    CHECK_EQ(capital, equivalents[0]);
    CHECK_EQ(small, equivalents[1]);
  }

  for (uc32 c = 128; c < kCodePointLimit; c++) {
    CHECK_GE(canonicalize(c), 128);
  }

  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  for (uc32 c = 0; c < kCodePointLimit; c++) {
    unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    const int length = to_upper.get(c, '\0', upper);
    // Default: the character canonicalizes to itself.
    uc32 expected = c;
    if (length == 1) {
      const uc32 mapped = upper[0];
      const bool drops_below_128 = (c >= 128 && mapped < 128);
      if (!drops_below_128) expected = mapped;
    }
    CHECK_EQ(expected, canonicalize(c));
  }
}
1388 
1389 
CanonRange(uc32 c)1390 static uc32 CanonRange(uc32 c) {
1391   unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1392   int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1393   if (count == 0) {
1394     return c;
1395   } else {
1396     CHECK_EQ(1, count);
1397     return canon[0];
1398   }
1399 }
1400 
1401 
// Cross-checks the CanonicalizationRange table (via CanonRange) against
// direct Ecma262UnCanonicalize lookups.
TEST(RangeCanonicalization) {
  // Character 0 must carry the start marker.
  CHECK_NE(CanonRange(0) & CharacterRange::kStartMarker, 0);

  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;

  // Part 1: un-canonicalizing a character directly must yield as many
  // results as going through its range entry (looking up c - range and
  // shifting the results by range when c is not marked as a start).
  for (int c = 0; c < CharacterRange::kRangeCanonicalizeMax; c++) {
    const int range = CanonRange(c);
    unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int indirect_length = 0;
    const bool has_start_marker =
        (range & CharacterRange::kStartMarker) != 0;
    if (has_start_marker) {
      indirect_length = un_canonicalize.get(c, '\0', indirect);
    } else {
      indirect_length = un_canonicalize.get(c - range, '\0', indirect);
      for (int k = 0; k < indirect_length; k++) {
        indirect[k] += range;
      }
    }
    unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    const int direct_length = un_canonicalize.get(c, '\0', direct);
    CHECK_EQ(direct_length, indirect_length);
  }

  // Part 2: walk from marked start to marked start. Inside a block every
  // character must report its offset from the block start, and its
  // un-canonicalized forms must be the block start's forms plus that offset.
  int block_start = 0;
  while (block_start < CharacterRange::kRangeCanonicalizeMax) {
    const uc32 entry = CanonRange(block_start);
    CHECK_NE((entry & CharacterRange::kStartMarker), 0);
    const unsigned block_length = entry & CharacterRange::kPayloadMask;
    unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    const int first_length = un_canonicalize.get(block_start, '\0', first);
    for (unsigned offset = 1; offset < block_length; offset++) {
      CHECK_EQ(offset, CanonRange(block_start + offset));
      unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      const int succ_length =
          un_canonicalize.get(block_start + offset, '\0', succ);
      CHECK_EQ(first_length, succ_length);
      for (int j = 0; j < succ_length; j++) {
        int calc = first[j] + offset;
        int found = succ[j];
        CHECK_EQ(calc, found);
      }
    }
    block_start = block_start + block_length;
  }
}
1446 
1447 
// For every 16-bit character, each member of its un-canonicalized set must
// itself un-canonicalize to that very same set (same length, same elements in
// the same order).
TEST(UncanonicalizeEquivalence) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  unibrow::uchar members[unibrow::Ecma262UnCanonicalize::kMaxWidth];
  const int kCharCount = 1 << 16;
  for (int c = 0; c < kCharCount; c++) {
    const int member_count = un_canonicalize.get(c, '\0', members);
    for (int m = 0; m < member_count; m++) {
      unibrow::uchar again[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      const int again_count = un_canonicalize.get(members[m], '\0', again);
      CHECK_EQ(member_count, again_count);
      for (int k = 0; k < member_count; k++) {
        const int expected_char = static_cast<int>(members[k]);
        const int actual_char = static_cast<int>(again[k]);
        CHECK_EQ(expected_char, actual_char);
      }
    }
  }
}
1462 
1463 
TestRangeCaseIndependence(CharacterRange input,Vector<CharacterRange> expected)1464 static void TestRangeCaseIndependence(CharacterRange input,
1465                                       Vector<CharacterRange> expected) {
1466   ZoneScope zone_scope(DELETE_ON_EXIT);
1467   int count = expected.length();
1468   ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);
1469   input.AddCaseEquivalents(list);
1470   CHECK_EQ(count, list->length());
1471   for (int i = 0; i < list->length(); i++) {
1472     CHECK_EQ(expected[i].from(), list->at(i).from());
1473     CHECK_EQ(expected[i].to(), list->at(i).to());
1474   }
1475 }
1476 
1477 
TestSimpleRangeCaseIndependence(CharacterRange input,CharacterRange expected)1478 static void TestSimpleRangeCaseIndependence(CharacterRange input,
1479                                             CharacterRange expected) {
1480   EmbeddedVector<CharacterRange, 1> vector;
1481   vector[0] = expected;
1482   TestRangeCaseIndependence(input, vector);
1483 }
1484 
1485 
// Table of expectations for CharacterRange::AddCaseEquivalents on ASCII
// letter ranges; each call gives an input range and the single range that is
// expected to be added for it.
TEST(CharacterRangeCaseIndependence) {
  const CharacterRange lower_alphabet('a', 'z');
  const CharacterRange upper_alphabet('A', 'Z');

  // Single lower-case letters.
  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
                                  CharacterRange::Singleton('A'));
  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
                                  CharacterRange::Singleton('Z'));

  // Lower-case ranges: whole alphabet, interior, and both edges.
  TestSimpleRangeCaseIndependence(lower_alphabet, upper_alphabet);
  TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
                                  CharacterRange('C', 'F'));
  TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
                                  CharacterRange('A', 'B'));
  TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
                                  CharacterRange('Y', 'Z'));
  // A range reaching one character beyond the letters on both sides.
  TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
                                  upper_alphabet);

  // Upper-case ranges, mirroring the cases above.
  TestSimpleRangeCaseIndependence(upper_alphabet, lower_alphabet);
  TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
                                  CharacterRange('c', 'f'));
  TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
                                  lower_alphabet);

  // Only [l-z] is missing to make [A-k] case independent, but [a-z] is
  // expected to be added because a whole block is always added at a time.
  TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'), lower_alphabet);
}
1513 
1514 
InClass(uc16 c,ZoneList<CharacterRange> * ranges)1515 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1516   if (ranges == NULL)
1517     return false;
1518   for (int i = 0; i < ranges->length(); i++) {
1519     CharacterRange range = ranges->at(i);
1520     if (range.from() <= c && c <= range.to())
1521       return true;
1522   }
1523   return false;
1524 }
1525 
1526 
// Splits the "everything" class by the word-character bounds and verifies,
// for every 16-bit character, that it lands in |included| exactly when it is
// inside one of the overlay's [from, to] pairs and in |excluded| otherwise.
TEST(CharClassDifference) {
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* base = new ZoneList<CharacterRange>(1);
  base->Add(CharacterRange::Everything());
  // The overlay is read below as consecutive (from, to) pairs.
  Vector<const uc16> overlay = CharacterRange::GetWordBounds();
  ZoneList<CharacterRange>* included = NULL;
  ZoneList<CharacterRange>* excluded = NULL;
  CharacterRange::Split(base, overlay, &included, &excluded);

  const int kCharCount = 1 << 16;
  for (int c = 0; c < kCharCount; c++) {
    if (!InClass(c, base)) {
      // Characters outside the base class must appear in neither result.
      CHECK(!InClass(c, included));
      CHECK(!InClass(c, excluded));
      continue;
    }
    bool in_overlay = false;
    for (int pair = 0; pair < overlay.length(); pair += 2) {
      if (overlay[pair] <= c && c <= overlay[pair + 1]) {
        in_overlay = true;
        break;
      }
    }
    CHECK_EQ(in_overlay, InClass(c, included));
    CHECK_EQ(!in_overlay, InClass(c, excluded));
  }
}
1551 
1552 
// Feeds a regexp with a back reference inside a nested, repeated group to the
// Execute helper. NOTE(review): the three flags are passed positionally; the
// last one presumably requests graph output -- confirm against Execute's
// declaration earlier in this file.
TEST(Graph) {
  V8::Initialize(NULL);
  // "\\1" puts the two characters \1 (a back reference to the only capture
  // group) into the pattern; an unescaped "\1" in a C++ literal is an octal
  // escape for the single character with value 1.
  Execute("(?:(?:x(.))?\\1)+$", false, true, true);
}
1557