1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include <cstdlib>
29 #include <sstream>
30
31 #include "include/v8.h"
32 #include "src/v8.h"
33
34 #include "src/ast/ast.h"
35 #include "src/char-predicates-inl.h"
36 #include "src/ostreams.h"
37 #include "src/regexp/jsregexp.h"
38 #include "src/regexp/regexp-macro-assembler.h"
39 #include "src/regexp/regexp-macro-assembler-irregexp.h"
40 #include "src/regexp/regexp-parser.h"
41 #include "src/splay-tree-inl.h"
42 #include "src/string-stream.h"
43 #ifdef V8_INTERPRETED_REGEXP
44 #include "src/regexp/interpreter-irregexp.h"
45 #else // V8_INTERPRETED_REGEXP
46 #include "src/macro-assembler.h"
47 #if V8_TARGET_ARCH_ARM
48 #include "src/arm/assembler-arm.h" // NOLINT
49 #include "src/arm/macro-assembler-arm.h"
50 #include "src/regexp/arm/regexp-macro-assembler-arm.h"
51 #endif
52 #if V8_TARGET_ARCH_ARM64
53 #include "src/arm64/assembler-arm64.h"
54 #include "src/arm64/macro-assembler-arm64.h"
55 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
56 #endif
57 #if V8_TARGET_ARCH_PPC
58 #include "src/ppc/assembler-ppc.h"
59 #include "src/ppc/macro-assembler-ppc.h"
60 #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
61 #endif
62 #if V8_TARGET_ARCH_MIPS
63 #include "src/mips/assembler-mips.h"
64 #include "src/mips/macro-assembler-mips.h"
65 #include "src/regexp/mips/regexp-macro-assembler-mips.h"
66 #endif
67 #if V8_TARGET_ARCH_MIPS64
68 #include "src/mips64/assembler-mips64.h"
69 #include "src/mips64/macro-assembler-mips64.h"
70 #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
71 #endif
72 #if V8_TARGET_ARCH_X64
73 #include "src/regexp/x64/regexp-macro-assembler-x64.h"
74 #include "src/x64/assembler-x64.h"
75 #include "src/x64/macro-assembler-x64.h"
76 #endif
77 #if V8_TARGET_ARCH_IA32
78 #include "src/ia32/assembler-ia32.h"
79 #include "src/ia32/macro-assembler-ia32.h"
80 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
81 #endif
82 #if V8_TARGET_ARCH_X87
83 #include "src/regexp/x87/regexp-macro-assembler-x87.h"
84 #include "src/x87/assembler-x87.h"
85 #include "src/x87/macro-assembler-x87.h"
86 #endif
87 #endif // V8_INTERPRETED_REGEXP
88 #include "test/cctest/cctest.h"
89
90 using namespace v8::internal;
91
92
CheckParse(const char * input)93 static bool CheckParse(const char* input) {
94 v8::HandleScope scope(CcTest::isolate());
95 Zone zone;
96 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
97 RegExpCompileData result;
98 return v8::internal::RegExpParser::ParseRegExp(
99 CcTest::i_isolate(), &zone, &reader, false, false, &result);
100 }
101
102
CheckParseEq(const char * input,const char * expected,bool unicode=false)103 static void CheckParseEq(const char* input, const char* expected,
104 bool unicode = false) {
105 v8::HandleScope scope(CcTest::isolate());
106 Zone zone;
107 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
108 RegExpCompileData result;
109 CHECK(v8::internal::RegExpParser::ParseRegExp(
110 CcTest::i_isolate(), &zone, &reader, false, unicode, &result));
111 CHECK(result.tree != NULL);
112 CHECK(result.error.is_null());
113 std::ostringstream os;
114 result.tree->Print(os, &zone);
115 if (strcmp(expected, os.str().c_str()) != 0) {
116 printf("%s | %s\n", expected, os.str().c_str());
117 }
118 CHECK_EQ(0, strcmp(expected, os.str().c_str()));
119 }
120
121
CheckSimple(const char * input)122 static bool CheckSimple(const char* input) {
123 v8::HandleScope scope(CcTest::isolate());
124 Zone zone;
125 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
126 RegExpCompileData result;
127 CHECK(v8::internal::RegExpParser::ParseRegExp(
128 CcTest::i_isolate(), &zone, &reader, false, false, &result));
129 CHECK(result.tree != NULL);
130 CHECK(result.error.is_null());
131 return result.simple;
132 }
133
// Pair of match-length bounds returned by CheckMinMaxMatch().
struct MinMaxPair {
  int min_match;  // Filled from RegExpTree::min_match().
  int max_match;  // Filled from RegExpTree::max_match().
};
138
139
CheckMinMaxMatch(const char * input)140 static MinMaxPair CheckMinMaxMatch(const char* input) {
141 v8::HandleScope scope(CcTest::isolate());
142 Zone zone;
143 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
144 RegExpCompileData result;
145 CHECK(v8::internal::RegExpParser::ParseRegExp(
146 CcTest::i_isolate(), &zone, &reader, false, false, &result));
147 CHECK(result.tree != NULL);
148 CHECK(result.error.is_null());
149 int min_match = result.tree->min_match();
150 int max_match = result.tree->max_match();
151 MinMaxPair pair = { min_match, max_match };
152 return pair;
153 }
154
155
// Asserts that the parser rejects |input|.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))

// Asserts that CheckSimple(|input|) equals |simple|. The expansion carries no
// trailing semicolon; the call site supplies it, so the macro behaves like a
// single statement inside if/else.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))

// Asserts that the tree parsed from |input| reports |min| / |max| as its
// minimum / maximum match length. Wrapped in do { } while (false) so that the
// multi-statement body acts as exactly one statement at the call site.
#define CHECK_MIN_MAX(input, min, max)              \
  do {                                              \
    MinMaxPair min_max = CheckMinMaxMatch(input);   \
    CHECK_EQ(min, min_max.min_match);               \
    CHECK_EQ(max, min_max.max_match);               \
  } while (false)
163
164
TestRegExpParser(bool lookbehind)165 void TestRegExpParser(bool lookbehind) {
166 FLAG_harmony_regexp_lookbehind = lookbehind;
167 FLAG_harmony_unicode_regexps = true;
168
169 CHECK_PARSE_ERROR("?");
170
171 CheckParseEq("abc", "'abc'");
172 CheckParseEq("", "%");
173 CheckParseEq("abc|def", "(| 'abc' 'def')");
174 CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
175 CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
176 CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
177 CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
178 CheckParseEq("a*", "(# 0 - g 'a')");
179 CheckParseEq("a*?", "(# 0 - n 'a')");
180 CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
181 CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
182 CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
183 CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
184 CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
185 CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
186 CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
187 CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
188 CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
189 CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
190 CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
191 CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
192 CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
193 CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
194 CheckParseEq("(?:foo)", "'foo'");
195 CheckParseEq("(?: foo )", "' foo '");
196 CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
197 CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
198 CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
199 CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
200 if (lookbehind) {
201 CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
202 CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
203 } else {
204 CHECK_PARSE_ERROR("foo(?<=bar)baz");
205 CHECK_PARSE_ERROR("foo(?<!bar)baz");
206 }
207 CheckParseEq("()", "(^ %)");
208 CheckParseEq("(?=)", "(-> + %)");
209 CheckParseEq("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
210 CheckParseEq("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
211 CheckParseEq("[x]", "[x]");
212 CheckParseEq("[xyz]", "[x y z]");
213 CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
214 CheckParseEq("[-123]", "[- 1 2 3]");
215 CheckParseEq("[^123]", "^[1 2 3]");
216 CheckParseEq("]", "']'");
217 CheckParseEq("}", "'}'");
218 CheckParseEq("[a-b-c]", "[a-b - c]");
219 CheckParseEq("[\\d]", "[0-9]");
220 CheckParseEq("[x\\dz]", "[x 0-9 z]");
221 CheckParseEq("[\\d-z]", "[0-9 - z]");
222 CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
223 CheckParseEq("[z-\\d]", "[z - 0-9]");
224 // Control character outside character class.
225 CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
226 CheckParseEq("\\c!", "'\\c!'");
227 CheckParseEq("\\c_", "'\\c_'");
228 CheckParseEq("\\c~", "'\\c~'");
229 CheckParseEq("\\c1", "'\\c1'");
230 // Control character inside character class.
231 CheckParseEq("[\\c!]", "[\\ c !]");
232 CheckParseEq("[\\c_]", "[\\x1f]");
233 CheckParseEq("[\\c~]", "[\\ c ~]");
234 CheckParseEq("[\\ca]", "[\\x01]");
235 CheckParseEq("[\\cz]", "[\\x1a]");
236 CheckParseEq("[\\cA]", "[\\x01]");
237 CheckParseEq("[\\cZ]", "[\\x1a]");
238 CheckParseEq("[\\c1]", "[\\x11]");
239
240 CheckParseEq("[a\\]c]", "[a ] c]");
241 CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
242 CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
243 CheckParseEq("\\0", "'\\x00'");
244 CheckParseEq("\\8", "'8'");
245 CheckParseEq("\\9", "'9'");
246 CheckParseEq("\\11", "'\\x09'");
247 CheckParseEq("\\11a", "'\\x09a'");
248 CheckParseEq("\\011", "'\\x09'");
249 CheckParseEq("\\00011", "'\\x0011'");
250 CheckParseEq("\\118", "'\\x098'");
251 CheckParseEq("\\111", "'I'");
252 CheckParseEq("\\1111", "'I1'");
253 CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
254 CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
255 CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
256 CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
257 CheckParseEq("(x)(x)(x)\\1*",
258 "(: (^ 'x') (^ 'x') (^ 'x')"
259 " (# 0 - g (<- 1)))");
260 CheckParseEq("(x)(x)(x)\\2*",
261 "(: (^ 'x') (^ 'x') (^ 'x')"
262 " (# 0 - g (<- 2)))");
263 CheckParseEq("(x)(x)(x)\\3*",
264 "(: (^ 'x') (^ 'x') (^ 'x')"
265 " (# 0 - g (<- 3)))");
266 CheckParseEq("(x)(x)(x)\\4*",
267 "(: (^ 'x') (^ 'x') (^ 'x')"
268 " (# 0 - g '\\x04'))");
269 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
270 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
271 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
272 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
273 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
274 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
275 CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
276 CheckParseEq("(a\\1)", "(^ 'a')");
277 CheckParseEq("(\\1a)", "(^ 'a')");
278 CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
279 CheckParseEq("(?=a)?a", "'a'");
280 CheckParseEq("(?=a){0,10}a", "'a'");
281 CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
282 CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
283 CheckParseEq("(?!a)?a", "'a'");
284 CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
285 CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
286 CheckParseEq("(?!\\1(a\\1)\\1)\\1",
287 "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
288 CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
289 "(: (<- 1) (<- 2) (^ (: 'a' (^ 'b') (<- 2))) (<- 1))");
290 if (lookbehind) {
291 CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
292 "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
293 }
294 CheckParseEq("[\\0]", "[\\x00]");
295 CheckParseEq("[\\11]", "[\\x09]");
296 CheckParseEq("[\\11a]", "[\\x09 a]");
297 CheckParseEq("[\\011]", "[\\x09]");
298 CheckParseEq("[\\00011]", "[\\x00 1 1]");
299 CheckParseEq("[\\118]", "[\\x09 8]");
300 CheckParseEq("[\\111]", "[I]");
301 CheckParseEq("[\\1111]", "[I 1]");
302 CheckParseEq("\\x34", "'\x34'");
303 CheckParseEq("\\x60", "'\x60'");
304 CheckParseEq("\\x3z", "'x3z'");
305 CheckParseEq("\\c", "'\\c'");
306 CheckParseEq("\\u0034", "'\x34'");
307 CheckParseEq("\\u003z", "'u003z'");
308 CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
309
310 // Unicode regexps
311 CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
312 CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
313 true);
314 CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
315 true);
316 CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
317 CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
318
319 CHECK_SIMPLE("", false);
320 CHECK_SIMPLE("a", true);
321 CHECK_SIMPLE("a|b", false);
322 CHECK_SIMPLE("a\\n", false);
323 CHECK_SIMPLE("^a", false);
324 CHECK_SIMPLE("a$", false);
325 CHECK_SIMPLE("a\\b!", false);
326 CHECK_SIMPLE("a\\Bb", false);
327 CHECK_SIMPLE("a*", false);
328 CHECK_SIMPLE("a*?", false);
329 CHECK_SIMPLE("a?", false);
330 CHECK_SIMPLE("a??", false);
331 CHECK_SIMPLE("a{0,1}?", false);
332 CHECK_SIMPLE("a{1,1}?", false);
333 CHECK_SIMPLE("a{1,2}?", false);
334 CHECK_SIMPLE("a+?", false);
335 CHECK_SIMPLE("(a)", false);
336 CHECK_SIMPLE("(a)\\1", false);
337 CHECK_SIMPLE("(\\1a)", false);
338 CHECK_SIMPLE("\\1(a)", false);
339 CHECK_SIMPLE("a\\s", false);
340 CHECK_SIMPLE("a\\S", false);
341 CHECK_SIMPLE("a\\d", false);
342 CHECK_SIMPLE("a\\D", false);
343 CHECK_SIMPLE("a\\w", false);
344 CHECK_SIMPLE("a\\W", false);
345 CHECK_SIMPLE("a.", false);
346 CHECK_SIMPLE("a\\q", false);
347 CHECK_SIMPLE("a[a]", false);
348 CHECK_SIMPLE("a[^a]", false);
349 CHECK_SIMPLE("a[a-z]", false);
350 CHECK_SIMPLE("a[\\q]", false);
351 CHECK_SIMPLE("a(?:b)", false);
352 CHECK_SIMPLE("a(?=b)", false);
353 CHECK_SIMPLE("a(?!b)", false);
354 CHECK_SIMPLE("\\x60", false);
355 CHECK_SIMPLE("\\u0060", false);
356 CHECK_SIMPLE("\\cA", false);
357 CHECK_SIMPLE("\\q", false);
358 CHECK_SIMPLE("\\1112", false);
359 CHECK_SIMPLE("\\0", false);
360 CHECK_SIMPLE("(a)\\1", false);
361 CHECK_SIMPLE("(?=a)?a", false);
362 CHECK_SIMPLE("(?!a)?a\\1", false);
363 CHECK_SIMPLE("(?:(?=a))a\\1", false);
364
365 CheckParseEq("a{}", "'a{}'");
366 CheckParseEq("a{,}", "'a{,}'");
367 CheckParseEq("a{", "'a{'");
368 CheckParseEq("a{z}", "'a{z}'");
369 CheckParseEq("a{1z}", "'a{1z}'");
370 CheckParseEq("a{12z}", "'a{12z}'");
371 CheckParseEq("a{12,", "'a{12,'");
372 CheckParseEq("a{12,3b", "'a{12,3b'");
373 CheckParseEq("{}", "'{}'");
374 CheckParseEq("{,}", "'{,}'");
375 CheckParseEq("{", "'{'");
376 CheckParseEq("{z}", "'{z}'");
377 CheckParseEq("{1z}", "'{1z}'");
378 CheckParseEq("{12z}", "'{12z}'");
379 CheckParseEq("{12,", "'{12,'");
380 CheckParseEq("{12,3b", "'{12,3b'");
381
382 CHECK_MIN_MAX("a", 1, 1);
383 CHECK_MIN_MAX("abc", 3, 3);
384 CHECK_MIN_MAX("a[bc]d", 3, 3);
385 CHECK_MIN_MAX("a|bc", 1, 2);
386 CHECK_MIN_MAX("ab|c", 1, 2);
387 CHECK_MIN_MAX("a||bc", 0, 2);
388 CHECK_MIN_MAX("|", 0, 0);
389 CHECK_MIN_MAX("(?:ab)", 2, 2);
390 CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
391 CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
392 CHECK_MIN_MAX("(ab)", 2, 2);
393 CHECK_MIN_MAX("(ab|cde)", 2, 3);
394 CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
395 CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
396 CHECK_MIN_MAX("(?:ab)?", 0, 2);
397 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
398 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
399 CHECK_MIN_MAX("a?", 0, 1);
400 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
401 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
402 CHECK_MIN_MAX("a??", 0, 1);
403 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
404 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
405 CHECK_MIN_MAX("(?:a?)?", 0, 1);
406 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
407 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
408 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
409 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
410 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
411 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
412 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
413 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
414 CHECK_MIN_MAX("a{0}", 0, 0);
415 CHECK_MIN_MAX("(?:a+){0}", 0, 0);
416 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
417 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
418 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
419 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
420 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
421 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
422 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
423 CHECK_MIN_MAX("a\\bc", 2, 2);
424 CHECK_MIN_MAX("a\\Bc", 2, 2);
425 CHECK_MIN_MAX("a\\sc", 3, 3);
426 CHECK_MIN_MAX("a\\Sc", 3, 3);
427 CHECK_MIN_MAX("a(?=b)c", 2, 2);
428 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
429 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
430 }
431
432
// Runs the shared parser checks with lookbehind syntax switched on.
TEST(ParserWithLookbehind) {
  const bool kLookbehindEnabled = true;
  TestRegExpParser(kLookbehindEnabled);
}
436
437
// Runs the shared parser checks with lookbehind syntax switched off, so that
// the branch of TestRegExpParser expecting "(?<=" and "(?<!" to be parse
// errors is exercised.
TEST(ParserWithoutLookbehind) {
  TestRegExpParser(false);  // Lookbehind disabled.
}
441
442
// Patterns paired with the printed parse tree each one must produce.
TEST(ParserRegression) {
  static const struct {
    const char* pattern;
    const char* printed_tree;
  } kCases[] = {
      {"[A-Z$-][x]", "(! [A-Z $ -] [x])"},
      {"a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"},
      {"{", "'{'"},
      {"a|", "(| 'a' %)"},
  };
  const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t index = 0; index < case_count; index++) {
    CheckParseEq(kCases[index].pattern, kCases[index].printed_tree);
  }
}
449
ExpectError(const char * input,const char * expected)450 static void ExpectError(const char* input,
451 const char* expected) {
452 v8::HandleScope scope(CcTest::isolate());
453 Zone zone;
454 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
455 RegExpCompileData result;
456 CHECK(!v8::internal::RegExpParser::ParseRegExp(
457 CcTest::i_isolate(), &zone, &reader, false, false, &result));
458 CHECK(result.tree == NULL);
459 CHECK(!result.error.is_null());
460 v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
461 CHECK_EQ(0, strcmp(expected, str.get()));
462 }
463
464
// Checks the error message reported for a set of malformed patterns, and that
// a pattern with more than kMaxCaptures groups is rejected.
TEST(Errors) {
  const char* kEndBackslash = "\\ at end of pattern";
  const char* kUnterminatedGroup = "Unterminated group";
  const char* kInvalidGroup = "Invalid group";
  const char* kUnterminatedCharacterClass = "Unterminated character class";
  const char* kNothingToRepeat = "Nothing to repeat";

  const struct {
    const char* pattern;
    const char* message;
  } kCases[] = {
      {"\\", kEndBackslash},
      {"(foo", kUnterminatedGroup},
      {"(?", kInvalidGroup},
      {"[", kUnterminatedCharacterClass},
      {"[a-", kUnterminatedCharacterClass},
      {"*", kNothingToRepeat},
      {"?", kNothingToRepeat},
      {"+", kNothingToRepeat},
      {"{1}", kNothingToRepeat},
      {"{1,2}", kNothingToRepeat},
      {"{1,}", kNothingToRepeat},
  };
  const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t index = 0; index < case_count; index++) {
    ExpectError(kCases[index].pattern, kCases[index].message);
  }

  // Check that we don't allow more than kMaxCapture captures
  const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
  const char* kTooManyCaptures = "Too many captures";
  std::ostringstream groups;
  // Emit kMaxCaptures + 1 empty capture groups.
  int remaining = kMaxCaptures + 1;
  while (remaining-- > 0) {
    groups << "()";
  }
  ExpectError(groups.str().c_str(), kTooManyCaptures);
}
492
493
IsDigit(uc16 c)494 static bool IsDigit(uc16 c) {
495 return ('0' <= c && c <= '9');
496 }
497
498
NotDigit(uc16 c)499 static bool NotDigit(uc16 c) {
500 return !IsDigit(c);
501 }
502
503
IsWhiteSpaceOrLineTerminator(uc16 c)504 static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
505 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
506 // WhiteSpace (7.2) and LineTerminator (7.3) values.
507 return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
508 }
509
510
NotWhiteSpaceNorLineTermiantor(uc16 c)511 static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
512 return !IsWhiteSpaceOrLineTerminator(c);
513 }
514
515
NotWord(uc16 c)516 static bool NotWord(uc16 c) {
517 return !IsRegExpWord(c);
518 }
519
520
TestCharacterClassEscapes(uc16 c,bool (pred)(uc16 c))521 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
522 Zone zone;
523 ZoneList<CharacterRange>* ranges =
524 new(&zone) ZoneList<CharacterRange>(2, &zone);
525 CharacterRange::AddClassEscape(c, ranges, &zone);
526 for (unsigned i = 0; i < (1 << 16); i++) {
527 bool in_class = false;
528 for (int j = 0; !in_class && j < ranges->length(); j++) {
529 CharacterRange& range = ranges->at(j);
530 in_class = (range.from() <= i && i <= range.to());
531 }
532 CHECK_EQ(pred(i), in_class);
533 }
534 }
535
536
// Pairs each class escape character with the predicate that its generated
// ranges must agree with.
TEST(CharacterClassEscapes) {
  static const struct {
    uc16 escape;
    bool (*predicate)(uc16);
  } kCases[] = {
      {'.', IsRegExpNewline},
      {'d', IsDigit},
      {'D', NotDigit},
      {'s', IsWhiteSpaceOrLineTerminator},
      {'S', NotWhiteSpaceNorLineTermiantor},
      {'w', IsRegExpWord},
      {'W', NotWord},
  };
  const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t index = 0; index < case_count; index++) {
    TestCharacterClassEscapes(kCases[index].escape, kCases[index].predicate);
  }
}
546
547
Compile(const char * input,bool multiline,bool unicode,bool is_one_byte,Zone * zone)548 static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
549 bool is_one_byte, Zone* zone) {
550 Isolate* isolate = CcTest::i_isolate();
551 FlatStringReader reader(isolate, CStrVector(input));
552 RegExpCompileData compile_data;
553 if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
554 &reader, multiline, unicode,
555 &compile_data))
556 return NULL;
557 Handle<String> pattern = isolate->factory()
558 ->NewStringFromUtf8(CStrVector(input))
559 .ToHandleChecked();
560 Handle<String> sample_subject =
561 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
562 RegExpEngine::Compile(isolate, zone, &compile_data, false, false, multiline,
563 false, pattern, sample_subject, is_one_byte);
564 return compile_data.node;
565 }
566
567
Execute(const char * input,bool multiline,bool unicode,bool is_one_byte,bool dot_output=false)568 static void Execute(const char* input, bool multiline, bool unicode,
569 bool is_one_byte, bool dot_output = false) {
570 v8::HandleScope scope(CcTest::isolate());
571 Zone zone;
572 RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
573 USE(node);
574 #ifdef DEBUG
575 if (dot_output) {
576 RegExpEngine::DotPrint(input, node, false);
577 }
578 #endif // DEBUG
579 }
580
581
// Configuration class handed to ZoneSplayTree in the splay tree test:
// int keys, int values, and a three-way integer comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Returns -1, 0 or 1 when |a| is less than, equal to, or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
600
601
// Deterministic mixing function used to generate test data: XORs |i| * 781
// with |j| * 329 and returns the result converted to unsigned.
static unsigned PseudoRandom(int i, int j) {
  const int scaled_i = i * 781;
  const int scaled_j = j * 329;
  // The original applied bitwise NOT twice, which yields the value unchanged.
  const int mixed = scaled_i ^ scaled_j;
  return mixed;
}
605
606
// Inserts and removes pseudo-random keys in a ZoneSplayTree while mirroring
// the expected contents in a bool array, re-checking every key after each
// mutation. Each stored value is three times its key.
TEST(SplayTreeSimple) {
  static const unsigned kLimit = 1000;
  typedef ZoneSplayTree<TestConfig>::Locator Locator;
  Zone zone;
  ZoneSplayTree<TestConfig> tree(&zone);
  // seen[k] is true exactly when key k is expected to be in the tree.
  bool seen[kLimit];
  for (unsigned slot = 0; slot < kLimit; slot++) seen[slot] = false;
// Uses the `loc` variable of the scope in which it is expanded.
#define CHECK_MAPS_EQUAL() do {                                      \
    for (unsigned k = 0; k < kLimit; k++)                            \
      CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
  } while (false)
  for (int outer = 0; outer < 50; outer++) {
    for (int inner = 0; inner < 50; inner++) {
      const int key = PseudoRandom(outer, inner) % kLimit;
      if (!seen[key]) {
        // Check that it wasn't there already and then add it.
        Locator loc;
        CHECK(!tree.Find(key, &loc));
        CHECK(tree.Insert(key, &loc));
        CHECK_EQ(key, loc.key());
        loc.set_value(3 * key);
        seen[key] = true;
        CHECK_MAPS_EQUAL();
      } else {
        // We've already seen this one.  Check the value and remove
        // it.
        Locator loc;
        CHECK(tree.Find(key, &loc));
        CHECK_EQ(key, loc.key());
        CHECK_EQ(3 * key, loc.value());
        tree.Remove(key);
        seen[key] = false;
        CHECK_MAPS_EQUAL();
      }
      // Probe with a key that is present: both bound queries must return the
      // key itself. Either successful probe ends the inner loop.
      int probe = PseudoRandom(inner, outer) % kLimit;
      if (seen[probe]) {
        Locator loc;
        CHECK(tree.FindGreatestLessThan(probe, &loc));
        CHECK_EQ(loc.key(), probe);
        break;
      }
      probe = PseudoRandom(outer + inner, outer - inner) % kLimit;
      if (seen[probe]) {
        Locator loc;
        CHECK(tree.FindLeastGreaterThan(probe, &loc));
        CHECK_EQ(loc.key(), probe);
        break;
      }
    }
  }
}
657
658
// Fills a DispatchTable with kRangeCount sets of pseudo-random ranges and
// checks, for every value below kLimit, that the table reports exactly the
// sets whose ranges contain that value.
TEST(DispatchTableConstruction) {
  // Initialize test data.
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  static const int kEndpointCount = 2 * kRangeSize;
  uc16 ranges[kRangeCount][kEndpointCount];
  for (int set = 0; set < kRangeCount; set++) {
    Vector<uc16> endpoints(ranges[set], kEndpointCount);
    for (int slot = 0; slot < kEndpointCount; slot++) {
      endpoints[slot] = PseudoRandom(set + 25, slot + 87) % kLimit;
    }
    // Sorting turns consecutive pairs into well-formed (from <= to) ranges.
    endpoints.Sort();
    for (int slot = 1; slot < kEndpointCount; slot++) {
      CHECK(endpoints[slot - 1] <= endpoints[slot]);
    }
  }
  // Enter test data into dispatch table.
  Zone zone;
  DispatchTable table(&zone);
  for (int set = 0; set < kRangeCount; set++) {
    uc16* endpoints = ranges[set];
    for (int slot = 0; slot < kEndpointCount; slot += 2) {
      table.AddRange(CharacterRange(endpoints[slot], endpoints[slot + 1]), set,
                     &zone);
    }
  }
  // Check that the table looks as we would expect
  for (int value = 0; value < kLimit; value++) {
    OutSet* outs = table.Get(value);
    for (int set = 0; set < kRangeCount; set++) {
      uc16* endpoints = ranges[set];
      bool expected_on = false;
      for (int slot = 0; slot < kEndpointCount; slot += 2) {
        if (endpoints[slot] <= value && value <= endpoints[slot + 1]) {
          expected_on = true;
          break;
        }
      }
      CHECK_EQ(expected_on, outs->Get(set));
    }
  }
}
695
696
697 // Test of debug-only syntax.
698 #ifdef DEBUG
699
// Checks that possessive quantifiers parse when
// FLAG_regexp_possessive_quantifier is on and are rejected when it is off.
// The flag's original value is restored at the end.
TEST(ParsePossessiveRepetition) {
  const bool saved_flag = FLAG_regexp_possessive_quantifier;

  static const struct {
    const char* pattern;
    const char* printed_tree;
  } kAccepted[] = {
      {"a*+", "(# 0 - p 'a')"},
      {"a++", "(# 1 - p 'a')"},
      {"a?+", "(# 0 1 p 'a')"},
      {"a{10,20}+", "(# 10 20 p 'a')"},
      {"za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')"},
  };
  static const char* const kRejected[] = {
      "a*+", "a++", "a?+", "a{10,20}+", "a{10,20}+b",
  };

  // Enable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = true;
  for (size_t index = 0; index < sizeof(kAccepted) / sizeof(kAccepted[0]);
       index++) {
    CheckParseEq(kAccepted[index].pattern, kAccepted[index].printed_tree);
  }

  // Disable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = false;
  for (size_t index = 0; index < sizeof(kRejected) / sizeof(kRejected[0]);
       index++) {
    CHECK_PARSE_ERROR(kRejected[index]);
  }

  FLAG_regexp_possessive_quantifier = saved_flag;
}
723
724 #endif
725
726 // Tests of interpreter.
727
728
729 #ifndef V8_INTERPRETED_REGEXP
730
// Selects the native regexp macro assembler class for the target
// architecture and exposes it to the tests below under one common name.
#if V8_TARGET_ARCH_IA32
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X64
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM
typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM64
typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_PPC
typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS64
// NOTE(review): MIPS64 uses the same class name as MIPS; presumably the
// mips64 header declares it under that name -- confirm.
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X87
typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
#endif
748
// Scope helper for the native macro assembler tests: the constructor opens a
// handle scope, creates a new context and enters it; the destructor exits
// that context.
class ContextInitializer {
 public:
  ContextInitializer()
      : scope_(CcTest::isolate()),
        env_(v8::Context::New(CcTest::isolate())) {
    env_->Enter();
  }
  ~ContextInitializer() {
    env_->Exit();
  }
 private:
  // Declared before env_ so that the handle scope is constructed first.
  v8::HandleScope scope_;
  v8::Local<v8::Context> env_;
};
763
764
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * captures)765 static ArchRegExpMacroAssembler::Result Execute(Code* code,
766 String* input,
767 int start_offset,
768 const byte* input_start,
769 const byte* input_end,
770 int* captures) {
771 return NativeRegExpMacroAssembler::Execute(
772 code,
773 input,
774 start_offset,
775 input_start,
776 input_end,
777 captures,
778 0,
779 CcTest::i_isolate());
780 }
781
782
// Generates native code consisting of a single Succeed() and runs it on
// "foofoo": the result must be SUCCESS and all four capture slots must have
// been overwritten with -1.
TEST(MacroAssemblerNativeSuccess) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  m.Succeed();

  Handle<String> source = factory->NewStringFromStaticChars("");
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  // Sentinel values that the generated code is expected to replace.
  int captures[4] = {42, 37, 87, 117};
  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  const byte* input_begin =
      reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
  const byte* input_end = input_begin + seq_input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_begin, input_end, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  for (int slot = 0; slot < 4; slot++) {
    CHECK_EQ(-1, captures[slot]);
  }
}
819
820
// Hand-assembles a matcher labelled "^foo" for one-byte input and runs it
// twice: on "foofoo" it must succeed with captures {0, 3, -1, -1}; on
// "barbarbar" it must fail.
TEST(MacroAssemblerNativeSimple) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  Label fail, backtrack;
  m.PushBacktrack(&fail);
  m.CheckNotAtStart(0, NULL);
  // Compare the characters at offsets 2, 1 and 0 against 'o', 'o', 'f'.
  m.LoadCurrentCharacter(2, NULL);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(1, NULL, false);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(0, NULL, false);
  m.CheckNotCharacter('f', NULL);
  // Record the match in registers 0 and 1, then advance past it.
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 3);
  m.AdvanceCurrentPosition(3);
  m.PushBacktrack(&backtrack);
  m.Succeed();
  m.Bind(&backtrack);
  m.Backtrack();
  m.Bind(&fail);
  m.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("^foo");
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  int captures[4] = {42, 37, 87, 117};

  // First run: the subject starts with "foo".
  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address input_begin = seq_input->GetCharsAddress();
  Address input_end = input_begin + input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_begin, input_end, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  // Second run: the subject does not start with "foo".
  input = factory->NewStringFromStaticChars("barbarbar");
  seq_input = Handle<SeqOneByteString>::cast(input);
  input_begin = seq_input->GetCharsAddress();
  input_end = input_begin + input->length();

  result = Execute(*code, *input, 0, input_begin, input_end, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
886
887
// Runs natively generated code for the pattern ^foo against two-byte (UC16)
// subject strings: one that begins with "foo" (expects SUCCESS with captures
// [0, 3] and the two unused capture registers reported as -1) and one that
// does not (expects FAILURE).
TEST(MacroAssemblerNativeSimpleUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
                             4);

  // NOTE(review): the NULL labels below presumably mean "backtrack on
  // mismatch", which would land on |fail| because it is pushed first --
  // confirm against the RegExpMacroAssembler interface.
  Label fail, backtrack;
  m.PushBacktrack(&fail);
  m.CheckNotAtStart(0, NULL);
  m.LoadCurrentCharacter(2, NULL);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(1, NULL, false);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(0, NULL, false);
  m.CheckNotCharacter('f', NULL);
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 3);
  m.AdvanceCurrentPosition(3);
  m.PushBacktrack(&backtrack);
  m.Succeed();
  m.Bind(&backtrack);
  m.Backtrack();
  m.Bind(&fail);
  m.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("^foo");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  // Sentinel values; the assertions below verify they get overwritten.
  int captures[4] = {42, 37, 87, 117};
  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
                              static_cast<uc16>(0x2603)};
  Handle<String> input = factory->NewStringFromTwoByte(
      Vector<const uc16>(input_data, 6)).ToHandleChecked();
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  // The subject is a two-byte string, so the end address is scaled by two
  // bytes per character, exactly as in the second Execute call below.
  // Passing the unscaled length would expose only half of the subject.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
                               static_cast<uc16>(0x2603)};
  input = factory->NewStringFromTwoByte(
      Vector<const uc16>(input_data2, 9)).ToHandleChecked();
  seq_input = Handle<SeqTwoByteString>::cast(input);
  start_adr = seq_input->GetCharsAddress();

  result = Execute(*code,
                   *input,
                   0,
                   start_adr,
                   start_adr + input->length() * 2,
                   captures);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
959
960
// Verifies that natively generated code reports FAILURE when execution ends
// up at a pushed backtrack target that calls Fail().
TEST(MacroAssemblerNativeBacktrack) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler masm(isolate, &zone,
                                NativeRegExpMacroAssembler::LATIN1, 0);

  Label first_load_failed;
  Label on_backtrack;
  // The subject used below has only six characters, so both loads at
  // offset 10 are expected to be out of range.
  masm.LoadCurrentCharacter(10, &first_load_failed);
  masm.Succeed();
  masm.Bind(&first_load_failed);
  masm.PushBacktrack(&on_backtrack);
  masm.LoadCurrentCharacter(10, NULL);
  masm.Succeed();
  masm.Bind(&on_backtrack);
  masm.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("..........");
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
  Address input_start =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address input_end = input_start + input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_start, input_end, NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
1000
1001
// Exercises CheckNotBackReference in natively generated code on a one-byte
// subject: capture (0, 1) covers the first two characters, the back reference
// must not match at position 2 and must match at position 4.
TEST(MacroAssemblerNativeBackReferenceLATIN1) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  // "oo" at position 2 differs from the captured "fo", so the jump to
  // |nomatch| is the expected path here.
  m.CheckNotBackReference(0, false, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  // "fo" at position 4 equals the capture; falling through is expected.
  m.CheckNotBackReference(0, false, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is escaped so the label really contains "\1"; an unescaped
  // "\1" is an octal escape that yields the control character U+0001.
  Handle<String> source = factory->NewStringFromStaticChars("^(..)..\\1");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = factory->NewStringFromStaticChars("fooofo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(-1, output[3]);
}
1050
1051
// Two-byte (UC16) variant of the back-reference test: capture (0, 1) covers
// the first two characters, the back reference must not match at position 2
// and must match at position 4.
TEST(MacroAssemblerNativeBackReferenceUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
                             4);

  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  // The two characters at position 2 differ from the capture, so the jump to
  // |nomatch| is the expected path here.
  m.CheckNotBackReference(0, false, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  // The two characters at position 4 repeat the capture; falling through is
  // expected.
  m.CheckNotBackReference(0, false, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is escaped so the label really contains "\1"; an unescaped
  // "\1" is an octal escape that yields the control character U+0001.
  Handle<String> source = factory->NewStringFromStaticChars("^(..)..\\1");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
  Handle<String> input = factory->NewStringFromTwoByte(
      Vector<const uc16>(input_data, 6)).ToHandleChecked();
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  // Two-byte subject: the end address is scaled by two bytes per character.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(-1, output[3]);
}
1102
1103
1104
// Exercises CheckNotAtStart in natively generated code. The same code is run
// twice over "foobar": once from offset 0 (the at-start path) and once from
// offset 3 (the not-at-start path); both runs must succeed.
TEST(MacroAssemblernativeAtStart) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             0);

  Label not_at_start, newline, fail;
  m.CheckNotAtStart(0, &not_at_start);
  // Check that prevchar = '\n' and current = 'f'.
  m.CheckCharacter('\n', &newline);
  m.Bind(&fail);
  m.Fail();
  m.Bind(&newline);
  m.LoadCurrentCharacter(0, &fail);
  m.CheckNotCharacter('f', &fail);
  m.Succeed();

  m.Bind(&not_at_start);
  // Check that prevchar = 'o' and current = 'b'.
  Label prevo;
  m.CheckCharacter('o', &prevo);
  m.Fail();
  m.Bind(&prevo);
  m.LoadCurrentCharacter(0, &fail);
  m.CheckNotCharacter('b', &fail);
  m.Succeed();

  Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = factory->NewStringFromStaticChars("foobar");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  // First run: matching starts at the very beginning of the subject.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);

  // Second run: matching starts at offset 3, i.e. at "bar" preceded by 'o'.
  result = Execute(*code,
                   *input,
                   3,
                   start_adr + 3,
                   start_adr + input->length(),
                   NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
}
1163
1164
// Exercises CheckNotBackReferenceIgnoreCase in natively generated code on the
// one-byte subject "aBcAbCABCxYzab": the capture in registers (2, 3) is
// "aBc", which must match the next two three-character groups regardless of
// case and must not match "xYz" or the truncated tail "ab".
TEST(MacroAssemblerNativeBackRefNoCase) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  Label fail, succ;

  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(2, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(3, 0);
  m.CheckNotBackReferenceIgnoreCase(2, false, &fail);  // Match "AbC".
  m.CheckNotBackReferenceIgnoreCase(2, false, &fail);  // Match "ABC".
  Label expected_fail;
  m.CheckNotBackReferenceIgnoreCase(2, false, &expected_fail);
  m.Bind(&fail);
  m.Fail();

  m.Bind(&expected_fail);
  m.AdvanceCurrentPosition(3);  // Skip "xYz"
  m.CheckNotBackReferenceIgnoreCase(2, false, &succ);
  m.Fail();

  m.Bind(&succ);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();

  // Backslashes are escaped so the label really contains "\1"; an unescaped
  // "\1" is an octal escape that yields the control character U+0001.
  Handle<String> source =
      factory->NewStringFromStaticChars("^(abc)\\1\\1(?!\\1)...(?!\\1)");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(12, output[1]);
  CHECK_EQ(0, output[2]);
  CHECK_EQ(3, output[3]);
}
1221
1222
1223
// Exercises register, backtrack-stack and loop primitives of the native
// macro assembler. The generated code records the current position in the
// six output registers at well-defined points; the expected contents are
// noted next to each write and asserted at the end.
TEST(MacroAssemblerNativeRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             6);

  // out1..out6 are the six output registers; sp and loop_cnt are scratch
  // registers beyond them.
  enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
  Label fail;
  Label backtrack;
  m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&backtrack);
  m.WriteStackPointerToRegister(sp);
  // Fill stack and registers
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(out1, 0);
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&fail);
  // Drop backtrack stack frames.
  m.ReadStackPointerFromRegister(sp);
  // And take the first backtrack (to &backtrack)
  m.Backtrack();

  // Not reached at run time: the unconditional Backtrack() above leaves this
  // sequence without any entry point.
  m.PushCurrentPosition();
  m.AdvanceCurrentPosition(2);
  m.PopCurrentPosition();

  m.Bind(&backtrack);
  m.PopRegister(out1);
  m.ReadCurrentPositionFromRegister(out1);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]

  // Counting up: three iterations, each advancing one character.
  Label loop;
  m.SetRegister(loop_cnt, 0);  // loop counter
  m.Bind(&loop);
  m.AdvanceRegister(loop_cnt, 1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterLT(loop_cnt, 3, &loop);
  m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]

  // Counting down: again three iterations (counter goes 1, 0, -1).
  Label loop2;
  m.SetRegister(loop_cnt, 2);  // loop counter
  m.Bind(&loop2);
  m.AdvanceRegister(loop_cnt, -1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterGE(loop_cnt, 0, &loop2);
  m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]

  // Greedy-loop check: restart from position 6 and advance until the
  // position pushed from out4 (9) is reached again.
  Label loop3;
  Label exit_loop3;
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.ReadCurrentPositionFromRegister(out3);
  m.Bind(&loop3);
  m.AdvanceCurrentPosition(1);
  m.CheckGreedyLoop(&exit_loop3);
  m.GoTo(&loop3);
  m.Bind(&exit_loop3);
  m.PopCurrentPosition();
  m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]

  m.Succeed();

  m.Bind(&fail);
  m.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  // String long enough for test (content doesn't matter).
  Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[6];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(3, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(9, output[3]);
  CHECK_EQ(9, output[4]);
  CHECK_EQ(-1, output[5]);
}
1324
1325
// Generated code that pushes backtrack entries in an endless loop must
// finish with an EXCEPTION result and leave a pending exception on the
// isolate.
TEST(MacroAssemblerStackOverflow) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler masm(isolate, &zone,
                                NativeRegExpMacroAssembler::LATIN1, 0);

  // Every iteration pushes one more backtrack entry and never pops any.
  Label again;
  masm.Bind(&again);
  masm.PushBacktrack(&again);
  masm.GoTo(&again);

  Handle<String> source =
      factory->NewStringFromStaticChars("<stack overflow test>");
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  // The subject's content is irrelevant; the generated code never reads it.
  Handle<String> input = factory->NewStringFromStaticChars("dummy");
  Address input_start =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address input_end = input_start + input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_start, input_end, NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
  CHECK(isolate->has_pending_exception());
  isolate->clear_pending_exception();
}
1363
1364
// Uses a very high register index to check that natively generated code can
// address a large register area, and that a value stored there survives a
// push/pop round trip into capture register 1.
TEST(MacroAssemblerNativeLotsOfRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone;

  ArchRegExpMacroAssembler masm(isolate, &zone,
                                NativeRegExpMacroAssembler::LATIN1, 2);

  // At least 2048, to ensure the allocated space for registers
  // spans one full page.
  const int kHighRegister = 8000;
  masm.WriteCurrentPositionToRegister(kHighRegister, 42);
  masm.WriteCurrentPositionToRegister(0, 0);
  masm.WriteCurrentPositionToRegister(1, 1);
  Label after_backref;
  // Performs a system-stack push.
  masm.CheckNotBackReference(0, false, &after_backref);
  masm.Bind(&after_backref);
  masm.PushRegister(kHighRegister, RegExpMacroAssembler::kNoStackLimitCheck);
  masm.PopRegister(1);
  masm.Succeed();

  Handle<String> source =
      factory->NewStringFromStaticChars("<huge register space test>");
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  // String long enough for test (content doesn't matter).
  Handle<String> input = factory->NewStringFromStaticChars("sample text");
  Address input_start =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address input_end = input_start + input->length();

  int captures[2];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_start, input_end, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(42, captures[1]);

  isolate->clear_pending_exception();
}
1413
1414 #else // V8_INTERPRETED_REGEXP
1415
// Assembles Irregexp bytecode for ^f(o)o and runs it through the interpreter
// on a matching and on a non-matching two-byte subject.
TEST(MacroAssembler) {
  byte bytecode_buffer[1024];
  Zone zone;
  RegExpMacroAssemblerIrregexp masm(CcTest::i_isolate(),
                                    Vector<byte>(bytecode_buffer, 1024),
                                    &zone);
  // ^f(o)o.
  Label start, fail, backtrack;

  // Register 4 ends up as 42 + 42 = 84; the 42 pushed here is popped into
  // register 0 on the failure path.
  masm.SetRegister(4, 42);
  masm.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
  masm.AdvanceRegister(4, 42);
  masm.GoTo(&start);
  masm.Fail();
  masm.Bind(&start);
  masm.PushBacktrack(&fail);
  masm.CheckNotAtStart(0, NULL);
  masm.LoadCurrentCharacter(0, NULL);
  masm.CheckNotCharacter('f', NULL);
  masm.LoadCurrentCharacter(1, NULL);
  masm.CheckNotCharacter('o', NULL);
  masm.LoadCurrentCharacter(2, NULL);
  masm.CheckNotCharacter('o', NULL);
  masm.WriteCurrentPositionToRegister(0, 0);
  masm.WriteCurrentPositionToRegister(1, 3);
  masm.WriteCurrentPositionToRegister(2, 1);
  masm.WriteCurrentPositionToRegister(3, 2);
  masm.AdvanceCurrentPosition(3);
  masm.PushBacktrack(&backtrack);
  masm.Succeed();
  masm.Bind(&backtrack);
  masm.ClearRegisters(2, 3);
  masm.Backtrack();
  masm.Bind(&fail);
  masm.PopRegister(0);
  masm.Fail();

  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
  Handle<ByteArray> bytecode = Handle<ByteArray>::cast(masm.GetCode(source));
  int captures[5];

  // Subject that starts with "foo": the match must succeed.
  const uc16 matching_chars[] = {'f', 'o', 'o', 'b', 'a', 'r'};
  Handle<String> matching_subject = factory->NewStringFromTwoByte(
      Vector<const uc16>(matching_chars, 6)).ToHandleChecked();

  CHECK(IrregexpInterpreter::Match(isolate, bytecode, matching_subject,
                                   captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(1, captures[2]);
  CHECK_EQ(2, captures[3]);
  CHECK_EQ(84, captures[4]);

  // Subject that does not start with "foo": the match must fail.
  const uc16 other_chars[] = {'b', 'a', 'r', 'f', 'o', 'o'};
  Handle<String> other_subject = factory->NewStringFromTwoByte(
      Vector<const uc16>(other_chars, 6)).ToHandleChecked();

  CHECK(!IrregexpInterpreter::Match(isolate, bytecode, other_subject,
                                    captures, 0));
  CHECK_EQ(42, captures[0]);
}
1478
1479 #endif // V8_INTERPRETED_REGEXP
1480
1481
// Checks DispatchTableConstructor::AddInverse: after adding the inverse of a
// set of ranges for choice index 0, the table entry of a character must have
// bit 0 set exactly when the character lies outside every range.
TEST(AddInverseToTable) {
  static const int kLimit = 1000;
  static const int kRangeCount = 16;
  for (int round = 0; round < 10; round++) {
    Zone zone;
    ZoneList<CharacterRange>* ranges =
        new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
    // Build pseudo-random ranges whose upper bound is clamped to kLimit.
    for (int i = 0; i < kRangeCount; i++) {
      int from = PseudoRandom(round + 87, i + 25) % kLimit;
      int unclamped_to =
          from + (PseudoRandom(i + 87, round + 25) % (kLimit / 20));
      int to = (unclamped_to > kLimit) ? kLimit : unclamped_to;
      ranges->Add(CharacterRange(from, to), &zone);
    }
    DispatchTable table(&zone);
    DispatchTableConstructor cons(&table, false, &zone);
    cons.set_choice_index(0);
    cons.AddInverse(ranges);
    for (int c = 0; c < kLimit; c++) {
      bool covered = false;
      for (int j = 0; j < kRangeCount; j++) {
        if (ranges->at(j).Contains(c)) {
          covered = true;
          break;
        }
      }
      CHECK_EQ(covered, !table.Get(c)->Get(0));
    }
  }

  // Boundary case: a single range ending just below 0xFFFF.
  Zone zone;
  ZoneList<CharacterRange>* ranges =
      new(&zone) ZoneList<CharacterRange>(1, &zone);
  ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
  DispatchTable table(&zone);
  DispatchTableConstructor cons(&table, false, &zone);
  cons.set_choice_index(0);
  cons.AddInverse(ranges);
  CHECK(!table.Get(0xFFFE)->Get(0));
  CHECK(table.Get(0xFFFF)->Get(0));
}
1518
1519
canonicalize(uc32 c)1520 static uc32 canonicalize(uc32 c) {
1521 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1522 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1523 if (count == 0) {
1524 return c;
1525 } else {
1526 CHECK_EQ(1, count);
1527 return canon[0];
1528 }
1529 }
1530
1531
// Consistency checks for canonicalize() on ASCII letters, on the non-ASCII
// range, and against unibrow::ToUppercase over the Basic Multilingual Plane.
TEST(LatinCanonicalize) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  // Each ASCII letter canonicalizes like its other-case twin and
  // un-canonicalizes to the pair {upper, lower}.
  for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
    unibrow::uchar upper = lower + ('A' - 'a');
    CHECK_EQ(canonicalize(lower), canonicalize(upper));
    unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int equivalent_count = un_canonicalize.get(lower, '\0', equivalents);
    CHECK_EQ(2, equivalent_count);
    CHECK_EQ(upper, equivalents[0]);
    CHECK_EQ(lower, equivalents[1]);
  }
  // No character at or above 128 may canonicalize to one below 128.
  for (uc32 c = 128; c < (1 << 21); c++) {
    CHECK_GE(canonicalize(c), 128);
  }
  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  // Canonicalization is only defined for the Basic Multilingual Plane.
  for (uc32 c = 0; c < (1 << 16); c++) {
    unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    int length = to_upper.get(c, '\0', upper);
    if (length == 0) {
      // No mapping: treat the character as its own upper case.
      length = 1;
      upper[0] = c;
    }
    // The expected value is the single-character upper case, except that a
    // multi-character result or a mapping from non-ASCII into ASCII leaves
    // the character unchanged.
    uc32 expected = upper[0];
    if (length > 1 || (c >= 128 && expected < 128)) {
      expected = c;
    }
    CHECK_EQ(expected, canonicalize(c));
  }
}
1560
1561
CanonRangeEnd(uc32 c)1562 static uc32 CanonRangeEnd(uc32 c) {
1563 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1564 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1565 if (count == 0) {
1566 return c;
1567 } else {
1568 CHECK_EQ(1, count);
1569 return canon[0];
1570 }
1571 }
1572
1573
TEST(RangeCanonicalization) {
  // Walk the BMP block by block (block ends come from CanonRangeEnd) and
  // verify that un-canonicalizing any later member of a block gives the
  // un-canonicalization of the block's first member shifted by the member's
  // offset within the block.
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  int block_start = 0;
  while (block_start <= 0xFFFF) {
    uc32 block_end = CanonRangeEnd(block_start);
    unsigned block_length = block_end - block_start + 1;
    // Single-character blocks have nothing to compare against.
    if (block_length > 1) {
      unibrow::uchar head[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int head_length = un_canonicalize.get(block_start, '\0', head);
      for (unsigned delta = 1; delta < block_length; delta++) {
        unibrow::uchar member[unibrow::Ecma262UnCanonicalize::kMaxWidth];
        int member_length =
            un_canonicalize.get(block_start + delta, '\0', member);
        CHECK_EQ(head_length, member_length);
        for (int j = 0; j < member_length; j++) {
          int expected = head[j] + delta;
          int actual = member[j];
          CHECK_EQ(expected, actual);
        }
      }
    }
    block_start += block_length;
  }
}
1600
1601
// Every member of a character's un-canonicalization set must itself
// un-canonicalize to that same set, element for element.
TEST(UncanonicalizeEquivalence) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  unibrow::uchar klass[unibrow::Ecma262UnCanonicalize::kMaxWidth];
  for (int c = 0; c < (1 << 16); c++) {
    int klass_size = un_canonicalize.get(c, '\0', klass);
    for (int member = 0; member < klass_size; member++) {
      unibrow::uchar member_klass[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int member_klass_size =
          un_canonicalize.get(klass[member], '\0', member_klass);
      CHECK_EQ(klass_size, member_klass_size);
      for (int k = 0; k < klass_size; k++) {
        CHECK_EQ(static_cast<int>(klass[k]),
                 static_cast<int>(member_klass[k]));
      }
    }
  }
}
1616
1617
TestRangeCaseIndependence(Isolate * isolate,CharacterRange input,Vector<CharacterRange> expected)1618 static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
1619 Vector<CharacterRange> expected) {
1620 Zone zone;
1621 int count = expected.length();
1622 ZoneList<CharacterRange>* list =
1623 new(&zone) ZoneList<CharacterRange>(count, &zone);
1624 input.AddCaseEquivalents(isolate, &zone, list, false);
1625 CHECK_EQ(count, list->length());
1626 for (int i = 0; i < list->length(); i++) {
1627 CHECK_EQ(expected[i].from(), list->at(i).from());
1628 CHECK_EQ(expected[i].to(), list->at(i).to());
1629 }
1630 }
1631
1632
TestSimpleRangeCaseIndependence(Isolate * isolate,CharacterRange input,CharacterRange expected)1633 static void TestSimpleRangeCaseIndependence(Isolate* isolate,
1634 CharacterRange input,
1635 CharacterRange expected) {
1636 EmbeddedVector<CharacterRange, 1> vector;
1637 vector[0] = expected;
1638 TestRangeCaseIndependence(isolate, input, vector);
1639 }
1640
1641
// Table-driven check of the single range that case-equivalent expansion is
// expected to add for various ASCII letter ranges.
TEST(CharacterRangeCaseIndependence) {
  Isolate* isolate = CcTest::i_isolate();

  struct RangeCase {
    CharacterRange input;
    CharacterRange expected;
  };
  const RangeCase kCases[] = {
      {CharacterRange::Singleton('a'), CharacterRange::Singleton('A')},
      {CharacterRange::Singleton('z'), CharacterRange::Singleton('Z')},
      {CharacterRange('a', 'z'), CharacterRange('A', 'Z')},
      {CharacterRange('c', 'f'), CharacterRange('C', 'F')},
      {CharacterRange('a', 'b'), CharacterRange('A', 'B')},
      {CharacterRange('y', 'z'), CharacterRange('Y', 'Z')},
      {CharacterRange('a' - 1, 'z' + 1), CharacterRange('A', 'Z')},
      {CharacterRange('A', 'Z'), CharacterRange('a', 'z')},
      {CharacterRange('C', 'F'), CharacterRange('c', 'f')},
      {CharacterRange('A' - 1, 'Z' + 1), CharacterRange('a', 'z')},
      // Here we need to add [l-z] to complete the case independence of
      // [A-Za-z] but we expect [a-z] to be added since we always add a
      // whole block at a time.
      {CharacterRange('A', 'k'), CharacterRange('a', 'z')},
  };

  const int kCaseCount = static_cast<int>(sizeof(kCases) / sizeof(kCases[0]));
  for (int i = 0; i < kCaseCount; i++) {
    TestSimpleRangeCaseIndependence(isolate, kCases[i].input,
                                    kCases[i].expected);
  }
}
1670
1671
InClass(uc16 c,ZoneList<CharacterRange> * ranges)1672 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1673 if (ranges == NULL)
1674 return false;
1675 for (int i = 0; i < ranges->length(); i++) {
1676 CharacterRange range = ranges->at(i);
1677 if (range.from() <= c && c <= range.to())
1678 return true;
1679 }
1680 return false;
1681 }
1682
1683
// Splits the full character range by the word-bounds overlay and checks, for
// every 16-bit code unit, that it landed in |included| exactly when it is
// inside the overlay and in |excluded| otherwise.
TEST(CharClassDifference) {
  Zone zone;
  ZoneList<CharacterRange>* base =
      new(&zone) ZoneList<CharacterRange>(1, &zone);
  base->Add(CharacterRange::Everything(), &zone);
  Vector<const int> overlay = CharacterRange::GetWordBounds();
  ZoneList<CharacterRange>* included = NULL;
  ZoneList<CharacterRange>* excluded = NULL;
  CharacterRange::Split(base, overlay, &included, &excluded, &zone);
  for (int c = 0; c < (1 << 16); c++) {
    if (!InClass(c, base)) {
      // Characters outside the base class must not show up in either half.
      CHECK(!InClass(c, included));
      CHECK(!InClass(c, excluded));
      continue;
    }
    // The overlay is read as consecutive [start, end) pairs.
    bool in_overlay = false;
    for (int j = 0; j < overlay.length(); j += 2) {
      if (overlay[j] <= c && c < overlay[j + 1]) {
        in_overlay = true;
        break;
      }
    }
    CHECK_EQ(in_overlay, InClass(c, included));
    CHECK_EQ(!in_overlay, InClass(c, excluded));
  }
}
1709
1710
TEST(CanonicalizeCharacterSets)1711 TEST(CanonicalizeCharacterSets) {
1712 Zone zone;
1713 ZoneList<CharacterRange>* list =
1714 new(&zone) ZoneList<CharacterRange>(4, &zone);
1715 CharacterSet set(list);
1716
1717 list->Add(CharacterRange(10, 20), &zone);
1718 list->Add(CharacterRange(30, 40), &zone);
1719 list->Add(CharacterRange(50, 60), &zone);
1720 set.Canonicalize();
1721 CHECK_EQ(3, list->length());
1722 CHECK_EQ(10, list->at(0).from());
1723 CHECK_EQ(20, list->at(0).to());
1724 CHECK_EQ(30, list->at(1).from());
1725 CHECK_EQ(40, list->at(1).to());
1726 CHECK_EQ(50, list->at(2).from());
1727 CHECK_EQ(60, list->at(2).to());
1728
1729 list->Rewind(0);
1730 list->Add(CharacterRange(10, 20), &zone);
1731 list->Add(CharacterRange(50, 60), &zone);
1732 list->Add(CharacterRange(30, 40), &zone);
1733 set.Canonicalize();
1734 CHECK_EQ(3, list->length());
1735 CHECK_EQ(10, list->at(0).from());
1736 CHECK_EQ(20, list->at(0).to());
1737 CHECK_EQ(30, list->at(1).from());
1738 CHECK_EQ(40, list->at(1).to());
1739 CHECK_EQ(50, list->at(2).from());
1740 CHECK_EQ(60, list->at(2).to());
1741
1742 list->Rewind(0);
1743 list->Add(CharacterRange(30, 40), &zone);
1744 list->Add(CharacterRange(10, 20), &zone);
1745 list->Add(CharacterRange(25, 25), &zone);
1746 list->Add(CharacterRange(100, 100), &zone);
1747 list->Add(CharacterRange(1, 1), &zone);
1748 set.Canonicalize();
1749 CHECK_EQ(5, list->length());
1750 CHECK_EQ(1, list->at(0).from());
1751 CHECK_EQ(1, list->at(0).to());
1752 CHECK_EQ(10, list->at(1).from());
1753 CHECK_EQ(20, list->at(1).to());
1754 CHECK_EQ(25, list->at(2).from());
1755 CHECK_EQ(25, list->at(2).to());
1756 CHECK_EQ(30, list->at(3).from());
1757 CHECK_EQ(40, list->at(3).to());
1758 CHECK_EQ(100, list->at(4).from());
1759 CHECK_EQ(100, list->at(4).to());
1760
1761 list->Rewind(0);
1762 list->Add(CharacterRange(10, 19), &zone);
1763 list->Add(CharacterRange(21, 30), &zone);
1764 list->Add(CharacterRange(20, 20), &zone);
1765 set.Canonicalize();
1766 CHECK_EQ(1, list->length());
1767 CHECK_EQ(10, list->at(0).from());
1768 CHECK_EQ(30, list->at(0).to());
1769 }
1770
1771
// Builds two range lists (xs and ys) that together cover many relative
// placements of one range against another, and checks that both lists are
// canonical.
TEST(CharacterRangeMerge) {
  Zone zone;
  ZoneList<CharacterRange> xs(4, &zone);
  ZoneList<CharacterRange> ys(4, &zone);
  // Create all combinations of intersections of ranges, both singletons and
  // longer.

  int base = 0;

  // The five kinds of singleton intersections:
  //     X
  //   Y      - outside before
  //    Y     - outside touching start
  //     Y    - overlap
  //      Y   - outside touching end
  //       Y  - outside after

  for (int shift = 0; shift < 5; shift++) {
    xs.Add(CharacterRange::Singleton(base + 2), &zone);
    ys.Add(CharacterRange::Singleton(base + shift), &zone);
    base += 6;
  }

  // The seven kinds of singleton/non-singleton intersections:
  //      XXX
  //    Y        - outside before
  //     Y       - outside touching start
  //      Y      - inside touching start
  //       Y     - entirely inside
  //        Y    - inside touching end
  //         Y   - outside touching end
  //          Y  - disjoint after

  for (int shift = 0; shift < 7; shift++) {
    xs.Add(CharacterRange::Range(base + 2, base + 4), &zone);
    ys.Add(CharacterRange::Singleton(base + shift), &zone);
    base += 8;
  }

  // The eleven kinds of non-singleton intersections (schematic):
  //
  //           XXXXXXXX
  //   YYYY                  - outside before.
  //     YYYY                - outside touching start.
  //       YYYY              - overlapping start
  //         YYYY            - inside touching start
  //           YYYY          - entirely inside
  //             YYYY        - inside touching end
  //               YYYY      - overlapping end
  //                 YYYY    - outside touching end
  //                   YYYY  - outside after
  //           YYYYYYYY      - identical
  //         YYYYYYYYYYYY    - containing entirely.

  for (int step = 0; step < 9; step++) {
    // X spans [base + 6, base + 15]; Y is four wide and moves right by two
    // per step.
    const int y_from = base + 2 * step;
    xs.Add(CharacterRange::Range(base + 6, base + 15), &zone);
    ys.Add(CharacterRange::Range(y_from, y_from + 3), &zone);
    base += 22;
  }
  // Identical ranges.
  xs.Add(CharacterRange::Range(base + 6, base + 15), &zone);
  ys.Add(CharacterRange::Range(base + 6, base + 15), &zone);
  base += 22;
  // Y contains X entirely.
  xs.Add(CharacterRange::Range(base + 6, base + 15), &zone);
  ys.Add(CharacterRange::Range(base + 4, base + 17), &zone);
  base += 22;

  // Different kinds of multi-range overlap:
  // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
  //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y

  xs.Add(CharacterRange::Range(base, base + 21), &zone);
  xs.Add(CharacterRange::Range(base + 31, base + 52), &zone);
  for (int group = 0; group < 6; group++) {
    ys.Add(CharacterRange::Range(base + 2, base + 5), &zone);
    ys.Add(CharacterRange::Singleton(base + 8), &zone);
    base += 9;
  }

  CHECK(CharacterRange::IsCanonical(&xs));
  CHECK(CharacterRange::IsCanonical(&ys));

  // NOTE(review): these lists are constructed but never used in this test.
  ZoneList<CharacterRange> first_only(4, &zone);
  ZoneList<CharacterRange> second_only(4, &zone);
  ZoneList<CharacterRange> both(4, &zone);
}
1857
1858
// Runs the word-boundary pattern \b\w+\b through the Execute() helper.
// NOTE(review): the three boolean arguments are interpreted by Execute(),
// which is defined outside this part of the file -- confirm their meaning
// there before changing them.
TEST(Graph) {
  const char* const word_pattern = "\\b\\w+\\b";
  Execute(word_pattern, false, true, true);
}
1862
1863
1864 namespace {
1865
1866 int* global_use_counts = NULL;
1867
MockUseCounterCallback(v8::Isolate * isolate,v8::Isolate::UseCounterFeature feature)1868 void MockUseCounterCallback(v8::Isolate* isolate,
1869 v8::Isolate::UseCounterFeature feature) {
1870 ++global_use_counts[feature];
1871 }
1872 }
1873
1874
1875 // Test that ES2015 RegExp compatibility fixes are in place, that they
1876 // are not overly broad, and the appropriate UseCounters are incremented
TEST(UseCountRegExp)1877 TEST(UseCountRegExp) {
1878 i::FLAG_harmony_regexps = true;
1879 v8::Isolate* isolate = CcTest::isolate();
1880 v8::HandleScope scope(isolate);
1881 LocalContext env;
1882 int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
1883 global_use_counts = use_counts;
1884 CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
1885
1886 // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
1887 v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
1888 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1889 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1890 CHECK(resultSticky->IsUndefined());
1891
1892 // re.sticky has approriate value and doesn't touch UseCounter
1893 v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
1894 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1895 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1896 CHECK(resultReSticky->IsFalse());
1897
1898 // When the getter is caleld on another object, throw an exception
1899 // and don't increment the UseCounter
1900 v8::Local<v8::Value> resultStickyError = CompileRun(
1901 "var exception;"
1902 "try { "
1903 " Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
1904 " .get.call(null);"
1905 "} catch (e) {"
1906 " exception = e;"
1907 "}"
1908 "exception");
1909 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1910 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1911 CHECK(resultStickyError->IsObject());
1912
1913 // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
1914 // a UseCounter is incremented to track it.
1915 v8::Local<v8::Value> resultToString =
1916 CompileRun("RegExp.prototype.toString().length");
1917 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1918 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1919 CHECK(resultToString->IsInt32());
1920 CHECK_EQ(6,
1921 resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1922
1923 // .toString() works on normal RegExps
1924 v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
1925 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1926 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1927 CHECK(resultReToString->IsInt32());
1928 CHECK_EQ(
1929 3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1930
1931 // .toString() throws on non-RegExps that aren't RegExp.prototype
1932 v8::Local<v8::Value> resultToStringError = CompileRun(
1933 "var exception;"
1934 "try { RegExp.prototype.toString.call(null) }"
1935 "catch (e) { exception = e; }"
1936 "exception");
1937 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1938 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1939 CHECK(resultToStringError->IsObject());
1940 }
1941