1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include <cstdlib>
29 #include <sstream>
30
31 #include "include/v8.h"
32 #include "src/v8.h"
33
34 #include "src/ast/ast.h"
35 #include "src/char-predicates-inl.h"
36 #include "src/ostreams.h"
37 #include "src/regexp/jsregexp.h"
38 #include "src/regexp/regexp-macro-assembler.h"
39 #include "src/regexp/regexp-macro-assembler-irregexp.h"
40 #include "src/regexp/regexp-parser.h"
41 #include "src/splay-tree-inl.h"
42 #include "src/string-stream.h"
43 #ifdef V8_INTERPRETED_REGEXP
44 #include "src/regexp/interpreter-irregexp.h"
45 #else // V8_INTERPRETED_REGEXP
46 #include "src/macro-assembler.h"
47 #if V8_TARGET_ARCH_ARM
48 #include "src/arm/assembler-arm.h" // NOLINT
49 #include "src/arm/macro-assembler-arm.h"
50 #include "src/regexp/arm/regexp-macro-assembler-arm.h"
51 #endif
52 #if V8_TARGET_ARCH_ARM64
53 #include "src/arm64/assembler-arm64.h"
54 #include "src/arm64/macro-assembler-arm64.h"
55 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
56 #endif
57 #if V8_TARGET_ARCH_S390
58 #include "src/regexp/s390/regexp-macro-assembler-s390.h"
59 #include "src/s390/assembler-s390.h"
60 #include "src/s390/macro-assembler-s390.h"
61 #endif
62 #if V8_TARGET_ARCH_PPC
63 #include "src/ppc/assembler-ppc.h"
64 #include "src/ppc/macro-assembler-ppc.h"
65 #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
66 #endif
67 #if V8_TARGET_ARCH_MIPS
68 #include "src/mips/assembler-mips.h"
69 #include "src/mips/macro-assembler-mips.h"
70 #include "src/regexp/mips/regexp-macro-assembler-mips.h"
71 #endif
72 #if V8_TARGET_ARCH_MIPS64
73 #include "src/mips64/assembler-mips64.h"
74 #include "src/mips64/macro-assembler-mips64.h"
75 #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
76 #endif
77 #if V8_TARGET_ARCH_X64
78 #include "src/regexp/x64/regexp-macro-assembler-x64.h"
79 #include "src/x64/assembler-x64.h"
80 #include "src/x64/macro-assembler-x64.h"
81 #endif
82 #if V8_TARGET_ARCH_IA32
83 #include "src/ia32/assembler-ia32.h"
84 #include "src/ia32/macro-assembler-ia32.h"
85 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
86 #endif
87 #if V8_TARGET_ARCH_X87
88 #include "src/regexp/x87/regexp-macro-assembler-x87.h"
89 #include "src/x87/assembler-x87.h"
90 #include "src/x87/macro-assembler-x87.h"
91 #endif
92 #endif // V8_INTERPRETED_REGEXP
93 #include "test/cctest/cctest.h"
94
95 using namespace v8::internal;
96
97
CheckParse(const char * input)98 static bool CheckParse(const char* input) {
99 v8::HandleScope scope(CcTest::isolate());
100 Zone zone(CcTest::i_isolate()->allocator());
101 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
102 RegExpCompileData result;
103 return v8::internal::RegExpParser::ParseRegExp(
104 CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
105 }
106
107
CheckParseEq(const char * input,const char * expected,bool unicode=false)108 static void CheckParseEq(const char* input, const char* expected,
109 bool unicode = false) {
110 v8::HandleScope scope(CcTest::isolate());
111 Zone zone(CcTest::i_isolate()->allocator());
112 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
113 RegExpCompileData result;
114 JSRegExp::Flags flags = JSRegExp::kNone;
115 if (unicode) flags |= JSRegExp::kUnicode;
116 CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
117 &reader, flags, &result));
118 CHECK(result.tree != NULL);
119 CHECK(result.error.is_null());
120 std::ostringstream os;
121 result.tree->Print(os, &zone);
122 if (strcmp(expected, os.str().c_str()) != 0) {
123 printf("%s | %s\n", expected, os.str().c_str());
124 }
125 CHECK_EQ(0, strcmp(expected, os.str().c_str()));
126 }
127
128
CheckSimple(const char * input)129 static bool CheckSimple(const char* input) {
130 v8::HandleScope scope(CcTest::isolate());
131 Zone zone(CcTest::i_isolate()->allocator());
132 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
133 RegExpCompileData result;
134 CHECK(v8::internal::RegExpParser::ParseRegExp(
135 CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
136 CHECK(result.tree != NULL);
137 CHECK(result.error.is_null());
138 return result.simple;
139 }
140
// Pair of values returned by CheckMinMaxMatch(): the min_match() and
// max_match() results reported by a parsed regexp tree.
struct MinMaxPair {
  int min_match;  // Value of RegExpTree::min_match() for the parsed pattern.
  int max_match;  // Value of RegExpTree::max_match() for the parsed pattern.
};
145
146
CheckMinMaxMatch(const char * input)147 static MinMaxPair CheckMinMaxMatch(const char* input) {
148 v8::HandleScope scope(CcTest::isolate());
149 Zone zone(CcTest::i_isolate()->allocator());
150 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
151 RegExpCompileData result;
152 CHECK(v8::internal::RegExpParser::ParseRegExp(
153 CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
154 CHECK(result.tree != NULL);
155 CHECK(result.error.is_null());
156 int min_match = result.tree->min_match();
157 int max_match = result.tree->max_match();
158 MinMaxPair pair = { min_match, max_match };
159 return pair;
160 }
161
162
// Checks that parsing |input| with no flags is rejected (CheckParse() returns
// false).
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
// Checks that CheckSimple(input) equals |simple|. Note that the expansion
// already ends in a semicolon, so `CHECK_SIMPLE(...);` yields an extra empty
// statement.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
// Checks that the tree parsed from |input| reports min_match() == |min| and
// max_match() == |max|. Expands to a braced block, so the temporary
// `min_max` does not leak into the caller's scope.
#define CHECK_MIN_MAX(input, min, max)                                         \
  { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    CHECK_EQ(min, min_max.min_match);                                          \
    CHECK_EQ(max, min_max.max_match);                                          \
  }
170
171
// Shared body of the parser tests. Sets FLAG_harmony_regexp_lookbehind to
// |lookbehind| (the flag is not restored afterwards) and then runs a fixed
// list of parser expectations:
//   - CheckParseEq: the printed parse tree must equal the given string,
//   - CHECK_PARSE_ERROR: the pattern must be rejected,
//   - CHECK_SIMPLE: the parser's `simple` verdict must match,
//   - CHECK_MIN_MAX: the tree's min_match()/max_match() must match.
// Lookbehind patterns are expected to parse when |lookbehind| is true and to
// be rejected otherwise. Named-capture expectations at the end run with
// FLAG_harmony_regexp_named_captures temporarily set to true (it is reset to
// false, not to its previous value).
void TestRegExpParser(bool lookbehind) {
  FLAG_harmony_regexp_lookbehind = lookbehind;

  CHECK_PARSE_ERROR("?");

  // Literals, alternation, assertions and quantifiers.
  CheckParseEq("abc", "'abc'");
  CheckParseEq("", "%");
  CheckParseEq("abc|def", "(| 'abc' 'def')");
  CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
  CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
  CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
  CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
  CheckParseEq("a*", "(# 0 - g 'a')");
  CheckParseEq("a*?", "(# 0 - n 'a')");
  CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
  CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
  CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
  CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
  CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
  CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
  CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
  CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
  CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
  CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
  CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
  CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
  CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
  CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
  // Groups and lookarounds.
  CheckParseEq("(?:foo)", "'foo'");
  CheckParseEq("(?: foo )", "' foo '");
  CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
  CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
  CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
  CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
  if (lookbehind) {
    CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
    CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
  } else {
    // With the flag off, lookbehind syntax must be a parse error.
    CHECK_PARSE_ERROR("foo(?<=bar)baz");
    CHECK_PARSE_ERROR("foo(?<!bar)baz");
  }
  CheckParseEq("()", "(^ %)");
  CheckParseEq("(?=)", "(-> + %)");
  // Character classes.
  CheckParseEq("[]", "^[\\x00-\\u{10ffff}]");   // Doesn't compile on windows
  CheckParseEq("[^]", "[\\x00-\\u{10ffff}]");   // \uffff isn't in codepage 1252
  CheckParseEq("[x]", "[x]");
  CheckParseEq("[xyz]", "[x y z]");
  CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
  CheckParseEq("[-123]", "[- 1 2 3]");
  CheckParseEq("[^123]", "^[1 2 3]");
  CheckParseEq("]", "']'");
  CheckParseEq("}", "'}'");
  CheckParseEq("[a-b-c]", "[a-b - c]");
  CheckParseEq("[\\d]", "[0-9]");
  CheckParseEq("[x\\dz]", "[x 0-9 z]");
  CheckParseEq("[\\d-z]", "[0-9 - z]");
  CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
  CheckParseEq("[z-\\d]", "[z - 0-9]");
  // Control character outside character class.
  CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
  CheckParseEq("\\c!", "'\\c!'");
  CheckParseEq("\\c_", "'\\c_'");
  CheckParseEq("\\c~", "'\\c~'");
  CheckParseEq("\\c1", "'\\c1'");
  // Control character inside character class.
  CheckParseEq("[\\c!]", "[\\ c !]");
  CheckParseEq("[\\c_]", "[\\x1f]");
  CheckParseEq("[\\c~]", "[\\ c ~]");
  CheckParseEq("[\\ca]", "[\\x01]");
  CheckParseEq("[\\cz]", "[\\x1a]");
  CheckParseEq("[\\cA]", "[\\x01]");
  CheckParseEq("[\\cZ]", "[\\x1a]");
  CheckParseEq("[\\c1]", "[\\x11]");

  // Escaped syntax characters, octal escapes and back references.
  CheckParseEq("[a\\]c]", "[a ] c]");
  CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
  // NOTE(review): the other class expectations separate members with one
  // space and the last member here is itself a space, so the expected string
  // may need two spaces before the closing ']' -- confirm against the
  // printer output (whitespace may have been collapsed in this copy).
  CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
  CheckParseEq("\\0", "'\\x00'");
  CheckParseEq("\\8", "'8'");
  CheckParseEq("\\9", "'9'");
  CheckParseEq("\\11", "'\\x09'");
  CheckParseEq("\\11a", "'\\x09a'");
  CheckParseEq("\\011", "'\\x09'");
  CheckParseEq("\\00011", "'\\x0011'");
  CheckParseEq("\\118", "'\\x098'");
  CheckParseEq("\\111", "'I'");
  CheckParseEq("\\1111", "'I1'");
  CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
  CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
  CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
  CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
  CheckParseEq("(x)(x)(x)\\1*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g (<- 1)))");
  CheckParseEq("(x)(x)(x)\\2*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g (<- 2)))");
  CheckParseEq("(x)(x)(x)\\3*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g (<- 3)))");
  CheckParseEq("(x)(x)(x)\\4*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g '\\x04'))");
  CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
  CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
  CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
  CheckParseEq("(a\\1)", "(^ 'a')");
  CheckParseEq("(\\1a)", "(^ 'a')");
  CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
  CheckParseEq("(?=a)?a", "'a'");
  CheckParseEq("(?=a){0,10}a", "'a'");
  CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
  CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
  CheckParseEq("(?!a)?a", "'a'");
  CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
  CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
  CheckParseEq("(?!\\1(a\\1)\\1)\\1",
               "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
  CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
               "(: (<- 1) (<- 2) (^ (: 'a' (^ 'b') (<- 2))) (<- 1))");
  if (lookbehind) {
    CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
                 "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
  }
  CheckParseEq("[\\0]", "[\\x00]");
  CheckParseEq("[\\11]", "[\\x09]");
  CheckParseEq("[\\11a]", "[\\x09 a]");
  CheckParseEq("[\\011]", "[\\x09]");
  CheckParseEq("[\\00011]", "[\\x00 1 1]");
  CheckParseEq("[\\118]", "[\\x09 8]");
  CheckParseEq("[\\111]", "[I]");
  CheckParseEq("[\\1111]", "[I 1]");
  CheckParseEq("\\x34", "'\x34'");
  CheckParseEq("\\x60", "'\x60'");
  CheckParseEq("\\x3z", "'x3z'");
  CheckParseEq("\\c", "'\\c'");
  CheckParseEq("\\u0034", "'\x34'");
  CheckParseEq("\\u003z", "'u003z'");
  CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");

  // Unicode regexps
  CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
  CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
               true);
  CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
               true);
  CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
  CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);

  CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
  CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
               true);

  // The parser's `simple` verdict: only the plain literal "a" is expected to
  // be reported as simple.
  CHECK_SIMPLE("", false);
  CHECK_SIMPLE("a", true);
  CHECK_SIMPLE("a|b", false);
  CHECK_SIMPLE("a\\n", false);
  CHECK_SIMPLE("^a", false);
  CHECK_SIMPLE("a$", false);
  CHECK_SIMPLE("a\\b!", false);
  CHECK_SIMPLE("a\\Bb", false);
  CHECK_SIMPLE("a*", false);
  CHECK_SIMPLE("a*?", false);
  CHECK_SIMPLE("a?", false);
  CHECK_SIMPLE("a??", false);
  CHECK_SIMPLE("a{0,1}?", false);
  CHECK_SIMPLE("a{1,1}?", false);
  CHECK_SIMPLE("a{1,2}?", false);
  CHECK_SIMPLE("a+?", false);
  CHECK_SIMPLE("(a)", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(\\1a)", false);
  CHECK_SIMPLE("\\1(a)", false);
  CHECK_SIMPLE("a\\s", false);
  CHECK_SIMPLE("a\\S", false);
  CHECK_SIMPLE("a\\d", false);
  CHECK_SIMPLE("a\\D", false);
  CHECK_SIMPLE("a\\w", false);
  CHECK_SIMPLE("a\\W", false);
  CHECK_SIMPLE("a.", false);
  CHECK_SIMPLE("a\\q", false);
  CHECK_SIMPLE("a[a]", false);
  CHECK_SIMPLE("a[^a]", false);
  CHECK_SIMPLE("a[a-z]", false);
  CHECK_SIMPLE("a[\\q]", false);
  CHECK_SIMPLE("a(?:b)", false);
  CHECK_SIMPLE("a(?=b)", false);
  CHECK_SIMPLE("a(?!b)", false);
  CHECK_SIMPLE("\\x60", false);
  CHECK_SIMPLE("\\u0060", false);
  CHECK_SIMPLE("\\cA", false);
  CHECK_SIMPLE("\\q", false);
  CHECK_SIMPLE("\\1112", false);
  CHECK_SIMPLE("\\0", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(?=a)?a", false);
  CHECK_SIMPLE("(?!a)?a\\1", false);
  CHECK_SIMPLE("(?:(?=a))a\\1", false);

  // Malformed brace quantifiers are expected to parse as literal text.
  CheckParseEq("a{}", "'a{}'");
  CheckParseEq("a{,}", "'a{,}'");
  CheckParseEq("a{", "'a{'");
  CheckParseEq("a{z}", "'a{z}'");
  CheckParseEq("a{1z}", "'a{1z}'");
  CheckParseEq("a{12z}", "'a{12z}'");
  CheckParseEq("a{12,", "'a{12,'");
  CheckParseEq("a{12,3b", "'a{12,3b'");
  CheckParseEq("{}", "'{}'");
  CheckParseEq("{,}", "'{,}'");
  CheckParseEq("{", "'{'");
  CheckParseEq("{z}", "'{z}'");
  CheckParseEq("{1z}", "'{1z}'");
  CheckParseEq("{12z}", "'{12z}'");
  CheckParseEq("{12,", "'{12,'");
  CheckParseEq("{12,3b", "'{12,3b'");

  // Expected min_match()/max_match() values of the parsed tree.
  CHECK_MIN_MAX("a", 1, 1);
  CHECK_MIN_MAX("abc", 3, 3);
  CHECK_MIN_MAX("a[bc]d", 3, 3);
  CHECK_MIN_MAX("a|bc", 1, 2);
  CHECK_MIN_MAX("ab|c", 1, 2);
  CHECK_MIN_MAX("a||bc", 0, 2);
  CHECK_MIN_MAX("|", 0, 0);
  CHECK_MIN_MAX("(?:ab)", 2, 2);
  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
  CHECK_MIN_MAX("(ab)", 2, 2);
  CHECK_MIN_MAX("(ab|cde)", 2, 3);
  CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab)?", 0, 2);
  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a?", 0, 1);
  CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a??", 0, 1);
  CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)?", 0, 1);
  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a{0}", 0, 0);
  CHECK_MIN_MAX("(?:a+){0}", 0, 0);
  CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
  CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
  CHECK_MIN_MAX("a\\bc", 2, 2);
  CHECK_MIN_MAX("a\\Bc", 2, 2);
  CHECK_MIN_MAX("a\\sc", 3, 3);
  CHECK_MIN_MAX("a\\Sc", 3, 3);
  CHECK_MIN_MAX("a(?=b)c", 2, 2);
  CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
  CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);

  // Named captures, parsed with the unicode flag while the named-captures
  // flag is enabled.
  FLAG_harmony_regexp_named_captures = true;
  CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
               "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
  CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
               "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
  CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
               "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
  CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
  CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
  CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
  CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
  CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);

  CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
  CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
  FLAG_harmony_regexp_named_captures = false;
}
458
459
// Runs the shared parser expectations with the lookbehind flag switched on.
TEST(ParserWithLookbehind) {
  const bool kLookbehindEnabled = true;
  TestRegExpParser(kLookbehindEnabled);
}
463
464
// Runs the shared parser expectations with the lookbehind flag switched off,
// so that the branch of TestRegExpParser() which expects lookbehind syntax
// to be rejected is actually exercised. (Passing `true` here would make this
// test an exact duplicate of ParserWithLookbehind.)
TEST(ParserWithoutLookbehind) {
  TestRegExpParser(false);  // Lookbehind disabled.
}
468
// Regression cases for the parser: each pattern must parse to the listed
// printed tree.
TEST(ParserRegression) {
  const struct {
    const char* pattern;
    const char* printed_tree;
  } kCases[] = {
      {"[A-Z$-][x]", "(! [A-Z $ -] [x])"},
      {"a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"},
      {"{", "'{'"},
      {"a|", "(| 'a' %)"},
  };
  const size_t kCaseCount = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t index = 0; index < kCaseCount; index++) {
    CheckParseEq(kCases[index].pattern, kCases[index].printed_tree);
  }
}
475
ExpectError(const char * input,const char * expected,bool unicode=false)476 static void ExpectError(const char* input, const char* expected,
477 bool unicode = false) {
478 v8::HandleScope scope(CcTest::isolate());
479 Zone zone(CcTest::i_isolate()->allocator());
480 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
481 RegExpCompileData result;
482 JSRegExp::Flags flags = JSRegExp::kNone;
483 if (unicode) flags |= JSRegExp::kUnicode;
484 CHECK(!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
485 &reader, flags, &result));
486 CHECK(result.tree == NULL);
487 CHECK(!result.error.is_null());
488 v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
489 CHECK_EQ(0, strcmp(expected, str.get()));
490 }
491
492
// Checks the error message reported for a range of malformed patterns. The
// named-capture cases run with FLAG_harmony_regexp_named_captures set to
// true and with the unicode flag; the flag is set back to false at the end.
TEST(Errors) {
  // Patterns with a one-off error message.
  ExpectError("\\", "\\ at end of pattern");
  ExpectError("(foo", "Unterminated group");
  ExpectError("(?", "Invalid group");

  const char* const kUnterminatedClass = "Unterminated character class";
  ExpectError("[", kUnterminatedClass);
  ExpectError("[a-", kUnterminatedClass);

  // Quantifiers with nothing in front of them.
  const char* const kDanglingQuantifiers[] = {"*",   "?",     "+",
                                              "{1}", "{1,2}", "{1,}"};
  const size_t kDanglingCount =
      sizeof(kDanglingQuantifiers) / sizeof(kDanglingQuantifiers[0]);
  for (size_t index = 0; index < kDanglingCount; index++) {
    ExpectError(kDanglingQuantifiers[index], "Nothing to repeat");
  }

  // Check that we don't allow more than kMaxCapture captures
  const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
  std::ostringstream too_many_groups;
  for (int emitted = 0; emitted <= kMaxCaptures; emitted++) {
    too_many_groups << "()";
  }
  ExpectError(too_many_groups.str().c_str(), "Too many captures");

  FLAG_harmony_regexp_named_captures = true;

  const char* const kBadCaptureNames[] = {"(?<>.)", "(?<1>.)", "(?<_%>.)",
                                          "\\k<a"};
  const size_t kBadNameCount =
      sizeof(kBadCaptureNames) / sizeof(kBadCaptureNames[0]);
  for (size_t index = 0; index < kBadNameCount; index++) {
    ExpectError(kBadCaptureNames[index], "Invalid capture group name", true);
  }

  ExpectError("(?<a>.)(?<a>.)", "Duplicate capture group name", true);
  ExpectError("(?<\\u{FISK}", "Invalid Unicode escape sequence", true);

  const char* const kUnknownNameReferenced = "Invalid named capture referenced";
  ExpectError("\\k<a>", kUnknownNameReferenced, true);
  ExpectError("(?<b>)\\k<a>", kUnknownNameReferenced, true);

  ExpectError("\\ka", "Invalid named reference", true);

  FLAG_harmony_regexp_named_captures = false;
}
537
538
IsDigit(uc16 c)539 static bool IsDigit(uc16 c) {
540 return ('0' <= c && c <= '9');
541 }
542
543
NotDigit(uc16 c)544 static bool NotDigit(uc16 c) {
545 return !IsDigit(c);
546 }
547
548
IsWhiteSpaceOrLineTerminator(uc16 c)549 static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
550 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
551 // WhiteSpace (7.2) and LineTerminator (7.3) values.
552 return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
553 }
554
555
NotWhiteSpaceNorLineTermiantor(uc16 c)556 static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
557 return !IsWhiteSpaceOrLineTerminator(c);
558 }
559
560
NotWord(uc16 c)561 static bool NotWord(uc16 c) {
562 return !IsRegExpWord(c);
563 }
564
565
TestCharacterClassEscapes(uc16 c,bool (pred)(uc16 c))566 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
567 Zone zone(CcTest::i_isolate()->allocator());
568 ZoneList<CharacterRange>* ranges =
569 new(&zone) ZoneList<CharacterRange>(2, &zone);
570 CharacterRange::AddClassEscape(c, ranges, &zone);
571 for (uc32 i = 0; i < (1 << 16); i++) {
572 bool in_class = false;
573 for (int j = 0; !in_class && j < ranges->length(); j++) {
574 CharacterRange& range = ranges->at(j);
575 in_class = (range.from() <= i && i <= range.to());
576 }
577 CHECK_EQ(pred(i), in_class);
578 }
579 }
580
581
// Checks each supported class escape character against the predicate that
// describes the set of characters it should cover.
TEST(CharacterClassEscapes) {
  const struct {
    uc16 escape;
    bool (*predicate)(uc16);
  } kCases[] = {
      {'.', IsRegExpNewline},
      {'d', IsDigit},
      {'D', NotDigit},
      {'s', IsWhiteSpaceOrLineTerminator},
      {'S', NotWhiteSpaceNorLineTermiantor},
      {'w', IsRegExpWord},
      {'W', NotWord},
  };
  const size_t kCaseCount = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t index = 0; index < kCaseCount; index++) {
    TestCharacterClassEscapes(kCases[index].escape, kCases[index].predicate);
  }
}
591
592
Compile(const char * input,bool multiline,bool unicode,bool is_one_byte,Zone * zone)593 static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
594 bool is_one_byte, Zone* zone) {
595 Isolate* isolate = CcTest::i_isolate();
596 FlatStringReader reader(isolate, CStrVector(input));
597 RegExpCompileData compile_data;
598 JSRegExp::Flags flags = JSRegExp::kNone;
599 if (multiline) flags = JSRegExp::kMultiline;
600 if (unicode) flags = JSRegExp::kUnicode;
601 if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
602 &reader, flags, &compile_data))
603 return NULL;
604 Handle<String> pattern = isolate->factory()
605 ->NewStringFromUtf8(CStrVector(input))
606 .ToHandleChecked();
607 Handle<String> sample_subject =
608 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
609 RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
610 sample_subject, is_one_byte);
611 return compile_data.node;
612 }
613
614
Execute(const char * input,bool multiline,bool unicode,bool is_one_byte,bool dot_output=false)615 static void Execute(const char* input, bool multiline, bool unicode,
616 bool is_one_byte, bool dot_output = false) {
617 v8::HandleScope scope(CcTest::isolate());
618 Zone zone(CcTest::i_isolate()->allocator());
619 RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
620 USE(node);
621 #ifdef DEBUG
622 if (dot_output) {
623 RegExpEngine::DotPrint(input, node, false);
624 }
625 #endif // DEBUG
626 }
627
628
// Splay tree configuration used by the tests below: integer keys and values,
// 0 as both the "no key" and "no value" markers, and a three-way integer
// comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Returns -1, 0 or 1 when |a| is less than, equal to or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
647
648
// Deterministic mixing function used to generate test data: scales |i| and
// |j| by fixed multipliers, XORs the products and returns the result as an
// unsigned value.
static unsigned PseudoRandom(int i, int j) {
  const int scaled_i = i * 781;
  const int scaled_j = j * 329;
  const int mixed = scaled_i ^ scaled_j;
  return static_cast<unsigned>(mixed);
}
652
653
// Exercises ZoneSplayTree<TestConfig> against a plain bool array that tracks
// which keys in [0, kLimit) are currently present. Keys come from
// PseudoRandom(); a key that is already present is removed, otherwise it is
// inserted with value 3 * key. After every mutation the tree is compared
// with the array for every key.
TEST(SplayTreeSimple) {
  static const unsigned kLimit = 1000;
  Zone zone(CcTest::i_isolate()->allocator());
  ZoneSplayTree<TestConfig> tree(&zone);
  bool seen[kLimit];
  for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
// Checks that tree.Find() agrees with seen[] for every key. The expansion
// uses a Locator named `loc`, which must be in scope at the point of use.
// Note: the macro is not #undef'd at the end of this test.
#define CHECK_MAPS_EQUAL() do {                                      \
    for (unsigned k = 0; k < kLimit; k++)                            \
      CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
  } while (false)
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      int next = PseudoRandom(i, j) % kLimit;
      if (seen[next]) {
        // We've already seen this one. Check the value and remove
        // it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.Find(next, &loc));
        CHECK_EQ(next, loc.key());
        CHECK_EQ(3 * next, loc.value());
        tree.Remove(next);
        seen[next] = false;
        CHECK_MAPS_EQUAL();
      } else {
        // Check that it wasn't there already and then add it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(!tree.Find(next, &loc));
        CHECK(tree.Insert(next, &loc));
        CHECK_EQ(next, loc.key());
        loc.set_value(3 * next);
        seen[next] = true;
        CHECK_MAPS_EQUAL();
      }
      // For a key that is present, FindGreatestLessThan() is expected to
      // locate that very key; the inner loop stops once this was checked.
      int val = PseudoRandom(j, i) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindGreatestLessThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
      // Same check for FindLeastGreaterThan() with a differently derived key.
      val = PseudoRandom(i + j, i - j) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindLeastGreaterThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
    }
  }
}
704
705
// Fills a DispatchTable with kRangeCount sets of pseudo-random character
// ranges and then checks, for every value below kLimit, that the table's
// out-set reports exactly the sets whose ranges contain that value.
TEST(DispatchTableConstruction) {
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  // Each set stores kRangeSize ranges as consecutive (from, to) bounds.
  static const int kBoundCount = 2 * kRangeSize;

  // Generate the bounds: pseudo-random values, sorted so that every
  // consecutive pair forms a valid range.
  uc16 ranges[kRangeCount][kBoundCount];
  for (int set = 0; set < kRangeCount; set++) {
    Vector<uc16> bounds(ranges[set], kBoundCount);
    for (int slot = 0; slot < kBoundCount; slot++) {
      bounds[slot] = PseudoRandom(set + 25, slot + 87) % kLimit;
    }
    bounds.Sort();
    for (int slot = 1; slot < kBoundCount; slot++) {
      CHECK(bounds[slot - 1] <= bounds[slot]);
    }
  }

  // Register every range in the table under the index of its set.
  Zone zone(CcTest::i_isolate()->allocator());
  DispatchTable table(&zone);
  for (int set = 0; set < kRangeCount; set++) {
    uc16* bounds = ranges[set];
    for (int slot = 0; slot < kBoundCount; slot += 2) {
      table.AddRange(CharacterRange::Range(bounds[slot], bounds[slot + 1]),
                     set, &zone);
    }
  }

  // Compare the table against a direct scan of the generated ranges.
  for (int point = 0; point < kLimit; point++) {
    OutSet* outs = table.Get(point);
    for (int set = 0; set < kRangeCount; set++) {
      uc16* bounds = ranges[set];
      bool expected = false;
      for (int slot = 0; slot < kBoundCount; slot += 2) {
        if (bounds[slot] <= point && point <= bounds[slot + 1]) {
          expected = true;
          break;
        }
      }
      CHECK_EQ(expected, outs->Get(set));
    }
  }
}
742
743
744 // Test of debug-only syntax.
745 #ifdef DEBUG
746
// Checks that possessive quantifiers parse when
// FLAG_regexp_possessive_quantifier is on and are rejected when it is off.
// The flag's previous value is restored at the end.
TEST(ParsePossessiveRepetition) {
  const bool saved_flag = FLAG_regexp_possessive_quantifier;

  // With the syntax enabled, each quantifier is printed with the 'p' marker.
  FLAG_regexp_possessive_quantifier = true;
  CheckParseEq("a*+", "(# 0 - p 'a')");
  CheckParseEq("a++", "(# 1 - p 'a')");
  CheckParseEq("a?+", "(# 0 1 p 'a')");
  CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
  CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");

  // With the syntax disabled, the same kind of pattern must not parse.
  FLAG_regexp_possessive_quantifier = false;
  const char* const kRejected[] = {"a*+", "a++", "a?+", "a{10,20}+",
                                   "a{10,20}+b"};
  const size_t kRejectedCount = sizeof(kRejected) / sizeof(kRejected[0]);
  for (size_t index = 0; index < kRejectedCount; index++) {
    CHECK_PARSE_ERROR(kRejected[index]);
  }

  FLAG_regexp_possessive_quantifier = saved_flag;
}
770
771 #endif
772
773 // Tests of interpreter.
774
775
776 #ifndef V8_INTERPRETED_REGEXP
777
// Selects the native regexp macro assembler class for the target
// architecture and exposes it under the single name ArchRegExpMacroAssembler
// used by the tests below. If none of the V8_TARGET_ARCH_* macros listed
// here is set, the typedef is not declared.
// NOTE(review): the MIPS64 branch names RegExpMacroAssemblerMIPS, the same
// class name as the MIPS branch -- presumably the mips64 header declares a
// class with that name; confirm.
#if V8_TARGET_ARCH_IA32
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X64
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM
typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM64
typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_S390
typedef RegExpMacroAssemblerS390 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_PPC
typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS64
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X87
typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
#endif
797
// Scoped helper for tests that need an entered context: the constructor
// opens a handle scope, creates a new v8::Context on the cctest isolate and
// enters it; the destructor exits that context.
class ContextInitializer {
 public:
  ContextInitializer()
      : scope_(CcTest::isolate()),
        env_(v8::Context::New(CcTest::isolate())) {
    env_->Enter();
  }
  ~ContextInitializer() {
    env_->Exit();
  }
 private:
  // Declared before env_ so that the handle scope is constructed before the
  // context handle is created (members initialize in declaration order).
  v8::HandleScope scope_;
  v8::Local<v8::Context> env_;
};
812
813
Execute(Code * code,String * input,int start_offset,const byte * input_start,const byte * input_end,int * captures)814 static ArchRegExpMacroAssembler::Result Execute(Code* code,
815 String* input,
816 int start_offset,
817 const byte* input_start,
818 const byte* input_end,
819 int* captures) {
820 return NativeRegExpMacroAssembler::Execute(
821 code,
822 input,
823 start_offset,
824 input_start,
825 input_end,
826 captures,
827 0,
828 CcTest::i_isolate());
829 }
830
831
// Generates native code that consists of a single Succeed() and runs it on
// "foofoo". The run must report SUCCESS and every slot of the capture array
// (pre-filled with arbitrary values) must read -1 afterwards.
TEST(MacroAssemblerNativeSuccess) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone,
                             NativeRegExpMacroAssembler::LATIN1, 4);
  m.Succeed();

  Handle<String> source = factory->NewStringFromStaticChars("");
  Handle<Object> generated = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(generated);

  const int kCaptureCount = 4;
  int captures[kCaptureCount] = {42, 37, 87, 117};
  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  const byte* input_begin =
      reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
  const byte* input_end = input_begin + seq_input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_begin, input_end, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  for (int slot = 0; slot < kCaptureCount; slot++) {
    CHECK_EQ(-1, captures[slot]);
  }
}
868
869
// Hand-assembles a LATIN1 matcher labelled "^foo" and runs it against one
// subject that starts with "foo" and one that does not.
TEST(MacroAssemblerNativeSimple) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  // NOTE(review): the NULL branch targets below presumably mean "backtrack",
  // which would pop the &fail entry pushed first - confirm in the assembler.
  Label fail, backtrack;
  m.PushBacktrack(&fail);
  m.CheckNotAtStart(0, NULL);
  m.LoadCurrentCharacter(2, NULL);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(1, NULL, false);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(0, NULL, false);
  m.CheckNotCharacter('f', NULL);
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 3);
  m.AdvanceCurrentPosition(3);
  m.PushBacktrack(&backtrack);
  m.Succeed();
  m.Bind(&backtrack);
  m.Backtrack();
  m.Bind(&fail);
  m.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("^foo");
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  int captures[4] = {42, 37, 87, 117};

  // Subject beginning with "foo": registers 0 and 1 receive 0 and 3, while the
  // second capture pair is reported as -1.
  Handle<String> matching = factory->NewStringFromStaticChars("foofoo");
  Address matching_begin =
      Handle<SeqOneByteString>::cast(matching)->GetCharsAddress();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *matching, 0, matching_begin,
              matching_begin + matching->length(), captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  const int expected[4] = {0, 3, -1, -1};
  for (int i = 0; i < 4; i++) {
    CHECK_EQ(expected[i], captures[i]);
  }

  // Subject that does not begin with "foo": the same code must fail.
  Handle<String> mismatching = factory->NewStringFromStaticChars("barbarbar");
  Address mismatching_begin =
      Handle<SeqOneByteString>::cast(mismatching)->GetCharsAddress();

  result = Execute(*code, *mismatching, 0, mismatching_begin,
                   mismatching_begin + mismatching->length(), captures);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
935
936
// UC16 counterpart of the "^foo" matcher test: the same hand-assembled
// program is run against two-byte subjects, one starting with "foo" and one
// that does not.
TEST(MacroAssemblerNativeSimpleUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
                             4);

  Label fail, backtrack;
  m.PushBacktrack(&fail);
  m.CheckNotAtStart(0, NULL);
  m.LoadCurrentCharacter(2, NULL);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(1, NULL, false);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(0, NULL, false);
  m.CheckNotCharacter('f', NULL);
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 3);
  m.AdvanceCurrentPosition(3);
  m.PushBacktrack(&backtrack);
  m.Succeed();
  m.Bind(&backtrack);
  m.Backtrack();
  m.Bind(&fail);
  m.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("^foo");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  int captures[4] = {42, 37, 87, 117};
  // The trailing U+2603 keeps the subject from being representable in one
  // byte per character.
  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
                              static_cast<uc16>(0x2603)};
  Handle<String> input = factory->NewStringFromTwoByte(
      Vector<const uc16>(input_data, 6)).ToHandleChecked();
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  // Execute() takes byte pointers, so the character count has to be scaled by
  // the two-byte code unit size to reach the real end of the input. Passing
  // the unscaled length would expose only the first half of the subject.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
                               static_cast<uc16>(0x2603)};
  input = factory->NewStringFromTwoByte(
      Vector<const uc16>(input_data2, 9)).ToHandleChecked();
  seq_input = Handle<SeqTwoByteString>::cast(input);
  start_adr = seq_input->GetCharsAddress();

  result = Execute(*code,
                   *input,
                   0,
                   start_adr,
                   start_adr + input->length() * 2,
                   captures);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
1008
1009
// Loads a character at offset 10 of a six-character subject, first with an
// explicit failure label and then with a NULL target after pushing a
// backtrack entry; the run is expected to end in FAILURE.
TEST(MacroAssemblerNativeBacktrack) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             0);

  Label fail, backtrack;
  m.LoadCurrentCharacter(10, &fail);
  m.Succeed();
  m.Bind(&fail);
  m.PushBacktrack(&backtrack);
  m.LoadCurrentCharacter(10, NULL);
  m.Succeed();
  m.Bind(&backtrack);
  m.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("..........");
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
  Address input_begin =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address input_end = input_begin + input->length();

  // No captures are requested (zero registers to save), hence the NULL buffer.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_begin, input_end, NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
1049
1050
// Exercises CheckNotBackReference on a one-byte subject. Registers 0/1 record
// a two-character capture; the back reference must not match directly after
// it, but must match two characters further on.
TEST(MacroAssemblerNativeBackReferenceLATIN1) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  // At position 2 the subject continues with "oo", which differs from the
  // captured "fo", so control is expected to reach &nomatch.
  m.CheckNotBackReference(0, false, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  // At position 4 the subject continues with "fo", equal to the capture.
  m.CheckNotBackReference(0, false, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is escaped so the label really reads "\1"; an unescaped
  // "\1" inside a C++ literal is the octal escape for U+0001.
  Handle<String> source = factory->NewStringFromStaticChars("^(..)..\\1");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = factory->NewStringFromStaticChars("fooofo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(-1, output[3]);
}
1099
1100
// Two-byte counterpart of the back-reference test: the captured pair is
// {'f', U+2028}, which does not repeat at position 2 but does at position 4.
TEST(MacroAssemblerNativeBackReferenceUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
                             4);

  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  m.CheckNotBackReference(0, false, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  m.CheckNotBackReference(0, false, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is escaped so the label really reads "\1"; an unescaped
  // "\1" inside a C++ literal is the octal escape for U+0001.
  Handle<String> source = factory->NewStringFromStaticChars("^(..)..\\1");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
  Handle<String> input = factory->NewStringFromTwoByte(
      Vector<const uc16>(input_data, 6)).ToHandleChecked();
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  // The end pointer is a byte address, so the character count is doubled.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(-1, output[3]);
}
1151
1152
1153
// Exercises CheckNotAtStart. The same code is run twice over "foobar": once
// from offset 0 and once from offset 3, and both runs must succeed through
// different branches of the program.
TEST(MacroAssemblernativeAtStart) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             0);

  Label not_at_start, newline, fail;
  m.CheckNotAtStart(0, &not_at_start);
  // Check that prevchar = '\n' and current = 'f'.
  m.CheckCharacter('\n', &newline);
  m.Bind(&fail);
  m.Fail();
  m.Bind(&newline);
  m.LoadCurrentCharacter(0, &fail);
  m.CheckNotCharacter('f', &fail);
  m.Succeed();

  m.Bind(&not_at_start);
  // Check that prevchar = 'o' and current = 'b'.
  Label prevo;
  m.CheckCharacter('o', &prevo);
  m.Fail();
  m.Bind(&prevo);
  m.LoadCurrentCharacter(0, &fail);
  m.CheckNotCharacter('b', &fail);
  m.Succeed();

  Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = factory->NewStringFromStaticChars("foobar");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  // First run: matching begins at the very start of the subject.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);

  // Second run: matching begins at offset 3, i.e. between "foo" and "bar".
  result = Execute(*code,
                   *input,
                   3,
                   start_adr + 3,
                   start_adr + input->length(),
                   NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
}
1212
1213
// Exercises CheckNotBackReferenceIgnoreCase on a one-byte subject. Registers
// 2/3 capture the first three characters; the capture is then compared
// case-insensitively against the following three-character groups.
TEST(MacroAssemblerNativeBackRefNoCase) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  Label fail, succ;

  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(2, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(3, 0);
  m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "AbC".
  m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "ABC".
  Label expected_fail;
  // The next group is "xYz", so this comparison is expected to branch.
  m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
  m.Bind(&fail);
  m.Fail();

  m.Bind(&expected_fail);
  m.AdvanceCurrentPosition(3);  // Skip "xYz"
  // Only "ab" remains, so the capture cannot match here either.
  m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
  m.Fail();

  m.Bind(&succ);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();

  // Backslashes are escaped so the label really contains "\1"; an unescaped
  // "\1" inside a C++ literal is the octal escape for U+0001.
  Handle<String> source =
      factory->NewStringFromStaticChars("^(abc)\\1\\1(?!\\1)...(?!\\1)");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(12, output[1]);
  CHECK_EQ(0, output[2]);
  CHECK_EQ(3, output[3]);
}
1270
1271
1272
// Exercises register and backtrack-stack operations of the native assembler:
// push/pop of registers, saving and restoring the stack pointer, counted
// loops via IfRegisterLT / IfRegisterGE, and CheckGreedyLoop. The positions
// recorded in the six output registers are verified at the end.
TEST(MacroAssemblerNativeRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             6);

  // out1..out6 are the six saved (output) registers; sp and loop_cnt are
  // scratch registers beyond them.
  enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
  Label fail;
  Label backtrack;
  m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&backtrack);
  m.WriteStackPointerToRegister(sp);
  // Fill stack and registers
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(out1, 0);
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&fail);
  // Drop backtrack stack frames.
  m.ReadStackPointerFromRegister(sp);
  // And take the first backtrack (to &backtrack)
  m.Backtrack();

  // NOTE(review): these three instructions follow an unconditional
  // Backtrack() and precede the next bound label, so they look unreachable at
  // run time; presumably they only exercise code emission - confirm.
  m.PushCurrentPosition();
  m.AdvanceCurrentPosition(2);
  m.PopCurrentPosition();

  m.Bind(&backtrack);
  m.PopRegister(out1);
  m.ReadCurrentPositionFromRegister(out1);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]

  Label loop;
  m.SetRegister(loop_cnt, 0);  // loop counter
  m.Bind(&loop);
  m.AdvanceRegister(loop_cnt, 1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterLT(loop_cnt, 3, &loop);
  m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]

  Label loop2;
  m.SetRegister(loop_cnt, 2);  // loop counter
  m.Bind(&loop2);
  m.AdvanceRegister(loop_cnt, -1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterGE(loop_cnt, 0, &loop2);
  m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]

  Label loop3;
  Label exit_loop3;
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.ReadCurrentPositionFromRegister(out3);
  m.Bind(&loop3);
  m.AdvanceCurrentPosition(1);
  m.CheckGreedyLoop(&exit_loop3);
  m.GoTo(&loop3);
  m.Bind(&exit_loop3);
  m.PopCurrentPosition();
  m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]

  m.Succeed();

  m.Bind(&fail);
  m.Fail();

  Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  // String long enough for test (content doesn't matter).
  Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[6];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(3, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(9, output[3]);
  CHECK_EQ(9, output[4]);
  CHECK_EQ(-1, output[5]);
}
1373
1374
// Runs a program that pushes backtrack entries in an endless loop and expects
// execution to end with EXCEPTION and a pending exception on the isolate,
// which the test then clears.
TEST(MacroAssemblerStackOverflow) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             0);

  // Unbounded loop: each iteration pushes one more backtrack entry.
  Label loop;
  m.Bind(&loop);
  m.PushBacktrack(&loop);
  m.GoTo(&loop);

  Handle<String> source =
      factory->NewStringFromStaticChars("<stack overflow test>");
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  // The subject's content is irrelevant; the program never inspects it.
  Handle<String> input = factory->NewStringFromStaticChars("dummy");
  Address input_begin =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address input_end = input_begin + input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_begin, input_end, NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
  CHECK(isolate->has_pending_exception());
  isolate->clear_pending_exception();
}
1412
1413
// Uses a very high register index alongside the two output registers and
// checks that a value written to the high register survives a push/pop round
// trip into output register 1.
TEST(MacroAssemblerNativeLotsOfRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             2);

  // Must be at least 2048 so that the space allocated for registers spans one
  // full page.
  const int kHighRegister = 8000;
  Label done;
  m.WriteCurrentPositionToRegister(kHighRegister, 42);
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 1);
  m.CheckNotBackReference(0, false, &done);  // Performs a system-stack push.
  m.Bind(&done);
  m.PushRegister(kHighRegister, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PopRegister(1);
  m.Succeed();

  Handle<String> source =
      factory->NewStringFromStaticChars("<huge register space test>");
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  // The subject only needs to be long enough; its content does not matter.
  Handle<String> input = factory->NewStringFromStaticChars("sample text");
  Address input_begin =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address input_end = input_begin + input->length();

  int captures[2];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, input_begin, input_end, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  // Register 1 was overwritten with the value popped from the high register.
  CHECK_EQ(42, captures[1]);

  isolate->clear_pending_exception();
}
1462
1463 #else // V8_INTERPRETED_REGEXP
1464
// Interpreter build only: assembles bytecode for "^f(o)o" with
// RegExpMacroAssemblerIrregexp and runs it through IrregexpInterpreter::Match
// on a matching and a non-matching two-byte subject.
TEST(MacroAssembler) {
  Isolate* isolate = CcTest::i_isolate();
  byte codes[1024];
  Zone zone(isolate->allocator());
  RegExpMacroAssemblerIrregexp m(isolate, Vector<byte>(codes, 1024), &zone);
  // ^f(o)o.
  Label start, fail, backtrack;

  // Register 4 is pushed while holding 42 and then advanced by another 42, so
  // the stack carries 42 and the register ends up at 84.
  m.SetRegister(4, 42);
  m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.AdvanceRegister(4, 42);
  m.GoTo(&start);
  m.Fail();
  m.Bind(&start);
  m.PushBacktrack(&fail);
  m.CheckNotAtStart(0, NULL);
  m.LoadCurrentCharacter(0, NULL);
  m.CheckNotCharacter('f', NULL);
  m.LoadCurrentCharacter(1, NULL);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(2, NULL);
  m.CheckNotCharacter('o', NULL);
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 3);
  m.WriteCurrentPositionToRegister(2, 1);
  m.WriteCurrentPositionToRegister(3, 2);
  m.AdvanceCurrentPosition(3);
  m.PushBacktrack(&backtrack);
  m.Succeed();
  m.Bind(&backtrack);
  m.ClearRegisters(2, 3);
  m.Backtrack();
  m.Bind(&fail);
  // The failure path pops the 42 pushed at the top into register 0.
  m.PopRegister(0);
  m.Fail();

  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
  Handle<ByteArray> bytecode = Handle<ByteArray>::cast(m.GetCode(source));
  int captures[5];

  // Matching subject.
  const uc16 matching_chars[] = {'f', 'o', 'o', 'b', 'a', 'r'};
  Handle<String> matching = factory->NewStringFromTwoByte(
      Vector<const uc16>(matching_chars, 6)).ToHandleChecked();

  CHECK(IrregexpInterpreter::Match(isolate, bytecode, matching, captures, 0));
  const int expected[5] = {0, 3, 1, 2, 84};
  for (int i = 0; i < 5; i++) {
    CHECK_EQ(expected[i], captures[i]);
  }

  // Non-matching subject.
  const uc16 mismatching_chars[] = {'b', 'a', 'r', 'f', 'o', 'o'};
  Handle<String> mismatching = factory->NewStringFromTwoByte(
      Vector<const uc16>(mismatching_chars, 6)).ToHandleChecked();

  CHECK(
      !IrregexpInterpreter::Match(isolate, bytecode, mismatching, captures, 0));
  CHECK_EQ(42, captures[0]);
}
1527
1528 #endif // V8_INTERPRETED_REGEXP
1529
1530
// Checks DispatchTableConstructor::AddInverse: for ten pseudo-random range
// lists, choice 0 must be set in the table exactly for the code points that
// lie outside every range. A final fixed case checks the upper edge of a
// range ending at 0xFFFE.
TEST(AddInverseToTable) {
  static const int kLimit = 1000;
  static const int kRangeCount = 16;
  for (int round = 0; round < 10; round++) {
    Zone zone(CcTest::i_isolate()->allocator());
    ZoneList<CharacterRange>* ranges =
        new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
    for (int i = 0; i < kRangeCount; i++) {
      const int from = PseudoRandom(round + 87, i + 25) % kLimit;
      const int span = PseudoRandom(i + 87, round + 25) % (kLimit / 20);
      // Clamp the upper end to kLimit.
      const int to = (from + span > kLimit) ? kLimit : from + span;
      ranges->Add(CharacterRange::Range(from, to), &zone);
    }
    DispatchTable table(&zone);
    DispatchTableConstructor cons(&table, false, &zone);
    cons.set_choice_index(0);
    cons.AddInverse(ranges);
    for (int c = 0; c < kLimit; c++) {
      bool covered = false;
      for (int j = 0; j < kRangeCount; j++) {
        if (ranges->at(j).Contains(c)) {
          covered = true;
          break;
        }
      }
      // A code point inside some range must NOT have choice 0 set.
      CHECK_EQ(covered, !table.Get(c)->Get(0));
    }
  }

  Zone zone(CcTest::i_isolate()->allocator());
  ZoneList<CharacterRange>* ranges =
      new(&zone) ZoneList<CharacterRange>(1, &zone);
  ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone);
  DispatchTable table(&zone);
  DispatchTableConstructor cons(&table, false, &zone);
  cons.set_choice_index(0);
  cons.AddInverse(ranges);
  // 0xFFFE is the last code point of the range; 0xFFFF lies just past it.
  CHECK(!table.Get(0xFFFE)->Get(0));
  CHECK(table.Get(0xFFFF)->Get(0));
}
1567
1568
canonicalize(uc32 c)1569 static uc32 canonicalize(uc32 c) {
1570 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1571 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1572 if (count == 0) {
1573 return c;
1574 } else {
1575 CHECK_EQ(1, count);
1576 return canon[0];
1577 }
1578 }
1579
1580
// Checks canonicalization in three ways: ASCII letters and their upper-case
// partners canonicalize alike and un-canonicalize to {upper, lower};
// characters at or above 128 never canonicalize below 128; and for the whole
// BMP the result agrees with an expectation derived from ToUppercase.
TEST(LatinCanonicalize) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  const unibrow::uchar kCaseDelta = 'a' - 'A';
  for (unibrow::uchar lower = 'a'; lower <= 'z'; ++lower) {
    const unibrow::uchar upper = lower - kCaseDelta;
    CHECK_EQ(canonicalize(lower), canonicalize(upper));
    unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    const int length = un_canonicalize.get(lower, '\0', uncanon);
    CHECK_EQ(2, length);
    CHECK_EQ(upper, uncanon[0]);
    CHECK_EQ(lower, uncanon[1]);
  }

  for (uc32 c = 128; c < (1 << 21); c++) {
    CHECK_GE(canonicalize(c), 128);
  }

  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  // Canonicalization is only defined for the Basic Multilingual Plane.
  for (uc32 c = 0; c < (1 << 16); c++) {
    unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    const int length = to_upper.get(c, '\0', upper);
    // With no mapping the character stands for itself.
    uc32 expected = (length == 0) ? c : static_cast<uc32>(upper[0]);
    // Multi-character mappings, and mappings that would take a character at
    // or above 128 to one below 128, leave the character unchanged.
    if (length > 1 || (c >= 128 && expected < 128)) {
      expected = c;
    }
    CHECK_EQ(expected, canonicalize(c));
  }
}
1609
1610
CanonRangeEnd(uc32 c)1611 static uc32 CanonRangeEnd(uc32 c) {
1612 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1613 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1614 if (count == 0) {
1615 return c;
1616 } else {
1617 CHECK_EQ(1, count);
1618 return canon[0];
1619 }
1620 }
1621
1622
// Walks the BMP block by block, using CanonRangeEnd to find where each block
// ends, and verifies that every member of a block un-canonicalizes to the
// first member's result shifted by the member's distance from the block
// start.
TEST(RangeCanonicalization) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  for (int block_start = 0; block_start <= 0xFFFF;) {
    const uc32 block_end = CanonRangeEnd(block_start);
    const unsigned block_length = block_end - block_start + 1;
    if (block_length > 1) {
      unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      const int first_length = un_canonicalize.get(block_start, '\0', first);
      for (unsigned offset = 1; offset < block_length; offset++) {
        unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
        const int succ_length =
            un_canonicalize.get(block_start + offset, '\0', succ);
        CHECK_EQ(first_length, succ_length);
        for (int j = 0; j < succ_length; j++) {
          const int calc = first[j] + offset;
          const int found = succ[j];
          CHECK_EQ(calc, found);
        }
      }
    }
    // Continue with the character right after this block.
    block_start += block_length;
  }
}
1649
1650
// For every BMP code point, each character in its un-canonicalization result
// must itself un-canonicalize to that very same result.
TEST(UncanonicalizeEquivalence) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  unibrow::uchar members[unibrow::Ecma262UnCanonicalize::kMaxWidth];
  for (int c = 0; c < (1 << 16); c++) {
    const int member_count = un_canonicalize.get(c, '\0', members);
    for (int j = 0; j < member_count; j++) {
      unibrow::uchar again[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      const int again_count = un_canonicalize.get(members[j], '\0', again);
      CHECK_EQ(member_count, again_count);
      for (int k = 0; k < member_count; k++) {
        CHECK_EQ(static_cast<int>(members[k]), static_cast<int>(again[k]));
      }
    }
  }
}
1665
1666
TestRangeCaseIndependence(Isolate * isolate,CharacterRange input,Vector<CharacterRange> expected)1667 static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
1668 Vector<CharacterRange> expected) {
1669 Zone zone(CcTest::i_isolate()->allocator());
1670 int count = expected.length();
1671 ZoneList<CharacterRange>* list =
1672 new(&zone) ZoneList<CharacterRange>(count, &zone);
1673 list->Add(input, &zone);
1674 CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
1675 list->Remove(0); // Remove the input before checking results.
1676 CHECK_EQ(count, list->length());
1677 for (int i = 0; i < list->length(); i++) {
1678 CHECK_EQ(expected[i].from(), list->at(i).from());
1679 CHECK_EQ(expected[i].to(), list->at(i).to());
1680 }
1681 }
1682
1683
TestSimpleRangeCaseIndependence(Isolate * isolate,CharacterRange input,CharacterRange expected)1684 static void TestSimpleRangeCaseIndependence(Isolate* isolate,
1685 CharacterRange input,
1686 CharacterRange expected) {
1687 EmbeddedVector<CharacterRange, 1> vector;
1688 vector[0] = expected;
1689 TestRangeCaseIndependence(isolate, input, vector);
1690 }
1691
1692
// Table-driven check of the case equivalents added for ASCII letter ranges.
// Each row pairs an input range with the single range expected to be added.
TEST(CharacterRangeCaseIndependence) {
  Isolate* isolate = CcTest::i_isolate();

  struct Case {
    CharacterRange input;
    CharacterRange expected;
  };
  const Case kCases[] = {
      {CharacterRange::Singleton('a'), CharacterRange::Singleton('A')},
      {CharacterRange::Singleton('z'), CharacterRange::Singleton('Z')},
      {CharacterRange::Range('a', 'z'), CharacterRange::Range('A', 'Z')},
      {CharacterRange::Range('c', 'f'), CharacterRange::Range('C', 'F')},
      {CharacterRange::Range('a', 'b'), CharacterRange::Range('A', 'B')},
      {CharacterRange::Range('y', 'z'), CharacterRange::Range('Y', 'Z')},
      {CharacterRange::Range('a' - 1, 'z' + 1),
       CharacterRange::Range('A', 'Z')},
      {CharacterRange::Range('A', 'Z'), CharacterRange::Range('a', 'z')},
      {CharacterRange::Range('C', 'F'), CharacterRange::Range('c', 'f')},
      {CharacterRange::Range('A' - 1, 'Z' + 1),
       CharacterRange::Range('a', 'z')},
      // Strictly only [l-z] is needed to complete the case independence of
      // [A-Za-z], but [a-z] is expected because a whole block is always added
      // at a time.
      {CharacterRange::Range('A', 'k'), CharacterRange::Range('a', 'z')},
  };

  for (const Case& test_case : kCases) {
    TestSimpleRangeCaseIndependence(isolate, test_case.input,
                                    test_case.expected);
  }
}
1723
1724
InClass(uc32 c,ZoneList<CharacterRange> * ranges)1725 static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) {
1726 if (ranges == NULL)
1727 return false;
1728 for (int i = 0; i < ranges->length(); i++) {
1729 CharacterRange range = ranges->at(i);
1730 if (range.from() <= c && c <= range.to())
1731 return true;
1732 }
1733 return false;
1734 }
1735
1736
// Splits the range returned by CharacterRange::Everything() with
// UnicodeRangeSplitter and checks that every code point up to 0x10ffff lands
// in exactly one of the four classes: BMP, lead surrogates, trail surrogates
// or non-BMP. All loop bounds are inclusive so that the last code point of
// each class (0xdbff, 0xdfff, 0xffff, 0x10ffff) is verified as well.
TEST(UnicodeRangeSplitter) {
  Zone zone(CcTest::i_isolate()->allocator());
  ZoneList<CharacterRange>* base =
      new(&zone) ZoneList<CharacterRange>(1, &zone);
  base->Add(CharacterRange::Everything(), &zone);
  UnicodeRangeSplitter splitter(&zone, base);
  // BMP, below the surrogate area.
  for (uc32 c = 0; c <= 0xd7ff; c++) {
    CHECK(InClass(c, splitter.bmp()));
    CHECK(!InClass(c, splitter.lead_surrogates()));
    CHECK(!InClass(c, splitter.trail_surrogates()));
    CHECK(!InClass(c, splitter.non_bmp()));
  }
  // Lead surrogates
  for (uc32 c = 0xd800; c <= 0xdbff; c++) {
    CHECK(!InClass(c, splitter.bmp()));
    CHECK(InClass(c, splitter.lead_surrogates()));
    CHECK(!InClass(c, splitter.trail_surrogates()));
    CHECK(!InClass(c, splitter.non_bmp()));
  }
  // Trail surrogates
  for (uc32 c = 0xdc00; c <= 0xdfff; c++) {
    CHECK(!InClass(c, splitter.bmp()));
    CHECK(!InClass(c, splitter.lead_surrogates()));
    CHECK(InClass(c, splitter.trail_surrogates()));
    CHECK(!InClass(c, splitter.non_bmp()));
  }
  // BMP, above the surrogate area.
  for (uc32 c = 0xe000; c <= 0xffff; c++) {
    CHECK(InClass(c, splitter.bmp()));
    CHECK(!InClass(c, splitter.lead_surrogates()));
    CHECK(!InClass(c, splitter.trail_surrogates()));
    CHECK(!InClass(c, splitter.non_bmp()));
  }
  // Non-BMP
  for (uc32 c = 0x10000; c <= 0x10ffff; c++) {
    CHECK(!InClass(c, splitter.bmp()));
    CHECK(!InClass(c, splitter.lead_surrogates()));
    CHECK(!InClass(c, splitter.trail_surrogates()));
    CHECK(InClass(c, splitter.non_bmp()));
  }
}
1779
1780
// Exercises CharacterSet::Canonicalize() on one shared range list that is
// refilled for each scenario. After canonicalization the list is expected to
// be sorted by start, with touching ranges merged into one.
TEST(CanonicalizeCharacterSets) {
  Zone zone(CcTest::i_isolate()->allocator());
  ZoneList<CharacterRange>* ranges =
      new(&zone) ZoneList<CharacterRange>(4, &zone);
  CharacterSet set(ranges);

  // Scenario 1: input is already sorted and disjoint; it must be unchanged.
  {
    const int expected[3][2] = {{10, 20}, {30, 40}, {50, 60}};
    ranges->Add(CharacterRange::Range(10, 20), &zone);
    ranges->Add(CharacterRange::Range(30, 40), &zone);
    ranges->Add(CharacterRange::Range(50, 60), &zone);
    set.Canonicalize();
    CHECK_EQ(3, ranges->length());
    for (int i = 0; i < 3; i++) {
      CHECK_EQ(expected[i][0], ranges->at(i).from());
      CHECK_EQ(expected[i][1], ranges->at(i).to());
    }
  }

  // Scenario 2: the last two ranges are swapped; they must come out sorted.
  {
    const int expected[3][2] = {{10, 20}, {30, 40}, {50, 60}};
    ranges->Rewind(0);
    ranges->Add(CharacterRange::Range(10, 20), &zone);
    ranges->Add(CharacterRange::Range(50, 60), &zone);
    ranges->Add(CharacterRange::Range(30, 40), &zone);
    set.Canonicalize();
    CHECK_EQ(3, ranges->length());
    for (int i = 0; i < 3; i++) {
      CHECK_EQ(expected[i][0], ranges->at(i).from());
      CHECK_EQ(expected[i][1], ranges->at(i).to());
    }
  }

  // Scenario 3: unsorted mix of ranges and single-element ranges that never
  // touch; all five must survive, in ascending order.
  {
    const int expected[5][2] = {
        {1, 1}, {10, 20}, {25, 25}, {30, 40}, {100, 100}};
    ranges->Rewind(0);
    ranges->Add(CharacterRange::Range(30, 40), &zone);
    ranges->Add(CharacterRange::Range(10, 20), &zone);
    ranges->Add(CharacterRange::Range(25, 25), &zone);
    ranges->Add(CharacterRange::Range(100, 100), &zone);
    ranges->Add(CharacterRange::Range(1, 1), &zone);
    set.Canonicalize();
    CHECK_EQ(5, ranges->length());
    for (int i = 0; i < 5; i++) {
      CHECK_EQ(expected[i][0], ranges->at(i).from());
      CHECK_EQ(expected[i][1], ranges->at(i).to());
    }
  }

  // Scenario 4: [20, 20] bridges [10, 19] and [21, 30]; everything must be
  // merged into the single range [10, 30].
  {
    ranges->Rewind(0);
    ranges->Add(CharacterRange::Range(10, 19), &zone);
    ranges->Add(CharacterRange::Range(21, 30), &zone);
    ranges->Add(CharacterRange::Range(20, 20), &zone);
    set.Canonicalize();
    CHECK_EQ(1, ranges->length());
    CHECK_EQ(10, ranges->at(0).from());
    CHECK_EQ(30, ranges->at(0).to());
  }
}
1840
1841
// Builds two range lists, l1 (the "X" ranges) and l2 (the "Y" ranges), that
// together cover every relative placement of one range against another, and
// checks that both lists are canonical. Each case is placed at its own offset
// so that neighbouring cases never touch. Range bounds are inclusive.
// Note: this test only asserts canonicality of the two lists.
TEST(CharacterRangeMerge) {
  Zone zone(CcTest::i_isolate()->allocator());
  ZoneList<CharacterRange> l1(4, &zone);
  ZoneList<CharacterRange> l2(4, &zone);
  // Create all combinations of intersections of ranges, both singletons and
  // longer.

  int offset = 0;

  // The five kinds of singleton intersections:
  //   X
  // Y      - outside before
  //  Y     - outside touching start
  //   Y    - overlap
  //    Y   - outside touching end
  //     Y  - outside after

  for (int i = 0; i < 5; i++) {
    l1.Add(CharacterRange::Singleton(offset + 2), &zone);
    l2.Add(CharacterRange::Singleton(offset + i), &zone);
    offset += 6;
  }

  // The seven kinds of singleton/non-singleton intersections:
  //   XXX
  // Y        - outside before
  //  Y       - outside touching start
  //   Y      - inside touching start
  //    Y     - entirely inside
  //     Y    - inside touching end
  //      Y   - outside touching end
  //       Y  - disjoint after

  for (int i = 0; i < 7; i++) {
    l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
    l2.Add(CharacterRange::Singleton(offset + i), &zone);
    offset += 8;
  }

  // The eleven kinds of non-singleton intersections:
  //
  //       XXXXXXXX
  // YYYY                  - outside before.
  //   YYYY                - outside touching start.
  //     YYYY              - overlapping start
  //       YYYY            - inside touching start
  //         YYYY          - entirely inside
  //           YYYY        - inside touching end
  //             YYYY      - overlapping end
  //               YYYY    - outside touching end
  //                 YYYY  - outside after
  //       YYYYYYYY        - identical
  //     YYYYYYYYYYYY      - containing entirely.

  // X occupies offset + 6 .. offset + 13 inclusive (length 8). The length-4 Y
  // slides two positions per iteration, which yields exactly the first nine
  // cases of the diagram above.
  for (int i = 0; i < 9; i++) {
    l1.Add(CharacterRange::Range(offset + 6, offset + 13), &zone);  // Length 8.
    l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
    offset += 22;
  }
  // Identical.
  l1.Add(CharacterRange::Range(offset + 6, offset + 13), &zone);
  l2.Add(CharacterRange::Range(offset + 6, offset + 13), &zone);
  offset += 22;
  // Containing entirely: Y extends two positions past each end of X.
  l1.Add(CharacterRange::Range(offset + 6, offset + 13), &zone);
  l2.Add(CharacterRange::Range(offset + 4, offset + 15), &zone);
  offset += 22;

  // Different kinds of multi-range overlap:
  // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
  //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y

  // Both X ranges are positioned relative to the offset at this point; the
  // loop below then advances offset by 9 for each "YYYY  Y" group.
  l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
  l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
  for (int i = 0; i < 6; i++) {
    l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
    l2.Add(CharacterRange::Singleton(offset + 8), &zone);
    offset += 9;
  }

  CHECK(CharacterRange::IsCanonical(&l1));
  CHECK(CharacterRange::IsCanonical(&l2));
}
1927
1928
// Runs the pattern \b\w+\b (one or more word characters between word
// boundaries) through Execute() with the flag values false, true, true.
TEST(Graph) {
  const char* const kPattern = "\\b\\w+\\b";
  Execute(kPattern, false, true, true);
}
1932
1933
1934 namespace {
1935
1936 int* global_use_counts = NULL;
1937
MockUseCounterCallback(v8::Isolate * isolate,v8::Isolate::UseCounterFeature feature)1938 void MockUseCounterCallback(v8::Isolate* isolate,
1939 v8::Isolate::UseCounterFeature feature) {
1940 ++global_use_counts[feature];
1941 }
1942 }
1943
1944
1945 // Test that ES2015 RegExp compatibility fixes are in place, that they
1946 // are not overly broad, and the appropriate UseCounters are incremented
TEST(UseCountRegExp)1947 TEST(UseCountRegExp) {
1948 v8::Isolate* isolate = CcTest::isolate();
1949 v8::HandleScope scope(isolate);
1950 LocalContext env;
1951 int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
1952 global_use_counts = use_counts;
1953 CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
1954
1955 // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
1956 v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
1957 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1958 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1959 CHECK(resultSticky->IsUndefined());
1960
1961 // re.sticky has approriate value and doesn't touch UseCounter
1962 v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
1963 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1964 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1965 CHECK(resultReSticky->IsFalse());
1966
1967 // When the getter is caleld on another object, throw an exception
1968 // and don't increment the UseCounter
1969 v8::Local<v8::Value> resultStickyError = CompileRun(
1970 "var exception;"
1971 "try { "
1972 " Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
1973 " .get.call(null);"
1974 "} catch (e) {"
1975 " exception = e;"
1976 "}"
1977 "exception");
1978 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1979 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1980 CHECK(resultStickyError->IsObject());
1981
1982 // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
1983 // a UseCounter is incremented to track it.
1984 v8::Local<v8::Value> resultToString =
1985 CompileRun("RegExp.prototype.toString().length");
1986 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1987 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1988 CHECK(resultToString->IsInt32());
1989 CHECK_EQ(6,
1990 resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1991
1992 // .toString() works on normal RegExps
1993 v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
1994 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1995 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1996 CHECK(resultReToString->IsInt32());
1997 CHECK_EQ(
1998 3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1999
2000 // .toString() throws on non-RegExps that aren't RegExp.prototype
2001 v8::Local<v8::Value> resultToStringError = CompileRun(
2002 "var exception;"
2003 "try { RegExp.prototype.toString.call(null) }"
2004 "catch (e) { exception = e; }"
2005 "exception");
2006 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
2007 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
2008 CHECK(resultToStringError->IsObject());
2009 }
2010
2011 class UncachedExternalString
2012 : public v8::String::ExternalOneByteStringResource {
2013 public:
data() const2014 const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; }
length() const2015 size_t length() const override { return 26; }
IsCompressible() const2016 bool IsCompressible() const override { return true; }
2017 };
2018
// Matches a regexp against a substring of an external one-byte string that
// has the short external one-byte string map.
TEST(UncachedExternalString) {
  v8::Isolate* isolate = CcTest::isolate();
  v8::HandleScope scope(isolate);
  LocalContext env;

  // NOTE(review): presumably V8 takes ownership of the resource once the
  // external string has been created - confirm against the API docs.
  UncachedExternalString* resource = new UncachedExternalString();
  v8::Local<v8::String> external =
      v8::String::NewExternalOneByte(isolate, resource).ToLocalChecked();

  // The test is only meaningful if the string got the short external map.
  CHECK(CcTest::i_isolate()->heap()->short_external_one_byte_string_map() ==
        v8::Utils::OpenHandle(*external)->map());

  // Expose the string to script as the global "external".
  v8::Local<v8::Object> global = env->Global();
  global->Set(env.local(), v8_str("external"), external).FromJust();

  // Run the regexp once on an ordinary string, then on a substring of the
  // external string; the character captured after 'y' must be "z".
  CompileRun("var re = /y(.)/; re.test('ab');");
  ExpectString("external.substring(1).match(re)[1]", "z");
}
2033