1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31
32 #include "v8.h"
33
34 #include "isolate.h"
35 #include "token.h"
36 #include "scanner.h"
37 #include "parser.h"
38 #include "utils.h"
39 #include "execution.h"
40 #include "preparser.h"
41 #include "cctest.h"
42
43 namespace i = ::v8::internal;
44
// Exercises i::KeywordMatcher: each keyword generated from TOKEN_LIST must be
// matched exactly, while future keywords, over-long input, embedded zero
// characters, wrong case and explicitly failed matchers must all report
// IDENTIFIER.
TEST(KeywordMatcher) {
  struct KeywordToken {
    const char* keyword;
    i::Token::Value token;
  };

  // Entries generated from the KEYWORD slot of TOKEN_LIST, terminated by a
  // NULL keyword.
  static const KeywordToken keywords[] = {
#define KEYWORD(t, s, d) { s, i::Token::t },
#define IGNORE(t, s, d) /* */
    TOKEN_LIST(IGNORE, KEYWORD, IGNORE)
#undef KEYWORD
    { NULL, i::Token::IDENTIFIER }
  };

  // Spellings generated from the FUTURE slot of TOKEN_LIST, NULL-terminated.
  static const char* future_keywords[] = {
#define FUTURE(t, s, d) s,
    TOKEN_LIST(IGNORE, IGNORE, FUTURE)
#undef FUTURE
#undef IGNORE
    NULL
  };

  for (const KeywordToken* entry = keywords;
       entry->keyword != NULL;
       entry++) {
    i::KeywordMatcher matcher;
    const char* spelling = entry->keyword;
    const int spelling_length = i::StrLength(spelling);
    for (int pos = 0; pos < spelling_length; pos++) {
      // Before the whole keyword has been fed in, the matcher reports
      // IDENTIFIER. The single exception is "instanceof" after two
      // characters, because "in" is itself a keyword (and the only keyword
      // that is a prefix of another).
      i::Token::Value partial_token = i::Token::IDENTIFIER;
      if (pos == 2 && entry->token == i::Token::INSTANCEOF) {
        partial_token = i::Token::IN;
      }
      CHECK_EQ(partial_token, matcher.token());
      matcher.AddChar(spelling[pos]);
    }
    CHECK_EQ(entry->token, matcher.token());
    // One extra character breaks the match...
    matcher.AddChar('z');
    CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
    // ...and appending a complete keyword afterwards does not restore it.
    matcher.AddChar('i');
    matcher.AddChar('f');
    CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
  }

  // Future keywords must not be recognized as keywords.
  for (const char** future = future_keywords; *future != NULL; future++) {
    i::KeywordMatcher matcher;
    const int future_length = i::StrLength(*future);
    for (int pos = 0; pos < future_length; pos++) {
      matcher.AddChar((*future)[pos]);
    }
    CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
  }

  // A leading zero character is not skipped.
  {
    i::KeywordMatcher zero_first;
    zero_first.AddChar(0);
    CHECK_EQ(i::Token::IDENTIFIER, zero_first.token());
    zero_first.AddChar('i');
    zero_first.AddChar('f');
    CHECK_EQ(i::Token::IDENTIFIER, zero_first.token());
  }

  // A trailing zero character is not skipped either.
  {
    i::KeywordMatcher zero_last;
    zero_last.AddChar('i');
    zero_last.AddChar('f');
    CHECK_EQ(i::Token::IF, zero_last.token());
    zero_last.AddChar(0);
    CHECK_EQ(i::Token::IDENTIFIER, zero_last.token());
  }

  // Matching is case sensitive.
  {
    i::KeywordMatcher wrong_case;
    wrong_case.AddChar('i');
    wrong_case.AddChar('F');
    CHECK_EQ(i::Token::IDENTIFIER, wrong_case.token());
  }

  // After Fail(), feeding the rest of a keyword does not produce a match.
  {
    i::KeywordMatcher failed;
    failed.AddChar('i');
    CHECK_EQ(i::Token::IDENTIFIER, failed.token());
    failed.Fail();
    CHECK_EQ(i::Token::IDENTIFIER, failed.token());
    failed.AddChar('f');
    CHECK_EQ(i::Token::IDENTIFIER, failed.token());
  }
}
134
135
// Regression test for
// http://code.google.com/p/chromium/issues/detail?id=53548
//
// "-->" must be treated as a comment running to the end of the line whenever
// only whitespace (possibly preceded by a multiline comment) comes before it
// on that line. This used to fail when it appeared before the first real
// token of the input.
TEST(ScanHTMLEndComments) {
  v8::V8::Initialize();

  // NULL-terminated list of sources that must all preparse without error.
  const char* sources[] = {
    // "-->" before the first real token.
    "--> is eol-comment\nvar y = 37;\n",
    "\n --> is eol-comment\nvar y = 37;\n",
    "/* precomment */ --> is eol-comment\nvar y = 37;\n",
    "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
    // "-->" after the first real token.
    "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
    "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
    NULL
  };

  // The parser/scanner needs a stack limit; place it 128KB below a local.
  int marker;
  uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&marker) - 128 * 1024;
  i::Isolate::Current()->stack_guard()->SetStackLimit(stack_limit);

  for (const char** source = sources; *source != NULL; source++) {
    v8::ScriptData* data =
        v8::ScriptData::PreCompile(*source, i::StrLength(*source));
    CHECK(data != NULL);
    CHECK(!data->HasError());
    delete data;
  }
}
169
170
171 class ScriptResource : public v8::String::ExternalAsciiStringResource {
172 public:
ScriptResource(const char * data,size_t length)173 ScriptResource(const char* data, size_t length)
174 : data_(data), length_(length) { }
175
data() const176 const char* data() const { return data_; }
length() const177 size_t length() const { return length_; }
178
179 private:
180 const char* data_;
181 size_t length_;
182 };
183
184
// Preparses a script that contains functions which might be compiled lazily
// and all kinds of symbols, feeds the preparse data to the compiler with
// FLAG_lazy both on and off, and finally checks the error information that
// preparsing records for a script with a syntax error.
TEST(Preparsing) {
  v8::HandleScope handles;
  v8::Persistent<v8::Context> context = v8::Context::New();
  v8::Context::Scope context_scope(context);
  // The parser/scanner needs a stack limit; place it 128KB below a local.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  // Source containing functions that might be lazily compiled and all types
  // of symbols (string, propertyName, regexp).
  const char* source =
      "var x = 42;"
      "function foo(a) { return function nolazy(b) { return a + b; } }"
      "function bar(a) { if (a) return function lazy(b) { return b; } }"
      "var z = {'string': 'string literal', bareword: 'propertyName', "
      "         42: 'number literal', for: 'keyword as propertyName', "
      "         f\\u006fr: 'keyword propertyname with escape'};"
      "var v = /RegExp Literal/;"
      "var w = /RegExp Literal\\u0020With Escape/gin;"
      "var y = { get getter() { return 42; }, "
      "          set setter(v) { this.value = v; }};";
  int source_length = i::StrLength(source);
  const char* error_source = "var x = y z;";
  int error_source_length = i::StrLength(error_source);

  v8::ScriptData* preparse =
      v8::ScriptData::PreCompile(source, source_length);
  // Guard against a NULL result before dereferencing it.
  CHECK(preparse != NULL && !preparse->HasError());

  // FLAG_lazy is toggled below; remember the original value to restore it.
  bool lazy_flag = i::FLAG_lazy;
  {
    i::FLAG_lazy = true;
    ScriptResource* resource = new ScriptResource(source, source_length);
    v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
    v8::Script::Compile(script_source, NULL, preparse);
  }

  {
    i::FLAG_lazy = false;

    ScriptResource* resource = new ScriptResource(source, source_length);
    v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
    v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
  }
  delete preparse;
  i::FLAG_lazy = lazy_flag;

  // Syntax error.
  v8::ScriptData* error_preparse =
      v8::ScriptData::PreCompile(error_source, error_source_length);
  CHECK(error_preparse != NULL && error_preparse->HasError());
  i::ScriptDataImpl* pre_impl =
      reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
  i::Scanner::Location error_location =
      pre_impl->MessageLocation();
  // Error is at "z" in source, location 10..11.
  CHECK_EQ(10, error_location.beg_pos);
  CHECK_EQ(11, error_location.end_pos);
  // Should not crash.
  const char* message = pre_impl->BuildMessage();
  i::Vector<const char*> args = pre_impl->BuildArgs();
  CHECK_GT(strlen(message), 0);
  // Release the error preparse data, mirroring the deletion of |preparse|
  // above; previously it was leaked.
  delete error_preparse;
}
247
248
// Runs v8::preparser::PreParser directly (scanner plus recorder, without
// going through the ScriptData API) over a handful of programs and expects
// each one to preparse successfully and to leave no error in the recorded
// data.
TEST(StandAlonePreParser) {
  typedef v8::preparser::PreParser PreParser;

  v8::V8::Initialize();

  // The parser/scanner needs a stack limit; place it 128KB below a local.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  // NULL-terminated list of programs to preparse.
  const char* programs[] = {
    "{label: 42}",
    "var x = 42;",
    "function foo(x, y) { return x + y; }",
    "native function foo(); return %ArgleBargle(glop);",
    "var x = new new Function('this.x = 42');",
    NULL
  };

  uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
  for (const char** cursor = programs; *cursor != NULL; cursor++) {
    const char* program = *cursor;
    const unsigned program_length = static_cast<unsigned>(strlen(program));
    i::Utf8ToUC16CharacterStream stream(
        reinterpret_cast<const i::byte*>(program), program_length);
    i::CompleteParserRecorder log;
    i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
    scanner.Initialize(&stream);

    PreParser::PreParseResult result =
        PreParser::PreParseProgram(&scanner, &log, true, stack_limit);
    CHECK_EQ(PreParser::kPreParseSuccess, result);

    i::ScriptDataImpl data(log.ExtractData());
    CHECK(!data.has_error());
  }
}
285
286
// Regression test: preparsing the program below must report an error.
//
// Parsing fails because an identifier is expected after "function". Before
// the fix, *ok was not checked after Expect(Token::Identifier, ok) and the
// invalid currently scanned literal was used anyway. That always failed in
// debug mode and sometimes crashed in release mode.
TEST(RegressChromium62639) {
  v8::V8::Initialize();

  // The parser/scanner needs a stack limit; place it 128KB below a local.
  int marker;
  uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&marker) - 128 * 1024;
  i::Isolate::Current()->stack_guard()->SetStackLimit(stack_limit);

  const char* program =
      "var x = 'something';\n"
      "escape: function() {}";
  const unsigned program_length = static_cast<unsigned>(strlen(program));

  i::Utf8ToUC16CharacterStream stream(
      reinterpret_cast<const i::byte*>(program), program_length);
  i::ScriptDataImpl* data = i::ParserApi::PreParse(&stream, NULL);
  CHECK(data->HasError());
  delete data;
}
308
309
// Regression test: preparsing did not treat the catch clause of a try
// statement as with-content. It therefore assumed that a function inside the
// block could be compiled lazily and added an extra, unexpected entry to the
// preparse data.
TEST(Regress928) {
  v8::V8::Initialize();

  // The parser/scanner needs a stack limit; place it 128KB below a local.
  int marker;
  uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&marker) - 128 * 1024;
  i::Isolate::Current()->stack_guard()->SetStackLimit(stack_limit);

  const char* program =
      "try { } catch (e) { var foo = function () { /* first */ } }"
      "var bar = function () { /* second */ }";
  const unsigned program_length = static_cast<unsigned>(strlen(program));

  i::Utf8ToUC16CharacterStream stream(
      reinterpret_cast<const i::byte*>(program), program_length);
  i::ScriptDataImpl* data = i::ParserApi::PartialPreParse(&stream, NULL);
  CHECK(!data->HasError());

  data->Initialize();

  // Distance from the start of "function" to the opening brace of its body.
  const int header_length = static_cast<int>(strlen("function () "));

  // The function inside the catch block must have no entry.
  const char* first_keyword = strstr(program, "function");
  int first_lbrace = static_cast<int>(first_keyword - program) + header_length;
  CHECK_EQ('{', program[first_lbrace]);
  i::FunctionEntry first_entry = data->GetFunctionEntry(first_lbrace);
  CHECK(!first_entry.is_valid());

  // The function after the try statement must have an entry that ends on
  // its closing brace.
  const char* second_keyword = strstr(program + first_lbrace, "function");
  int second_lbrace =
      static_cast<int>(second_keyword - program) + header_length;
  CHECK_EQ('{', program[second_lbrace]);
  i::FunctionEntry second_entry = data->GetFunctionEntry(second_lbrace);
  CHECK(second_entry.is_valid());
  CHECK_EQ('}', program[second_entry.end_pos() - 1]);
  delete data;
}
350
351
// Preparses a program consisting of one megabyte of '(' characters and
// expects the preparser to report a stack overflow instead of succeeding.
TEST(PreParseOverflow) {
  v8::V8::Initialize();

  // The parser/scanner needs a stack limit; place it 128KB below a local.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  const size_t kProgramSize = 1024 * 1024;
  // Allocate with new[] so the allocation matches how i::SmartPointer is used
  // elsewhere in this file (it is handed new[]-allocated arrays); the buffer
  // used to come from malloc(), whose result was also never checked.
  i::SmartPointer<char> program(new char[kProgramSize + 1]);
  memset(*program, '(', kProgramSize);
  program[kProgramSize] = '\0';

  uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();

  i::Utf8ToUC16CharacterStream stream(
      reinterpret_cast<const i::byte*>(*program),
      static_cast<unsigned>(kProgramSize));
  i::CompleteParserRecorder log;
  i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
  scanner.Initialize(&stream);


  v8::preparser::PreParser::PreParseResult result =
      v8::preparser::PreParser::PreParseProgram(&scanner,
                                                &log,
                                                true,
                                                stack_limit);
  CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
}
382
383
384 class TestExternalResource: public v8::String::ExternalStringResource {
385 public:
TestExternalResource(uint16_t * data,int length)386 explicit TestExternalResource(uint16_t* data, int length)
387 : data_(data), length_(static_cast<size_t>(length)) { }
388
~TestExternalResource()389 ~TestExternalResource() { }
390
data() const391 const uint16_t* data() const {
392 return data_;
393 }
394
length() const395 size_t length() const {
396 return length_;
397 }
398 private:
399 uint16_t* data_;
400 size_t length_;
401 };
402
403
// CHECK_EQ variant that casts both operands to int before comparing them, so
// that operands of different integer types (e.g. unsigned stream positions
// and int counters) can be compared directly.
#define CHECK_EQU(lhs, rhs) \
  CHECK_EQ(static_cast<int>(lhs), static_cast<int>(rhs))
405
TestCharacterStream(const char * ascii_source,unsigned length,unsigned start=0,unsigned end=0)406 void TestCharacterStream(const char* ascii_source,
407 unsigned length,
408 unsigned start = 0,
409 unsigned end = 0) {
410 if (end == 0) end = length;
411 unsigned sub_length = end - start;
412 i::HandleScope test_scope;
413 i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
414 for (unsigned i = 0; i < length; i++) {
415 uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
416 }
417 i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
418 i::Handle<i::String> ascii_string(
419 FACTORY->NewStringFromAscii(ascii_vector));
420 TestExternalResource resource(*uc16_buffer, length);
421 i::Handle<i::String> uc16_string(
422 FACTORY->NewExternalStringFromTwoByte(&resource));
423
424 i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
425 i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
426 i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
427 i::Utf8ToUC16CharacterStream utf8_stream(
428 reinterpret_cast<const i::byte*>(ascii_source), end);
429 utf8_stream.SeekForward(start);
430
431 unsigned i = start;
432 while (i < end) {
433 // Read streams one char at a time
434 CHECK_EQU(i, uc16_stream.pos());
435 CHECK_EQU(i, string_stream.pos());
436 CHECK_EQU(i, utf8_stream.pos());
437 int32_t c0 = ascii_source[i];
438 int32_t c1 = uc16_stream.Advance();
439 int32_t c2 = string_stream.Advance();
440 int32_t c3 = utf8_stream.Advance();
441 i++;
442 CHECK_EQ(c0, c1);
443 CHECK_EQ(c0, c2);
444 CHECK_EQ(c0, c3);
445 CHECK_EQU(i, uc16_stream.pos());
446 CHECK_EQU(i, string_stream.pos());
447 CHECK_EQU(i, utf8_stream.pos());
448 }
449 while (i > start + sub_length / 4) {
450 // Pushback, re-read, pushback again.
451 int32_t c0 = ascii_source[i - 1];
452 CHECK_EQU(i, uc16_stream.pos());
453 CHECK_EQU(i, string_stream.pos());
454 CHECK_EQU(i, utf8_stream.pos());
455 uc16_stream.PushBack(c0);
456 string_stream.PushBack(c0);
457 utf8_stream.PushBack(c0);
458 i--;
459 CHECK_EQU(i, uc16_stream.pos());
460 CHECK_EQU(i, string_stream.pos());
461 CHECK_EQU(i, utf8_stream.pos());
462 int32_t c1 = uc16_stream.Advance();
463 int32_t c2 = string_stream.Advance();
464 int32_t c3 = utf8_stream.Advance();
465 i++;
466 CHECK_EQU(i, uc16_stream.pos());
467 CHECK_EQU(i, string_stream.pos());
468 CHECK_EQU(i, utf8_stream.pos());
469 CHECK_EQ(c0, c1);
470 CHECK_EQ(c0, c2);
471 CHECK_EQ(c0, c3);
472 uc16_stream.PushBack(c0);
473 string_stream.PushBack(c0);
474 utf8_stream.PushBack(c0);
475 i--;
476 CHECK_EQU(i, uc16_stream.pos());
477 CHECK_EQU(i, string_stream.pos());
478 CHECK_EQU(i, utf8_stream.pos());
479 }
480 unsigned halfway = start + sub_length / 2;
481 uc16_stream.SeekForward(halfway - i);
482 string_stream.SeekForward(halfway - i);
483 utf8_stream.SeekForward(halfway - i);
484 i = halfway;
485 CHECK_EQU(i, uc16_stream.pos());
486 CHECK_EQU(i, string_stream.pos());
487 CHECK_EQU(i, utf8_stream.pos());
488
489 while (i < end) {
490 // Read streams one char at a time
491 CHECK_EQU(i, uc16_stream.pos());
492 CHECK_EQU(i, string_stream.pos());
493 CHECK_EQU(i, utf8_stream.pos());
494 int32_t c0 = ascii_source[i];
495 int32_t c1 = uc16_stream.Advance();
496 int32_t c2 = string_stream.Advance();
497 int32_t c3 = utf8_stream.Advance();
498 i++;
499 CHECK_EQ(c0, c1);
500 CHECK_EQ(c0, c2);
501 CHECK_EQ(c0, c3);
502 CHECK_EQU(i, uc16_stream.pos());
503 CHECK_EQU(i, string_stream.pos());
504 CHECK_EQU(i, utf8_stream.pos());
505 }
506
507 int32_t c1 = uc16_stream.Advance();
508 int32_t c2 = string_stream.Advance();
509 int32_t c3 = utf8_stream.Advance();
510 CHECK_LT(c1, 0);
511 CHECK_LT(c2, 0);
512 CHECK_LT(c3, 0);
513 }
514
515
// Drives TestCharacterStream with a short string containing control
// characters, a 4096 character buffer (whole and as a sub-range), a single
// zero character and the empty string.
TEST(CharacterStreams) {
  v8::HandleScope handles;
  v8::Persistent<v8::Context> context = v8::Context::New();
  v8::Context::Scope context_scope(context);

  // Large buffer cycling through all 7-bit character values.
  static const unsigned kBigStringSize = 4096;
  char big_string[kBigStringSize + 1];
  for (unsigned n = 0; n < kBigStringSize; n++) {
    big_string[n] = static_cast<char>(n % 128);
  }

  TestCharacterStream("abc\0\n\r\x7f", 7);
  TestCharacterStream(big_string, kBigStringSize);

  // Sub-range of the large buffer.
  TestCharacterStream(big_string, kBigStringSize, 576, 3298);

  // Degenerate inputs: a lone zero character and no characters at all.
  TestCharacterStream("\0", 1);
  TestCharacterStream("", 0);
}
534
535
// Encodes every character from 0 up to the largest three-byte UTF-8
// character into one buffer and checks that Utf8ToUC16CharacterStream reads
// them back in order, supports pushing all of them back, and keeps its
// position consistent when seeking forward in steps.
TEST(Utf8CharacterStream) {
  static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
  static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);

  // Total encoded size: one byte for each one-byte character, two bytes for
  // each two-byte character and three bytes for each three-byte character.
  static const int kAllUtf8CharsSize =
      (unibrow::Utf8::kMaxOneByteChar + 1) +
      (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
      (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
  static const unsigned kAllUtf8CharsSizeU =
      static_cast<unsigned>(kAllUtf8CharsSize);

  char buffer[kAllUtf8CharsSizeU];
  unsigned cursor = 0;
  for (int i = 0; i <= kMaxUC16Char; i++) {
    cursor += unibrow::Utf8::Encode(buffer + cursor, i);
  }
  // Use CHECK rather than the debug-only ASSERT so that the buffer size
  // computation is verified in every build mode.
  CHECK(cursor == kAllUtf8CharsSizeU);

  i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
                                      kAllUtf8CharsSizeU);
  // Read every character; the position advances by one per character.
  for (int i = 0; i <= kMaxUC16Char; i++) {
    CHECK_EQU(i, stream.pos());
    int32_t c = stream.Advance();
    CHECK_EQ(i, c);
    CHECK_EQU(i + 1, stream.pos());
  }
  // Push every character back again, in reverse order.
  for (int i = kMaxUC16Char; i >= 0; i--) {
    CHECK_EQU(i + 1, stream.pos());
    stream.PushBack(i);
    CHECK_EQU(i, stream.pos());
  }
  // Alternate between seeking forward by up to 12 characters and reading
  // one character; |i| tracks the expected position.
  int i = 0;
  while (stream.pos() < kMaxUC16CharU) {
    CHECK_EQU(i, stream.pos());
    unsigned progress = stream.SeekForward(12);
    i += progress;
    int32_t c = stream.Advance();
    if (i <= kMaxUC16Char) {
      CHECK_EQ(i, c);
    } else {
      // Past the last encoded character the stream reports -1.
      CHECK_EQ(-1, c);
    }
    i += 1;
    CHECK_EQU(i, stream.pos());
  }
}
582
583 #undef CHECK_EQU
584
TestStreamScanner(i::UC16CharacterStream * stream,i::Token::Value * expected_tokens,int skip_pos=0,int skip_to=0)585 void TestStreamScanner(i::UC16CharacterStream* stream,
586 i::Token::Value* expected_tokens,
587 int skip_pos = 0, // Zero means not skipping.
588 int skip_to = 0) {
589 i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
590 scanner.Initialize(stream);
591
592 int i = 0;
593 do {
594 i::Token::Value expected = expected_tokens[i];
595 i::Token::Value actual = scanner.Next();
596 CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
597 if (scanner.location().end_pos == skip_pos) {
598 scanner.SeekForward(skip_to);
599 }
600 i++;
601 } while (expected_tokens[i] != i::Token::ILLEGAL);
602 }
603
// Checks the token sequences the scanner produces for three inputs: a plain
// mix of tokens, comments and newlines; an input where a brace-delimited
// part is skipped with SeekForward; and a run of closing braces of which
// zero to four are skipped.
TEST(StreamScanner) {
  v8::V8::Initialize();

  const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
  i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
                                       static_cast<unsigned>(strlen(str1)));
  i::Token::Value expectations1[] = {
      i::Token::LBRACE,
      i::Token::IDENTIFIER,
      i::Token::IDENTIFIER,
      i::Token::FOR,
      i::Token::COLON,
      i::Token::MUL,
      i::Token::DIV,
      i::Token::LT,
      i::Token::SUB,
      i::Token::IDENTIFIER,
      i::Token::EOS,
      i::Token::ILLEGAL
  };
  TestStreamScanner(&stream1, expectations1, 0, 0);

  const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
  i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
                                       static_cast<unsigned>(strlen(str2)));
  i::Token::Value expectations2[] = {
      i::Token::CASE,
      i::Token::DEFAULT,
      i::Token::CONST,
      i::Token::LBRACE,
      // Skipped part here
      i::Token::RBRACE,
      i::Token::DO,
      i::Token::EOS,
      i::Token::ILLEGAL
  };
  // Sanity-check the offsets used for skipping below. CHECK_EQ is used
  // instead of the debug-only ASSERT_EQ so that the offsets are verified in
  // every build mode.
  CHECK_EQ('{', str2[19]);
  CHECK_EQ('}', str2[37]);
  // Skip from just after '{' (position 20) to the '}' (position 37).
  TestStreamScanner(&stream2, expectations2, 20, 37);

  const char* str3 = "{}}}}";
  i::Token::Value expectations3[] = {
      i::Token::LBRACE,
      i::Token::RBRACE,
      i::Token::RBRACE,
      i::Token::RBRACE,
      i::Token::RBRACE,
      i::Token::EOS,
      i::Token::ILLEGAL
  };
  // Skip zero-four RBRACEs. Every additional skipped brace moves the
  // EOS/ILLEGAL terminator pair of the expectations one slot forward.
  for (int skipped = 0; skipped <= 4; skipped++) {
    expectations3[6 - skipped] = i::Token::ILLEGAL;
    expectations3[5 - skipped] = i::Token::EOS;
    i::Utf8ToUC16CharacterStream stream3(
        reinterpret_cast<const i::byte*>(str3),
        static_cast<unsigned>(strlen(str3)));
    TestStreamScanner(&stream3, expectations3, 1, 1 + skipped);
  }
}
664
665
TestScanRegExp(const char * re_source,const char * expected)666 void TestScanRegExp(const char* re_source, const char* expected) {
667 i::Utf8ToUC16CharacterStream stream(
668 reinterpret_cast<const i::byte*>(re_source),
669 static_cast<unsigned>(strlen(re_source)));
670 i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
671 scanner.Initialize(&stream);
672
673 i::Token::Value start = scanner.peek();
674 CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
675 CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
676 scanner.Next(); // Current token is now the regexp literal.
677 CHECK(scanner.is_literal_ascii());
678 i::Vector<const char> actual = scanner.literal_ascii_string();
679 for (int i = 0; i < actual.length(); i++) {
680 CHECK_NE('\0', expected[i]);
681 CHECK_EQ(expected[i], actual[i]);
682 }
683 }
684
685
// Checks that the scanner finds the correct end of a regular expression
// literal. Every source has trailing garbage ("flipperwald") after the
// terminating slash, or nothing at all; only the pattern between the slashes
// may be scanned.
TEST(RegExpScanning) {
  v8::V8::Initialize();

  struct RegExpCase {
    const char* source;    // Text handed to the scanner.
    const char* expected;  // Pattern the scanner should extract.
  };

  static const RegExpCase kCases[] = {
    // The pattern ends at the terminating slash just before "flipperwald".
    { "/b/flipperwald", "b" },
    // Incomplete escape sequences don't hide the terminating slash.
    { "/\\x/flipperwald", "\\x" },
    { "/\\u/flipperwald", "\\u" },
    { "/\\u1/flipperwald", "\\u1" },
    { "/\\u12/flipperwald", "\\u12" },
    { "/\\u123/flipperwald", "\\u123" },
    { "/\\c/flipperwald", "\\c" },
    { "/\\c//flipperwald", "\\c" },
    // Slashes inside character classes are not terminating.
    { "/[/]/flipperwald", "[/]" },
    { "/[\\s-/]/flipperwald", "[\\s-/]" },
    // Incomplete escape sequences inside a character class don't hide the
    // end of the character class.
    { "/[\\c/]/flipperwald", "[\\c/]" },
    { "/[\\c]/flipperwald", "[\\c]" },
    { "/[\\x]/flipperwald", "[\\x]" },
    { "/[\\x1]/flipperwald", "[\\x1]" },
    { "/[\\u]/flipperwald", "[\\u]" },
    { "/[\\u1]/flipperwald", "[\\u1]" },
    { "/[\\u12]/flipperwald", "[\\u12]" },
    { "/[\\u123]/flipperwald", "[\\u123]" },
    // An escaped ']' won't end the character class.
    { "/[\\]/]/flipperwald", "[\\]/]" },
    // Escaped slashes are not terminating.
    { "/\\//flipperwald", "\\/" },
    // Patterns starting with '=' work too.
    { "/=/", "=" },
    { "/=?/", "=?" }
  };

  const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t n = 0; n < case_count; n++) {
    TestScanRegExp(kCases[n].source, kCases[n].expected);
  }
}
721