• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "RegexInterpreter.h"
28 
29 #include "RegexCompiler.h"
30 #include "RegexPattern.h"
31 
32 #ifndef NDEBUG
33 #include <stdio.h>
34 #endif
35 
36 #if ENABLE(YARR)
37 
38 using namespace WTF;
39 
40 namespace JSC { namespace Yarr {
41 
42 class Interpreter {
43 public:
44     struct ParenthesesDisjunctionContext;
45 
46     struct BackTrackInfoPatternCharacter {
47         uintptr_t matchAmount;
48     };
49     struct BackTrackInfoCharacterClass {
50         uintptr_t matchAmount;
51     };
52     struct BackTrackInfoBackReference {
53         uintptr_t begin; // Not really needed for greedy quantifiers.
54         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
55     };
56     struct BackTrackInfoAlternative {
57         uintptr_t offset;
58     };
59     struct BackTrackInfoParentheticalAssertion {
60         uintptr_t begin;
61     };
62     struct BackTrackInfoParenthesesOnce {
63         uintptr_t inParentheses;
64     };
65     struct BackTrackInfoParentheses {
66         uintptr_t matchAmount;
67         ParenthesesDisjunctionContext* lastContext;
68         uintptr_t prevBegin;
69         uintptr_t prevEnd;
70     };
71 
appendParenthesesDisjunctionContext(BackTrackInfoParentheses * backTrack,ParenthesesDisjunctionContext * context)72     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
73     {
74         context->next = backTrack->lastContext;
75         backTrack->lastContext = context;
76         ++backTrack->matchAmount;
77     }
78 
popParenthesesDisjunctionContext(BackTrackInfoParentheses * backTrack)79     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
80     {
81         ASSERT(backTrack->matchAmount);
82         ASSERT(backTrack->lastContext);
83         backTrack->lastContext = backTrack->lastContext->next;
84         --backTrack->matchAmount;
85     }
86 
87     struct DisjunctionContext
88     {
DisjunctionContextJSC::Yarr::Interpreter::DisjunctionContext89         DisjunctionContext()
90             : term(0)
91         {
92         }
93 
operator newJSC::Yarr::Interpreter::DisjunctionContext94         void* operator new(size_t, void* where)
95         {
96             return where;
97         }
98 
99         int term;
100         unsigned matchBegin;
101         unsigned matchEnd;
102         uintptr_t frame[1];
103     };
104 
allocDisjunctionContext(ByteDisjunction * disjunction)105     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
106     {
107         return new(malloc(sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext();
108     }
109 
freeDisjunctionContext(DisjunctionContext * context)110     void freeDisjunctionContext(DisjunctionContext* context)
111     {
112         free(context);
113     }
114 
115     struct ParenthesesDisjunctionContext
116     {
ParenthesesDisjunctionContextJSC::Yarr::Interpreter::ParenthesesDisjunctionContext117         ParenthesesDisjunctionContext(int* output, ByteTerm& term)
118             : next(0)
119         {
120             unsigned firstSubpatternId = term.atom.subpatternId;
121             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
122 
123             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
124                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
125                 output[(firstSubpatternId << 1) + i] = -1;
126             }
127 
128             new(getDisjunctionContext(term)) DisjunctionContext();
129         }
130 
operator newJSC::Yarr::Interpreter::ParenthesesDisjunctionContext131         void* operator new(size_t, void* where)
132         {
133             return where;
134         }
135 
restoreOutputJSC::Yarr::Interpreter::ParenthesesDisjunctionContext136         void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
137         {
138             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
139                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
140         }
141 
getDisjunctionContextJSC::Yarr::Interpreter::ParenthesesDisjunctionContext142         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
143         {
144             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
145         }
146 
147         ParenthesesDisjunctionContext* next;
148         int subpatternBackup[1];
149     };
150 
allocParenthesesDisjunctionContext(ByteDisjunction * disjunction,int * output,ByteTerm & term)151     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
152     {
153         return new(malloc(sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term);
154     }
155 
freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext * context)156     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
157     {
158         free(context);
159     }
160 
161     class InputStream {
162     public:
InputStream(const UChar * input,unsigned start,unsigned length)163         InputStream(const UChar* input, unsigned start, unsigned length)
164             : input(input)
165             , pos(start)
166             , length(length)
167         {
168         }
169 
next()170         void next()
171         {
172             ++pos;
173         }
174 
rewind(unsigned amount)175         void rewind(unsigned amount)
176         {
177             ASSERT(pos >= amount);
178             pos -= amount;
179         }
180 
read()181         int read()
182         {
183             ASSERT(pos < length);
184             if (pos < length)
185                 return input[pos];
186             return -1;
187         }
188 
readChecked(int position)189         int readChecked(int position)
190         {
191             ASSERT(position < 0);
192             ASSERT((unsigned)-position <= pos);
193             unsigned p = pos + position;
194             ASSERT(p < length);
195             return input[p];
196         }
197 
reread(unsigned from)198         int reread(unsigned from)
199         {
200             ASSERT(from < length);
201             return input[from];
202         }
203 
prev()204         int prev()
205         {
206             ASSERT(!(pos > length));
207             if (pos && length)
208                 return input[pos - 1];
209             return -1;
210         }
211 
getPos()212         unsigned getPos()
213         {
214             return pos;
215         }
216 
setPos(unsigned p)217         void setPos(unsigned p)
218         {
219             pos = p;
220         }
221 
atStart()222         bool atStart()
223         {
224             return pos == 0;
225         }
226 
atEnd()227         bool atEnd()
228         {
229             return pos == length;
230         }
231 
checkInput(int count)232         bool checkInput(int count)
233         {
234             if ((pos + count) <= length) {
235                 pos += count;
236                 return true;
237             } else
238                 return false;
239         }
240 
uncheckInput(int count)241         void uncheckInput(int count)
242         {
243             pos -= count;
244         }
245 
atStart(int position)246         bool atStart(int position)
247         {
248             return (pos + position) == 0;
249         }
250 
atEnd(int position)251         bool atEnd(int position)
252         {
253             return (pos + position) == length;
254         }
255 
256     private:
257         const UChar* input;
258         unsigned pos;
259         unsigned length;
260     };
261 
testCharacterClass(CharacterClass * characterClass,int ch)262     bool testCharacterClass(CharacterClass* characterClass, int ch)
263     {
264         if (ch & 0xFF80) {
265             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
266                 if (ch == characterClass->m_matchesUnicode[i])
267                     return true;
268             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
269                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
270                     return true;
271         } else {
272             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
273                 if (ch == characterClass->m_matches[i])
274                     return true;
275             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
276                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
277                     return true;
278         }
279 
280         return false;
281     }
282 
tryConsumeCharacter(int testChar)283     bool tryConsumeCharacter(int testChar)
284     {
285         if (input.atEnd())
286             return false;
287 
288         int ch = input.read();
289 
290         if (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch)) {
291             input.next();
292             return true;
293         }
294         return false;
295     }
296 
checkCharacter(int testChar,int inputPosition)297     bool checkCharacter(int testChar, int inputPosition)
298     {
299         return testChar == input.readChecked(inputPosition);
300     }
301 
checkCasedCharacter(int loChar,int hiChar,int inputPosition)302     bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
303     {
304         int ch = input.readChecked(inputPosition);
305         return (loChar == ch) || (hiChar == ch);
306     }
307 
tryConsumeCharacterClass(CharacterClass * characterClass,bool invert)308     bool tryConsumeCharacterClass(CharacterClass* characterClass, bool invert)
309     {
310         if (input.atEnd())
311             return false;
312 
313         bool match = testCharacterClass(characterClass, input.read());
314 
315         if (invert)
316             match = !match;
317 
318         if (match) {
319             input.next();
320             return true;
321         }
322         return false;
323     }
324 
checkCharacterClass(CharacterClass * characterClass,bool invert,int inputPosition)325     bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
326     {
327         bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
328         return invert ? !match : match;
329     }
330 
tryConsumeBackReference(int matchBegin,int matchEnd,int inputOffset)331     bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
332     {
333         int matchSize = matchEnd - matchBegin;
334 
335         if (!input.checkInput(matchSize))
336             return false;
337 
338         for (int i = 0; i < matchSize; ++i) {
339             if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
340                 input.uncheckInput(matchSize);
341                 return false;
342             }
343         }
344 
345         return true;
346     }
347 
matchAssertionBOL(ByteTerm & term)348     bool matchAssertionBOL(ByteTerm& term)
349     {
350         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
351     }
352 
matchAssertionEOL(ByteTerm & term)353     bool matchAssertionEOL(ByteTerm& term)
354     {
355         if (term.inputPosition)
356             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
357         else
358             return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
359     }
360 
matchAssertionWordBoundary(ByteTerm & term)361     bool matchAssertionWordBoundary(ByteTerm& term)
362     {
363         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
364         bool readIsWordchar;
365         if (term.inputPosition)
366             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
367         else
368             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
369 
370         bool wordBoundary = prevIsWordchar != readIsWordchar;
371         return term.invert() ? !wordBoundary : wordBoundary;
372     }
373 
backtrackPatternCharacter(ByteTerm & term,DisjunctionContext * context)374     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
375     {
376         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
377 
378         switch (term.atom.quantityType) {
379         case QuantifierFixedCount:
380             break;
381 
382         case QuantifierGreedy:
383             if (backTrack->matchAmount) {
384                 --backTrack->matchAmount;
385                 input.uncheckInput(1);
386                 return true;
387             }
388             break;
389 
390         case QuantifierNonGreedy:
391             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
392                 ++backTrack->matchAmount;
393                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
394                     return true;
395             }
396             input.uncheckInput(backTrack->matchAmount);
397             break;
398         }
399 
400         return false;
401     }
402 
backtrackPatternCasedCharacter(ByteTerm & term,DisjunctionContext * context)403     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
404     {
405         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
406 
407         switch (term.atom.quantityType) {
408         case QuantifierFixedCount:
409             break;
410 
411         case QuantifierGreedy:
412             if (backTrack->matchAmount) {
413                 --backTrack->matchAmount;
414                 input.uncheckInput(1);
415                 return true;
416             }
417             break;
418 
419         case QuantifierNonGreedy:
420             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
421                 ++backTrack->matchAmount;
422                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
423                     return true;
424             }
425             input.uncheckInput(backTrack->matchAmount);
426             break;
427         }
428 
429         return false;
430     }
431 
matchCharacterClass(ByteTerm & term,DisjunctionContext * context)432     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
433     {
434         ASSERT(term.type == ByteTerm::TypeCharacterClass);
435         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
436 
437         switch (term.atom.quantityType) {
438         case QuantifierFixedCount: {
439             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
440                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
441                     return false;
442             }
443             return true;
444         }
445 
446         case QuantifierGreedy: {
447             unsigned matchAmount = 0;
448             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
449                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
450                     input.uncheckInput(1);
451                     break;
452                 }
453                 ++matchAmount;
454             }
455             backTrack->matchAmount = matchAmount;
456 
457             return true;
458         }
459 
460         case QuantifierNonGreedy:
461             backTrack->matchAmount = 0;
462             return true;
463         }
464 
465         ASSERT_NOT_REACHED();
466         return false;
467     }
468 
backtrackCharacterClass(ByteTerm & term,DisjunctionContext * context)469     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
470     {
471         ASSERT(term.type == ByteTerm::TypeCharacterClass);
472         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
473 
474         switch (term.atom.quantityType) {
475         case QuantifierFixedCount:
476             break;
477 
478         case QuantifierGreedy:
479             if (backTrack->matchAmount) {
480                 --backTrack->matchAmount;
481                 input.uncheckInput(1);
482                 return true;
483             }
484             break;
485 
486         case QuantifierNonGreedy:
487             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
488                 ++backTrack->matchAmount;
489                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
490                     return true;
491             }
492             input.uncheckInput(backTrack->matchAmount);
493             break;
494         }
495 
496         return false;
497     }
498 
matchBackReference(ByteTerm & term,DisjunctionContext * context)499     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
500     {
501         ASSERT(term.type == ByteTerm::TypeBackReference);
502         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
503 
504         int matchBegin = output[(term.atom.subpatternId << 1)];
505         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
506         ASSERT((matchBegin == -1) == (matchEnd == -1));
507         ASSERT(matchBegin <= matchEnd);
508 
509         if (matchBegin == matchEnd)
510             return true;
511 
512         switch (term.atom.quantityType) {
513         case QuantifierFixedCount: {
514             backTrack->begin = input.getPos();
515             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
516                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
517                     input.setPos(backTrack->begin);
518                     return false;
519                 }
520             }
521             return true;
522         }
523 
524         case QuantifierGreedy: {
525             unsigned matchAmount = 0;
526             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
527                 ++matchAmount;
528             backTrack->matchAmount = matchAmount;
529             return true;
530         }
531 
532         case QuantifierNonGreedy:
533             backTrack->begin = input.getPos();
534             backTrack->matchAmount = 0;
535             return true;
536         }
537 
538         ASSERT_NOT_REACHED();
539         return false;
540     }
541 
backtrackBackReference(ByteTerm & term,DisjunctionContext * context)542     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
543     {
544         ASSERT(term.type == ByteTerm::TypeBackReference);
545         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
546 
547         int matchBegin = output[(term.atom.subpatternId << 1)];
548         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
549         ASSERT((matchBegin == -1) == (matchEnd == -1));
550         ASSERT(matchBegin <= matchEnd);
551 
552         if (matchBegin == matchEnd)
553             return false;
554 
555         switch (term.atom.quantityType) {
556         case QuantifierFixedCount:
557             // for quantityCount == 1, could rewind.
558             input.setPos(backTrack->begin);
559             break;
560 
561         case QuantifierGreedy:
562             if (backTrack->matchAmount) {
563                 --backTrack->matchAmount;
564                 input.rewind(matchEnd - matchBegin);
565                 return true;
566             }
567             break;
568 
569         case QuantifierNonGreedy:
570             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
571                 ++backTrack->matchAmount;
572                 return true;
573             } else
574                 input.setPos(backTrack->begin);
575             break;
576         }
577 
578         return false;
579     }
580 
recordParenthesesMatch(ByteTerm & term,ParenthesesDisjunctionContext * context)581     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
582     {
583         if (term.capture()) {
584             unsigned subpatternId = term.atom.subpatternId;
585             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
586             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
587         }
588     }
resetMatches(ByteTerm & term,ParenthesesDisjunctionContext * context)589     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
590     {
591         unsigned firstSubpatternId = term.atom.subpatternId;
592         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
593         context->restoreOutput(output, firstSubpatternId, count);
594     }
resetAssertionMatches(ByteTerm & term)595     void resetAssertionMatches(ByteTerm& term)
596     {
597         unsigned firstSubpatternId = term.atom.subpatternId;
598         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
599         for (unsigned i = 0; i < (count << 1); ++i)
600             output[(firstSubpatternId << 1) + i] = -1;
601     }
parenthesesDoBacktrack(ByteTerm & term,BackTrackInfoParentheses * backTrack)602     bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
603     {
604         while (backTrack->matchAmount) {
605             ParenthesesDisjunctionContext* context = backTrack->lastContext;
606 
607             if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
608                 return true;
609 
610             resetMatches(term, context);
611             popParenthesesDisjunctionContext(backTrack);
612             freeParenthesesDisjunctionContext(context);
613         }
614 
615         return false;
616     }
617 
matchParenthesesOnceBegin(ByteTerm & term,DisjunctionContext * context)618     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
619     {
620         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
621         ASSERT(term.atom.quantityCount == 1);
622 
623         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
624 
625         switch (term.atom.quantityType) {
626         case QuantifierGreedy: {
627             // set this speculatively; if we get to the parens end this will be true.
628             backTrack->inParentheses = 1;
629             break;
630         }
631         case QuantifierNonGreedy: {
632             backTrack->inParentheses = 0;
633             context->term += term.atom.parenthesesWidth;
634             return true;
635         }
636         case QuantifierFixedCount:
637             break;
638         }
639 
640         if (term.capture()) {
641             unsigned subpatternId = term.atom.subpatternId;
642             output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
643         }
644 
645         return true;
646     }
647 
matchParenthesesOnceEnd(ByteTerm & term,DisjunctionContext *)648     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*)
649     {
650         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
651         ASSERT(term.atom.quantityCount == 1);
652 
653         if (term.capture()) {
654             unsigned subpatternId = term.atom.subpatternId;
655             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
656         }
657         return true;
658     }
659 
backtrackParenthesesOnceBegin(ByteTerm & term,DisjunctionContext * context)660     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
661     {
662         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
663         ASSERT(term.atom.quantityCount == 1);
664 
665         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
666 
667         if (term.capture()) {
668             unsigned subpatternId = term.atom.subpatternId;
669             output[(subpatternId << 1)] = -1;
670             output[(subpatternId << 1) + 1] = -1;
671         }
672 
673         switch (term.atom.quantityType) {
674         case QuantifierGreedy:
675             // if we backtrack to this point, there is another chance - try matching nothing.
676             ASSERT(backTrack->inParentheses);
677             backTrack->inParentheses = 0;
678             context->term += term.atom.parenthesesWidth;
679             return true;
680         case QuantifierNonGreedy:
681             ASSERT(backTrack->inParentheses);
682         case QuantifierFixedCount:
683             break;
684         }
685 
686         return false;
687     }
688 
backtrackParenthesesOnceEnd(ByteTerm & term,DisjunctionContext * context)689     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
690     {
691         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
692         ASSERT(term.atom.quantityCount == 1);
693 
694         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
695 
696         switch (term.atom.quantityType) {
697         case QuantifierGreedy:
698             if (!backTrack->inParentheses) {
699                 context->term -= term.atom.parenthesesWidth;
700                 return false;
701             }
702         case QuantifierNonGreedy:
703             if (!backTrack->inParentheses) {
704                 // now try to match the parens; set this speculatively.
705                 backTrack->inParentheses = 1;
706                 if (term.capture()) {
707                     unsigned subpatternId = term.atom.subpatternId;
708                     output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
709                 }
710                 context->term -= term.atom.parenthesesWidth;
711                 return true;
712             }
713         case QuantifierFixedCount:
714             break;
715         }
716 
717         return false;
718     }
719 
matchParentheticalAssertionBegin(ByteTerm & term,DisjunctionContext * context)720     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
721     {
722         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
723         ASSERT(term.atom.quantityCount == 1);
724 
725         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
726 
727         backTrack->begin = input.getPos();
728         return true;
729     }
730 
matchParentheticalAssertionEnd(ByteTerm & term,DisjunctionContext * context)731     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
732     {
733         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
734         ASSERT(term.atom.quantityCount == 1);
735 
736         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
737 
738         input.setPos(backTrack->begin);
739 
740         // We've reached the end of the parens; if they are inverted, this is failure.
741         if (term.invert()) {
742             context->term -= term.atom.parenthesesWidth;
743             return false;
744         }
745 
746         return true;
747     }
748 
backtrackParentheticalAssertionBegin(ByteTerm & term,DisjunctionContext * context)749     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
750     {
751         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
752         ASSERT(term.atom.quantityCount == 1);
753 
754         // We've failed to match parens; if they are inverted, this is win!
755         if (term.invert()) {
756             context->term += term.atom.parenthesesWidth;
757             return true;
758         }
759 
760         return false;
761     }
762 
backtrackParentheticalAssertionEnd(ByteTerm & term,DisjunctionContext * context)763     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
764     {
765         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
766         ASSERT(term.atom.quantityCount == 1);
767 
768         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
769 
770         input.setPos(backTrack->begin);
771 
772         context->term -= term.atom.parenthesesWidth;
773         return false;
774     }
775 
matchParentheses(ByteTerm & term,DisjunctionContext * context)776     bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
777     {
778         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
779 
780         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
781 
782         unsigned subpatternId = term.atom.subpatternId;
783         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
784 
785         backTrack->prevBegin = output[(subpatternId << 1)];
786         backTrack->prevEnd = output[(subpatternId << 1) + 1];
787 
788         backTrack->matchAmount = 0;
789         backTrack->lastContext = 0;
790 
791         switch (term.atom.quantityType) {
792         case QuantifierFixedCount: {
793             // While we haven't yet reached our fixed limit,
794             while (backTrack->matchAmount < term.atom.quantityCount) {
795                 // Try to do a match, and it it succeeds, add it to the list.
796                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
797                 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
798                     appendParenthesesDisjunctionContext(backTrack, context);
799                 else {
800                     // The match failed; try to find an alternate point to carry on from.
801                     resetMatches(term, context);
802                     freeParenthesesDisjunctionContext(context);
803                     if (!parenthesesDoBacktrack(term, backTrack))
804                         return false;
805                 }
806             }
807 
808             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
809             ParenthesesDisjunctionContext* context = backTrack->lastContext;
810             recordParenthesesMatch(term, context);
811             return true;
812         }
813 
814         case QuantifierGreedy: {
815             while (backTrack->matchAmount < term.atom.quantityCount) {
816                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
817                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
818                     appendParenthesesDisjunctionContext(backTrack, context);
819                 else {
820                     resetMatches(term, context);
821                     freeParenthesesDisjunctionContext(context);
822                     break;
823                 }
824             }
825 
826             if (backTrack->matchAmount) {
827                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
828                 recordParenthesesMatch(term, context);
829             }
830             return true;
831         }
832 
833         case QuantifierNonGreedy:
834             return true;
835         }
836 
837         ASSERT_NOT_REACHED();
838         return false;
839     }
840 
841     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
842     //
843     // Greedy matches never should try just adding more - you should already have done
844     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
845     // you backtrack an item off the list needs checking, since we'll never have matched
846     // the one less case.  Tracking forwards, still add as much as possible.
847     //
848     // Non-greedy, we've already done the one less case, so don't match on popping.
849     // We haven't done the one more case, so always try to add that.
850     //
backtrackParentheses(ByteTerm & term,DisjunctionContext * context)851     bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
852     {
853         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
854 
855         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
856 
857         if (term.capture()) {
858             unsigned subpatternId = term.atom.subpatternId;
859             output[(subpatternId << 1)] = backTrack->prevBegin;
860             output[(subpatternId << 1) + 1] = backTrack->prevEnd;
861         }
862 
863         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
864 
865         switch (term.atom.quantityType) {
866         case QuantifierFixedCount: {
867             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
868 
869             ParenthesesDisjunctionContext* context = 0;
870 
871             if (!parenthesesDoBacktrack(term, backTrack))
872                 return false;
873 
874             // While we haven't yet reached our fixed limit,
875             while (backTrack->matchAmount < term.atom.quantityCount) {
876                 // Try to do a match, and it it succeeds, add it to the list.
877                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
878                 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
879                     appendParenthesesDisjunctionContext(backTrack, context);
880                 else {
881                     // The match failed; try to find an alternate point to carry on from.
882                     resetMatches(term, context);
883                     freeParenthesesDisjunctionContext(context);
884                     if (!parenthesesDoBacktrack(term, backTrack))
885                         return false;
886                 }
887             }
888 
889             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
890             context = backTrack->lastContext;
891             recordParenthesesMatch(term, context);
892             return true;
893         }
894 
895         case QuantifierGreedy: {
896             if (!backTrack->matchAmount)
897                 return false;
898 
899             ParenthesesDisjunctionContext* context = backTrack->lastContext;
900             if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
901                 while (backTrack->matchAmount < term.atom.quantityCount) {
902                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
903                     if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
904                         appendParenthesesDisjunctionContext(backTrack, context);
905                     else {
906                         resetMatches(term, context);
907                         freeParenthesesDisjunctionContext(context);
908                         break;
909                     }
910                 }
911             } else {
912                 resetMatches(term, context);
913                 popParenthesesDisjunctionContext(backTrack);
914                 freeParenthesesDisjunctionContext(context);
915             }
916 
917             if (backTrack->matchAmount) {
918                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
919                 recordParenthesesMatch(term, context);
920             }
921             return true;
922         }
923 
924         case QuantifierNonGreedy: {
925             // If we've not reached the limit, try to add one more match.
926             if (backTrack->matchAmount < term.atom.quantityCount) {
927                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
928                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
929                     appendParenthesesDisjunctionContext(backTrack, context);
930                     recordParenthesesMatch(term, context);
931                     return true;
932                 } else {
933                     resetMatches(term, context);
934                     freeParenthesesDisjunctionContext(context);
935                 }
936             }
937 
938             // Nope - okay backtrack looking for an alternative.
939             while (backTrack->matchAmount) {
940                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
941                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
942                     // successful backtrack! we're back in the game!
943                     if (backTrack->matchAmount) {
944                         context = backTrack->lastContext;
945                         recordParenthesesMatch(term, context);
946                     }
947                     return true;
948                 }
949 
950                 // pop a match off the stack
951                 resetMatches(term, context);
952                 popParenthesesDisjunctionContext(backTrack);
953                 freeParenthesesDisjunctionContext(context);
954             }
955 
956             return false;
957         }
958         }
959 
960         ASSERT_NOT_REACHED();
961         return false;
962     }
963 
964 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
965 #define BACKTRACK() { --context->term; goto backtrack; }
966 #define currentTerm() (disjunction->terms[context->term])
matchDisjunction(ByteDisjunction * disjunction,DisjunctionContext * context,bool btrack=false)967     bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
968     {
969         if (btrack)
970             BACKTRACK();
971 
972         context->matchBegin = input.getPos();
973         context->term = 0;
974 
975     matchAgain:
976         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
977 
978         switch (currentTerm().type) {
979         case ByteTerm::TypeSubpatternBegin:
980             MATCH_NEXT();
981         case ByteTerm::TypeSubpatternEnd:
982             context->matchEnd = input.getPos();
983             return true;
984 
985         case ByteTerm::TypeBodyAlternativeBegin:
986             MATCH_NEXT();
987         case ByteTerm::TypeBodyAlternativeDisjunction:
988         case ByteTerm::TypeBodyAlternativeEnd:
989             context->matchEnd = input.getPos();
990             return true;
991 
992         case ByteTerm::TypeAlternativeBegin:
993             MATCH_NEXT();
994         case ByteTerm::TypeAlternativeDisjunction:
995         case ByteTerm::TypeAlternativeEnd: {
996             int offset = currentTerm().alternative.end;
997             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
998             backTrack->offset = offset;
999             context->term += offset;
1000             MATCH_NEXT();
1001         }
1002 
1003         case ByteTerm::TypeAssertionBOL:
1004             if (matchAssertionBOL(currentTerm()))
1005                 MATCH_NEXT();
1006             BACKTRACK();
1007         case ByteTerm::TypeAssertionEOL:
1008             if (matchAssertionEOL(currentTerm()))
1009                 MATCH_NEXT();
1010             BACKTRACK();
1011         case ByteTerm::TypeAssertionWordBoundary:
1012             if (matchAssertionWordBoundary(currentTerm()))
1013                 MATCH_NEXT();
1014             BACKTRACK();
1015 
1016         case ByteTerm::TypePatternCharacterOnce:
1017         case ByteTerm::TypePatternCharacterFixed: {
1018             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1019                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
1020                     BACKTRACK();
1021             }
1022             MATCH_NEXT();
1023         }
1024         case ByteTerm::TypePatternCharacterGreedy: {
1025             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1026             unsigned matchAmount = 0;
1027             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1028                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
1029                     input.uncheckInput(1);
1030                     break;
1031                 }
1032                 ++matchAmount;
1033             }
1034             backTrack->matchAmount = matchAmount;
1035 
1036             MATCH_NEXT();
1037         }
1038         case ByteTerm::TypePatternCharacterNonGreedy: {
1039             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1040             backTrack->matchAmount = 0;
1041             MATCH_NEXT();
1042         }
1043 
1044         case ByteTerm::TypePatternCasedCharacterOnce:
1045         case ByteTerm::TypePatternCasedCharacterFixed: {
1046             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1047                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
1048                     BACKTRACK();
1049             }
1050             MATCH_NEXT();
1051         }
1052         case ByteTerm::TypePatternCasedCharacterGreedy: {
1053             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1054             unsigned matchAmount = 0;
1055             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1056                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
1057                     input.uncheckInput(1);
1058                     break;
1059                 }
1060                 ++matchAmount;
1061             }
1062             backTrack->matchAmount = matchAmount;
1063 
1064             MATCH_NEXT();
1065         }
1066         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1067             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1068             backTrack->matchAmount = 0;
1069             MATCH_NEXT();
1070         }
1071 
1072         case ByteTerm::TypeCharacterClass:
1073             if (matchCharacterClass(currentTerm(), context))
1074                 MATCH_NEXT();
1075             BACKTRACK();
1076         case ByteTerm::TypeBackReference:
1077             if (matchBackReference(currentTerm(), context))
1078                 MATCH_NEXT();
1079             BACKTRACK();
1080         case ByteTerm::TypeParenthesesSubpattern:
1081             if (matchParentheses(currentTerm(), context))
1082                 MATCH_NEXT();
1083             BACKTRACK();
1084         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1085             if (matchParenthesesOnceBegin(currentTerm(), context))
1086                 MATCH_NEXT();
1087             BACKTRACK();
1088         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1089             if (matchParenthesesOnceEnd(currentTerm(), context))
1090                 MATCH_NEXT();
1091             BACKTRACK();
1092         case ByteTerm::TypeParentheticalAssertionBegin:
1093             if (matchParentheticalAssertionBegin(currentTerm(), context))
1094                 MATCH_NEXT();
1095             BACKTRACK();
1096         case ByteTerm::TypeParentheticalAssertionEnd:
1097             if (matchParentheticalAssertionEnd(currentTerm(), context))
1098                 MATCH_NEXT();
1099             BACKTRACK();
1100 
1101         case ByteTerm::TypeCheckInput:
1102             if (input.checkInput(currentTerm().checkInputCount))
1103                 MATCH_NEXT();
1104             BACKTRACK();
1105         }
1106 
1107         // We should never fall-through to here.
1108         ASSERT_NOT_REACHED();
1109 
1110     backtrack:
1111         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1112 
1113         switch (currentTerm().type) {
1114         case ByteTerm::TypeSubpatternBegin:
1115             return false;
1116         case ByteTerm::TypeSubpatternEnd:
1117             ASSERT_NOT_REACHED();
1118 
1119         case ByteTerm::TypeBodyAlternativeBegin:
1120         case ByteTerm::TypeBodyAlternativeDisjunction: {
1121             int offset = currentTerm().alternative.next;
1122             context->term += offset;
1123             if (offset > 0)
1124                 MATCH_NEXT();
1125 
1126             if (input.atEnd())
1127                 return false;
1128 
1129             input.next();
1130             context->matchBegin = input.getPos();
1131             MATCH_NEXT();
1132         }
1133         case ByteTerm::TypeBodyAlternativeEnd:
1134             ASSERT_NOT_REACHED();
1135 
1136             case ByteTerm::TypeAlternativeBegin:
1137             case ByteTerm::TypeAlternativeDisjunction: {
1138                 int offset = currentTerm().alternative.next;
1139                 context->term += offset;
1140                 if (offset > 0)
1141                     MATCH_NEXT();
1142                 BACKTRACK();
1143             }
1144             case ByteTerm::TypeAlternativeEnd: {
1145                 // We should never backtrack back into an alternative of the main body of the regex.
1146                 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1147                 unsigned offset = backTrack->offset;
1148                 context->term -= offset;
1149                 BACKTRACK();
1150             }
1151 
1152             case ByteTerm::TypeAssertionBOL:
1153             case ByteTerm::TypeAssertionEOL:
1154             case ByteTerm::TypeAssertionWordBoundary:
1155                 BACKTRACK();
1156 
1157             case ByteTerm::TypePatternCharacterOnce:
1158             case ByteTerm::TypePatternCharacterFixed:
1159             case ByteTerm::TypePatternCharacterGreedy:
1160             case ByteTerm::TypePatternCharacterNonGreedy:
1161                 if (backtrackPatternCharacter(currentTerm(), context))
1162                     MATCH_NEXT();
1163                 BACKTRACK();
1164             case ByteTerm::TypePatternCasedCharacterOnce:
1165             case ByteTerm::TypePatternCasedCharacterFixed:
1166             case ByteTerm::TypePatternCasedCharacterGreedy:
1167             case ByteTerm::TypePatternCasedCharacterNonGreedy:
1168                 if (backtrackPatternCasedCharacter(currentTerm(), context))
1169                     MATCH_NEXT();
1170                 BACKTRACK();
1171             case ByteTerm::TypeCharacterClass:
1172                 if (backtrackCharacterClass(currentTerm(), context))
1173                     MATCH_NEXT();
1174                 BACKTRACK();
1175             case ByteTerm::TypeBackReference:
1176                 if (backtrackBackReference(currentTerm(), context))
1177                     MATCH_NEXT();
1178                 BACKTRACK();
1179             case ByteTerm::TypeParenthesesSubpattern:
1180                 if (backtrackParentheses(currentTerm(), context))
1181                     MATCH_NEXT();
1182                 BACKTRACK();
1183             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1184                 if (backtrackParenthesesOnceBegin(currentTerm(), context))
1185                     MATCH_NEXT();
1186                 BACKTRACK();
1187             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1188                 if (backtrackParenthesesOnceEnd(currentTerm(), context))
1189                     MATCH_NEXT();
1190                 BACKTRACK();
1191             case ByteTerm::TypeParentheticalAssertionBegin:
1192                 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1193                     MATCH_NEXT();
1194                 BACKTRACK();
1195             case ByteTerm::TypeParentheticalAssertionEnd:
1196                 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1197                     MATCH_NEXT();
1198                 BACKTRACK();
1199 
1200             case ByteTerm::TypeCheckInput:
1201                 input.uncheckInput(currentTerm().checkInputCount);
1202                 BACKTRACK();
1203         }
1204 
1205         ASSERT_NOT_REACHED();
1206         return false;
1207     }
1208 
matchNonZeroDisjunction(ByteDisjunction * disjunction,DisjunctionContext * context,bool btrack=false)1209     bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1210     {
1211         if (matchDisjunction(disjunction, context, btrack)) {
1212             while (context->matchBegin == context->matchEnd) {
1213                 if (!matchDisjunction(disjunction, context, true))
1214                     return false;
1215             }
1216             return true;
1217         }
1218 
1219         return false;
1220     }
1221 
interpret()1222     int interpret()
1223     {
1224         for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
1225             output[i] = -1;
1226 
1227         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1228 
1229         if (matchDisjunction(pattern->m_body.get(), context)) {
1230             output[0] = context->matchBegin;
1231             output[1] = context->matchEnd;
1232         }
1233 
1234         freeDisjunctionContext(context);
1235 
1236         return output[0];
1237     }
1238 
Interpreter(BytecodePattern * pattern,int * output,const UChar * inputChar,unsigned start,unsigned length)1239     Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
1240         : pattern(pattern)
1241         , output(output)
1242         , input(inputChar, start, length)
1243     {
1244     }
1245 
1246 private:
1247     BytecodePattern *pattern;
1248     int* output;
1249     InputStream input;
1250 };
1251 
1252 
1253 
1254 class ByteCompiler {
1255     struct ParenthesesStackEntry {
1256         unsigned beginTerm;
1257         unsigned savedAlternativeIndex;
ParenthesesStackEntryJSC::Yarr::ByteCompiler::ParenthesesStackEntry1258         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1259             : beginTerm(beginTerm)
1260             , savedAlternativeIndex(savedAlternativeIndex)
1261         {
1262         }
1263     };
1264 
1265 public:
ByteCompiler(RegexPattern & pattern)1266     ByteCompiler(RegexPattern& pattern)
1267         : m_pattern(pattern)
1268     {
1269         m_bodyDisjunction = 0;
1270         m_currentAlternativeIndex = 0;
1271     }
1272 
compile()1273     BytecodePattern* compile()
1274     {
1275         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize);
1276         emitDisjunction(m_pattern.m_body);
1277         regexEnd();
1278 
1279         return new BytecodePattern(m_bodyDisjunction, m_allParenthesesInfo, m_pattern);
1280     }
1281 
checkInput(unsigned count)1282     void checkInput(unsigned count)
1283     {
1284         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1285     }
1286 
assertionBOL(int inputPosition)1287     void assertionBOL(int inputPosition)
1288     {
1289         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1290     }
1291 
assertionEOL(int inputPosition)1292     void assertionEOL(int inputPosition)
1293     {
1294         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1295     }
1296 
assertionWordBoundary(bool invert,int inputPosition)1297     void assertionWordBoundary(bool invert, int inputPosition)
1298     {
1299         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1300     }
1301 
atomPatternCharacter(UChar ch,int inputPosition,unsigned frameLocation,unsigned quantityCount,QuantifierType quantityType)1302     void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1303     {
1304         if (m_pattern.m_ignoreCase) {
1305             UChar lo = Unicode::toLower(ch);
1306             UChar hi = Unicode::toUpper(ch);
1307 
1308             if (lo != hi) {
1309                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1310                 return;
1311             }
1312         }
1313 
1314         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1315     }
1316 
atomCharacterClass(CharacterClass * characterClass,bool invert,int inputPosition,unsigned frameLocation,unsigned quantityCount,QuantifierType quantityType)1317     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1318     {
1319         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1320 
1321         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1322         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1323         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1324     }
1325 
atomBackReference(unsigned subpatternId,int inputPosition,unsigned frameLocation,unsigned quantityCount,QuantifierType quantityType)1326     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1327     {
1328         ASSERT(subpatternId);
1329 
1330         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1331 
1332         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1333         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1334         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1335     }
1336 
atomParenthesesSubpatternBegin(unsigned subpatternId,bool capture,int inputPosition,unsigned frameLocation,unsigned alternativeFrameLocation)1337     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1338     {
1339         int beginTerm = m_bodyDisjunction->terms.size();
1340 
1341         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
1342         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1343         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1344         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1345 
1346         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1347         m_currentAlternativeIndex = beginTerm + 1;
1348     }
1349 
atomParentheticalAssertionBegin(unsigned subpatternId,bool invert,unsigned frameLocation,unsigned alternativeFrameLocation)1350     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1351     {
1352         int beginTerm = m_bodyDisjunction->terms.size();
1353 
1354         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
1355         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1356         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1357         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1358 
1359         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1360         m_currentAlternativeIndex = beginTerm + 1;
1361     }
1362 
popParenthesesStack()1363     unsigned popParenthesesStack()
1364     {
1365         ASSERT(m_parenthesesStack.size());
1366         int stackEnd = m_parenthesesStack.size() - 1;
1367         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1368         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1369         m_parenthesesStack.shrink(stackEnd);
1370 
1371         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1372         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1373 
1374         return beginTerm;
1375     }
1376 
1377 #ifndef NDEBUG
dumpDisjunction(ByteDisjunction * disjunction)1378     void dumpDisjunction(ByteDisjunction* disjunction)
1379     {
1380         printf("ByteDisjunction(%p):\n\t", disjunction);
1381         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1382             printf("{ %d } ", disjunction->terms[i].type);
1383         printf("\n");
1384     }
1385 #endif
1386 
closeAlternative(int beginTerm)1387     void closeAlternative(int beginTerm)
1388     {
1389         int origBeginTerm = beginTerm;
1390         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1391         int endIndex = m_bodyDisjunction->terms.size();
1392 
1393         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1394 
1395         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1396             m_bodyDisjunction->terms.remove(beginTerm);
1397         else {
1398             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1399                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1400                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1401                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1402                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1403             }
1404 
1405             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1406 
1407             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1408             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1409         }
1410     }
1411 
closeBodyAlternative()1412     void closeBodyAlternative()
1413     {
1414         int beginTerm = 0;
1415         int origBeginTerm = 0;
1416         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1417         int endIndex = m_bodyDisjunction->terms.size();
1418 
1419         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1420 
1421         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1422             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1423             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1424             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1425             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1426         }
1427 
1428         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1429 
1430         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1431         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1432     }
1433 
atomParenthesesEnd(bool doInline,unsigned lastSubpatternId,int inputPosition,unsigned frameLocation,unsigned quantityCount,QuantifierType quantityType,unsigned callFrameSize=0)1434     void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1435     {
1436         unsigned beginTerm = popParenthesesStack();
1437         closeAlternative(beginTerm + 1);
1438         unsigned endTerm = m_bodyDisjunction->terms.size();
1439 
1440         bool isAssertion = m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
1441         bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
1442         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1443 
1444         m_bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
1445         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1446         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1447         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1448 
1449         if (doInline) {
1450             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1451             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1452             m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1453             m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1454         } else {
1455             ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1456             ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1457 
1458             bool invertOrCapture = parenthesesBegin.invertOrCapture;
1459             unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1460 
1461             unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1462             ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1463 
1464             parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1465             for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1466                 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1467             parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1468 
1469             m_bodyDisjunction->terms.shrink(beginTerm);
1470 
1471             m_allParenthesesInfo.append(parenthesesDisjunction);
1472             m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));
1473 
1474             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1475             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1476             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1477         }
1478     }
1479 
regexBegin(unsigned numSubpatterns,unsigned callFrameSize)1480     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize)
1481     {
1482         m_bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1483         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
1484         m_bodyDisjunction->terms[0].frameLocation = 0;
1485         m_currentAlternativeIndex = 0;
1486     }
1487 
regexEnd()1488     void regexEnd()
1489     {
1490         closeBodyAlternative();
1491     }
1492 
alternativeBodyDisjunction()1493     void alternativeBodyDisjunction()
1494     {
1495         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1496         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1497         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());
1498 
1499         m_currentAlternativeIndex = newAlternativeIndex;
1500     }
1501 
alternativeDisjunction()1502     void alternativeDisjunction()
1503     {
1504         int newAlternativeIndex = m_bodyDisjunction->terms.size();
1505         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1506         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1507 
1508         m_currentAlternativeIndex = newAlternativeIndex;
1509     }
1510 
emitDisjunction(PatternDisjunction * disjunction,unsigned inputCountAlreadyChecked=0,unsigned parenthesesInputCountAlreadyChecked=0)1511     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1512     {
1513         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1514             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1515 
1516             if (alt) {
1517                 if (disjunction == m_pattern.m_body)
1518                     alternativeBodyDisjunction();
1519                 else
1520                     alternativeDisjunction();
1521             }
1522 
1523             PatternAlternative* alternative = disjunction->m_alternatives[alt];
1524             unsigned minimumSize = alternative->m_minimumSize;
1525 
1526             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1527             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1528             if (countToCheck)
1529                 checkInput(countToCheck);
1530             currentCountAlreadyChecked += countToCheck;
1531 
1532             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1533                 PatternTerm& term = alternative->m_terms[i];
1534 
1535                 switch (term.type) {
1536                 case PatternTerm::TypeAssertionBOL:
1537                     assertionBOL(term.inputPosition - currentCountAlreadyChecked);
1538                     break;
1539 
1540                 case PatternTerm::TypeAssertionEOL:
1541                     assertionEOL(term.inputPosition - currentCountAlreadyChecked);
1542                     break;
1543 
1544                 case PatternTerm::TypeAssertionWordBoundary:
1545                     assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked);
1546                     break;
1547 
1548                 case PatternTerm::TypePatternCharacter:
1549                     atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1550                     break;
1551 
1552                 case PatternTerm::TypeCharacterClass:
1553                     atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1554                     break;
1555 
1556                 case PatternTerm::TypeBackReference:
1557                     atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1558                         break;
1559 
1560                 case PatternTerm::TypeForwardReference:
1561                     break;
1562 
1563                 case PatternTerm::TypeParenthesesSubpattern: {
1564                     unsigned disjunctionAlreadyCheckedCount = 0;
1565                     if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
1566                         if (term.quantityType == QuantifierFixedCount) {
1567                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1568                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1569                             atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
1570                             emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
1571                             atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1572                         } else {
1573                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1574                             atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
1575                             emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1576                             atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1577                         }
1578                     } else {
1579                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1580                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
1581                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1582                         atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1583                     }
1584                     break;
1585                 }
1586 
1587                 case PatternTerm::TypeParentheticalAssertion: {
1588                     unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
1589 
1590                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
1591                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1592                     atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
1593                     break;
1594                 }
1595                 }
1596             }
1597         }
1598     }
1599 
1600 private:
1601     RegexPattern& m_pattern;
1602     ByteDisjunction* m_bodyDisjunction;
1603     unsigned m_currentAlternativeIndex;
1604     Vector<ParenthesesStackEntry> m_parenthesesStack;
1605     Vector<ByteDisjunction*> m_allParenthesesInfo;
1606 };
1607 
1608 
byteCompileRegex(const UString & patternString,unsigned & numSubpatterns,const char * & error,bool ignoreCase,bool multiline)1609 BytecodePattern* byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1610 {
1611     RegexPattern pattern(ignoreCase, multiline);
1612 
1613     if ((error = compileRegex(patternString, pattern)))
1614         return 0;
1615 
1616     numSubpatterns = pattern.m_numSubpatterns;
1617 
1618     return ByteCompiler(pattern).compile();
1619 }
1620 
interpretRegex(BytecodePattern * regex,const UChar * input,unsigned start,unsigned length,int * output)1621 int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output)
1622 {
1623     return Interpreter(regex, output, input, start, length).interpret();
1624 }
1625 
1626 
1627 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter);
1628 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass);
1629 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
1630 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
1631 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
1632 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
1633 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);
1634 
1635 
1636 } }
1637 
1638 #endif
1639