• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "RegexJIT.h"
28 
29 #include "ASCIICType.h"
30 #include "JSGlobalData.h"
31 #include "LinkBuffer.h"
32 #include "MacroAssembler.h"
33 #include "RegexCompiler.h"
34 
35 #include "pcre.h" // temporary, remove when fallback is removed.
36 
37 #if ENABLE(YARR_JIT)
38 
39 using namespace WTF;
40 
41 namespace JSC { namespace Yarr {
42 
43 
44 class RegexGenerator : private MacroAssembler {
45     friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
46 
47 #if PLATFORM(ARM)
48     static const RegisterID input = ARM::r0;
49     static const RegisterID index = ARM::r1;
50     static const RegisterID length = ARM::r2;
51     static const RegisterID output = ARM::r4;
52 
53     static const RegisterID regT0 = ARM::r5;
54     static const RegisterID regT1 = ARM::r6;
55 
56     static const RegisterID returnRegister = ARM::r0;
57 #elif PLATFORM(X86)
58     static const RegisterID input = X86::eax;
59     static const RegisterID index = X86::edx;
60     static const RegisterID length = X86::ecx;
61     static const RegisterID output = X86::edi;
62 
63     static const RegisterID regT0 = X86::ebx;
64     static const RegisterID regT1 = X86::esi;
65 
66     static const RegisterID returnRegister = X86::eax;
67 #elif PLATFORM(X86_64)
68     static const RegisterID input = X86::edi;
69     static const RegisterID index = X86::esi;
70     static const RegisterID length = X86::edx;
71     static const RegisterID output = X86::ecx;
72 
73     static const RegisterID regT0 = X86::eax;
74     static const RegisterID regT1 = X86::ebx;
75 
76     static const RegisterID returnRegister = X86::eax;
77 #endif
78 
optimizeAlternative(PatternAlternative * alternative)79     void optimizeAlternative(PatternAlternative* alternative)
80     {
81         if (!alternative->m_terms.size())
82             return;
83 
84         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
85             PatternTerm& term = alternative->m_terms[i];
86             PatternTerm& nextTerm = alternative->m_terms[i + 1];
87 
88             if ((term.type == PatternTerm::TypeCharacterClass)
89                 && (term.quantityType == QuantifierFixedCount)
90                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
91                 && (nextTerm.quantityType == QuantifierFixedCount)) {
92                 PatternTerm termCopy = term;
93                 alternative->m_terms[i] = nextTerm;
94                 alternative->m_terms[i + 1] = termCopy;
95             }
96         }
97     }
98 
matchCharacterClassRange(RegisterID character,JumpList & failures,JumpList & matchDest,const CharacterRange * ranges,unsigned count,unsigned * matchIndex,const UChar * matches,unsigned matchCount)99     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
100     {
101         do {
102             // pick which range we're going to generate
103             int which = count >> 1;
104             char lo = ranges[which].begin;
105             char hi = ranges[which].end;
106 
107             // check if there are any ranges or matches below lo.  If not, just jl to failure -
108             // if there is anything else to check, check that first, if it falls through jmp to failure.
109             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
110                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
111 
112                 // generate code for all ranges before this one
113                 if (which)
114                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
115 
116                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
117                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
118                     ++*matchIndex;
119                 }
120                 failures.append(jump());
121 
122                 loOrAbove.link(this);
123             } else if (which) {
124                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
125 
126                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
127                 failures.append(jump());
128 
129                 loOrAbove.link(this);
130             } else
131                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
132 
133             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
134                 ++*matchIndex;
135 
136             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
137             // fall through to here, the value is above hi.
138 
139             // shuffle along & loop around if there are any more matches to handle.
140             unsigned next = which + 1;
141             ranges += next;
142             count -= next;
143         } while (count);
144     }
145 
matchCharacterClass(RegisterID character,JumpList & matchDest,const CharacterClass * charClass)146     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
147     {
148         Jump unicodeFail;
149         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
150             Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
151 
152             if (charClass->m_matchesUnicode.size()) {
153                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
154                     UChar ch = charClass->m_matchesUnicode[i];
155                     matchDest.append(branch32(Equal, character, Imm32(ch)));
156                 }
157             }
158 
159             if (charClass->m_rangesUnicode.size()) {
160                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
161                     UChar lo = charClass->m_rangesUnicode[i].begin;
162                     UChar hi = charClass->m_rangesUnicode[i].end;
163 
164                     Jump below = branch32(LessThan, character, Imm32(lo));
165                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
166                     below.link(this);
167                 }
168             }
169 
170             unicodeFail = jump();
171             isAscii.link(this);
172         }
173 
174         if (charClass->m_ranges.size()) {
175             unsigned matchIndex = 0;
176             JumpList failures;
177             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
178             while (matchIndex < charClass->m_matches.size())
179                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
180 
181             failures.link(this);
182         } else if (charClass->m_matches.size()) {
183             // optimization: gather 'a','A' etc back together, can mask & test once.
184             Vector<char> matchesAZaz;
185 
186             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
187                 char ch = charClass->m_matches[i];
188                 if (m_pattern.m_ignoreCase) {
189                     if (isASCIILower(ch)) {
190                         matchesAZaz.append(ch);
191                         continue;
192                     }
193                     if (isASCIIUpper(ch))
194                         continue;
195                 }
196                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
197             }
198 
199             if (unsigned countAZaz = matchesAZaz.size()) {
200                 or32(Imm32(32), character);
201                 for (unsigned i = 0; i < countAZaz; ++i)
202                     matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
203             }
204         }
205 
206         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
207             unicodeFail.link(this);
208     }
209 
210     // Jumps if input not available; will have (incorrectly) incremented already!
jumpIfNoAvailableInput(unsigned countToCheck)211     Jump jumpIfNoAvailableInput(unsigned countToCheck)
212     {
213         add32(Imm32(countToCheck), index);
214         return branch32(Above, index, length);
215     }
216 
jumpIfAvailableInput(unsigned countToCheck)217     Jump jumpIfAvailableInput(unsigned countToCheck)
218     {
219         add32(Imm32(countToCheck), index);
220         return branch32(BelowOrEqual, index, length);
221     }
222 
checkInput()223     Jump checkInput()
224     {
225         return branch32(BelowOrEqual, index, length);
226     }
227 
atEndOfInput()228     Jump atEndOfInput()
229     {
230         return branch32(Equal, index, length);
231     }
232 
notAtEndOfInput()233     Jump notAtEndOfInput()
234     {
235         return branch32(NotEqual, index, length);
236     }
237 
jumpIfCharEquals(UChar ch,int inputPosition)238     Jump jumpIfCharEquals(UChar ch, int inputPosition)
239     {
240         return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
241     }
242 
jumpIfCharNotEquals(UChar ch,int inputPosition)243     Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
244     {
245         return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
246     }
247 
readCharacter(int inputPosition,RegisterID reg)248     void readCharacter(int inputPosition, RegisterID reg)
249     {
250         load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
251     }
252 
storeToFrame(RegisterID reg,unsigned frameLocation)253     void storeToFrame(RegisterID reg, unsigned frameLocation)
254     {
255         poke(reg, frameLocation);
256     }
257 
storeToFrame(Imm32 imm,unsigned frameLocation)258     void storeToFrame(Imm32 imm, unsigned frameLocation)
259     {
260         poke(imm, frameLocation);
261     }
262 
storeToFrameWithPatch(unsigned frameLocation)263     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
264     {
265         return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
266     }
267 
loadFromFrame(unsigned frameLocation,RegisterID reg)268     void loadFromFrame(unsigned frameLocation, RegisterID reg)
269     {
270         peek(reg, frameLocation);
271     }
272 
loadFromFrameAndJump(unsigned frameLocation)273     void loadFromFrameAndJump(unsigned frameLocation)
274     {
275         jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
276     }
277 
278     struct AlternativeBacktrackRecord {
279         DataLabelPtr dataLabel;
280         Label backtrackLocation;
281 
AlternativeBacktrackRecordJSC::Yarr::RegexGenerator::AlternativeBacktrackRecord282         AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
283             : dataLabel(dataLabel)
284             , backtrackLocation(backtrackLocation)
285         {
286         }
287     };
288 
289     struct TermGenerationState {
TermGenerationStateJSC::Yarr::RegexGenerator::TermGenerationState290         TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
291             : disjunction(disjunction)
292             , checkedTotal(checkedTotal)
293         {
294         }
295 
resetAlternativeJSC::Yarr::RegexGenerator::TermGenerationState296         void resetAlternative()
297         {
298             isBackTrackGenerated = false;
299             alt = 0;
300         }
alternativeValidJSC::Yarr::RegexGenerator::TermGenerationState301         bool alternativeValid()
302         {
303             return alt < disjunction->m_alternatives.size();
304         }
nextAlternativeJSC::Yarr::RegexGenerator::TermGenerationState305         void nextAlternative()
306         {
307             ++alt;
308         }
alternativeJSC::Yarr::RegexGenerator::TermGenerationState309         PatternAlternative* alternative()
310         {
311             return disjunction->m_alternatives[alt];
312         }
313 
resetTermJSC::Yarr::RegexGenerator::TermGenerationState314         void resetTerm()
315         {
316             ASSERT(alternativeValid());
317             t = 0;
318         }
termValidJSC::Yarr::RegexGenerator::TermGenerationState319         bool termValid()
320         {
321             ASSERT(alternativeValid());
322             return t < alternative()->m_terms.size();
323         }
nextTermJSC::Yarr::RegexGenerator::TermGenerationState324         void nextTerm()
325         {
326             ASSERT(alternativeValid());
327             ++t;
328         }
termJSC::Yarr::RegexGenerator::TermGenerationState329         PatternTerm& term()
330         {
331             ASSERT(alternativeValid());
332             return alternative()->m_terms[t];
333         }
334 
lookaheadTermJSC::Yarr::RegexGenerator::TermGenerationState335         PatternTerm& lookaheadTerm()
336         {
337             ASSERT(alternativeValid());
338             ASSERT((t + 1) < alternative()->m_terms.size());
339             return alternative()->m_terms[t + 1];
340         }
isSinglePatternCharacterLookaheadTermJSC::Yarr::RegexGenerator::TermGenerationState341         bool isSinglePatternCharacterLookaheadTerm()
342         {
343             ASSERT(alternativeValid());
344             return ((t + 1) < alternative()->m_terms.size())
345                 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
346                 && (lookaheadTerm().quantityType == QuantifierFixedCount)
347                 && (lookaheadTerm().quantityCount == 1);
348         }
349 
inputOffsetJSC::Yarr::RegexGenerator::TermGenerationState350         int inputOffset()
351         {
352             return term().inputPosition - checkedTotal;
353         }
354 
jumpToBacktrackJSC::Yarr::RegexGenerator::TermGenerationState355         void jumpToBacktrack(Jump jump, MacroAssembler* masm)
356         {
357             if (isBackTrackGenerated)
358                 jump.linkTo(backtrackLabel, masm);
359             else
360                 backTrackJumps.append(jump);
361         }
jumpToBacktrackJSC::Yarr::RegexGenerator::TermGenerationState362         void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
363         {
364             if (isBackTrackGenerated)
365                 jumps.linkTo(backtrackLabel, masm);
366             else
367                 backTrackJumps.append(jumps);
368         }
plantJumpToBacktrackIfExistsJSC::Yarr::RegexGenerator::TermGenerationState369         bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
370         {
371             if (isBackTrackGenerated) {
372                 masm->jump(backtrackLabel);
373                 return true;
374             }
375             return false;
376         }
addBacktrackJumpJSC::Yarr::RegexGenerator::TermGenerationState377         void addBacktrackJump(Jump jump)
378         {
379             backTrackJumps.append(jump);
380         }
setBacktrackGeneratedJSC::Yarr::RegexGenerator::TermGenerationState381         void setBacktrackGenerated(Label label)
382         {
383             isBackTrackGenerated = true;
384             backtrackLabel = label;
385         }
linkAlternativeBacktracksJSC::Yarr::RegexGenerator::TermGenerationState386         void linkAlternativeBacktracks(MacroAssembler* masm)
387         {
388             isBackTrackGenerated = false;
389             backTrackJumps.link(masm);
390         }
linkAlternativeBacktracksToJSC::Yarr::RegexGenerator::TermGenerationState391         void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
392         {
393             isBackTrackGenerated = false;
394             backTrackJumps.linkTo(label, masm);
395         }
propagateBacktrackingFromJSC::Yarr::RegexGenerator::TermGenerationState396         void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
397         {
398             jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
399             if (nestedParenthesesState.isBackTrackGenerated)
400                 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
401         }
402 
403         PatternDisjunction* disjunction;
404         int checkedTotal;
405     private:
406         unsigned alt;
407         unsigned t;
408         JumpList backTrackJumps;
409         Label backtrackLabel;
410         bool isBackTrackGenerated;
411     };
412 
generateAssertionBOL(TermGenerationState & state)413     void generateAssertionBOL(TermGenerationState& state)
414     {
415         PatternTerm& term = state.term();
416 
417         if (m_pattern.m_multiline) {
418             const RegisterID character = regT0;
419 
420             JumpList matchDest;
421             if (!term.inputPosition)
422                 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
423 
424             readCharacter(state.inputOffset() - 1, character);
425             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
426             state.jumpToBacktrack(jump(), this);
427 
428             matchDest.link(this);
429         } else {
430             // Erk, really should poison out these alternatives early. :-/
431             if (term.inputPosition)
432                 state.jumpToBacktrack(jump(), this);
433             else
434                 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
435         }
436     }
437 
generateAssertionEOL(TermGenerationState & state)438     void generateAssertionEOL(TermGenerationState& state)
439     {
440         PatternTerm& term = state.term();
441 
442         if (m_pattern.m_multiline) {
443             const RegisterID character = regT0;
444 
445             JumpList matchDest;
446             if (term.inputPosition == state.checkedTotal)
447                 matchDest.append(atEndOfInput());
448 
449             readCharacter(state.inputOffset(), character);
450             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
451             state.jumpToBacktrack(jump(), this);
452 
453             matchDest.link(this);
454         } else {
455             if (term.inputPosition == state.checkedTotal)
456                 state.jumpToBacktrack(notAtEndOfInput(), this);
457             // Erk, really should poison out these alternatives early. :-/
458             else
459                 state.jumpToBacktrack(jump(), this);
460         }
461     }
462 
463     // Also falls though on nextIsNotWordChar.
matchAssertionWordchar(TermGenerationState & state,JumpList & nextIsWordChar,JumpList & nextIsNotWordChar)464     void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
465     {
466         const RegisterID character = regT0;
467         PatternTerm& term = state.term();
468 
469         if (term.inputPosition == state.checkedTotal)
470             nextIsNotWordChar.append(atEndOfInput());
471 
472         readCharacter(state.inputOffset(), character);
473         matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
474     }
475 
generateAssertionWordBoundary(TermGenerationState & state)476     void generateAssertionWordBoundary(TermGenerationState& state)
477     {
478         const RegisterID character = regT0;
479         PatternTerm& term = state.term();
480 
481         Jump atBegin;
482         JumpList matchDest;
483         if (!term.inputPosition)
484             atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
485         readCharacter(state.inputOffset() - 1, character);
486         matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
487         if (!term.inputPosition)
488             atBegin.link(this);
489 
490         // We fall through to here if the last character was not a wordchar.
491         JumpList nonWordCharThenWordChar;
492         JumpList nonWordCharThenNonWordChar;
493         if (term.invertOrCapture) {
494             matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
495             nonWordCharThenWordChar.append(jump());
496         } else {
497             matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
498             nonWordCharThenNonWordChar.append(jump());
499         }
500         state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
501 
502         // We jump here if the last character was a wordchar.
503         matchDest.link(this);
504         JumpList wordCharThenWordChar;
505         JumpList wordCharThenNonWordChar;
506         if (term.invertOrCapture) {
507             matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
508             wordCharThenWordChar.append(jump());
509         } else {
510             matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
511             // This can fall-though!
512         }
513 
514         state.jumpToBacktrack(wordCharThenWordChar, this);
515 
516         nonWordCharThenWordChar.link(this);
517         wordCharThenNonWordChar.link(this);
518     }
519 
generatePatternCharacterSingle(TermGenerationState & state)520     void generatePatternCharacterSingle(TermGenerationState& state)
521     {
522         const RegisterID character = regT0;
523         UChar ch = state.term().patternCharacter;
524 
525         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
526             readCharacter(state.inputOffset(), character);
527             or32(Imm32(32), character);
528             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
529         } else {
530             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
531             state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
532         }
533     }
534 
generatePatternCharacterPair(TermGenerationState & state)535     void generatePatternCharacterPair(TermGenerationState& state)
536     {
537         const RegisterID character = regT0;
538         UChar ch1 = state.term().patternCharacter;
539         UChar ch2 = state.lookaheadTerm().patternCharacter;
540 
541         int mask = 0;
542         int chPair = ch1 | (ch2 << 16);
543 
544         if (m_pattern.m_ignoreCase) {
545             if (isASCIIAlpha(ch1))
546                 mask |= 32;
547             if (isASCIIAlpha(ch2))
548                 mask |= 32 << 16;
549         }
550 
551         if (mask) {
552             load32(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
553             or32(Imm32(mask), character);
554             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
555         } else
556             state.jumpToBacktrack(branch32(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
557     }
558 
generatePatternCharacterFixed(TermGenerationState & state)559     void generatePatternCharacterFixed(TermGenerationState& state)
560     {
561         const RegisterID character = regT0;
562         const RegisterID countRegister = regT1;
563         PatternTerm& term = state.term();
564         UChar ch = term.patternCharacter;
565 
566         move(index, countRegister);
567         sub32(Imm32(term.quantityCount), countRegister);
568 
569         Label loop(this);
570         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
571             load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
572             or32(Imm32(32), character);
573             state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
574         } else {
575             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
576             state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
577         }
578         add32(Imm32(1), countRegister);
579         branch32(NotEqual, countRegister, index).linkTo(loop, this);
580     }
581 
generatePatternCharacterGreedy(TermGenerationState & state)582     void generatePatternCharacterGreedy(TermGenerationState& state)
583     {
584         const RegisterID character = regT0;
585         const RegisterID countRegister = regT1;
586         PatternTerm& term = state.term();
587         UChar ch = term.patternCharacter;
588 
589         move(Imm32(0), countRegister);
590 
591         JumpList failures;
592         Label loop(this);
593         failures.append(atEndOfInput());
594         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
595             readCharacter(state.inputOffset(), character);
596             or32(Imm32(32), character);
597             failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
598         } else {
599             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
600             failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
601         }
602         add32(Imm32(1), countRegister);
603         add32(Imm32(1), index);
604         branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
605         failures.append(jump());
606 
607         Label backtrackBegin(this);
608         loadFromFrame(term.frameLocation, countRegister);
609         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
610         sub32(Imm32(1), countRegister);
611         sub32(Imm32(1), index);
612 
613         failures.link(this);
614 
615         storeToFrame(countRegister, term.frameLocation);
616 
617         state.setBacktrackGenerated(backtrackBegin);
618     }
619 
generatePatternCharacterNonGreedy(TermGenerationState & state)620     void generatePatternCharacterNonGreedy(TermGenerationState& state)
621     {
622         const RegisterID character = regT0;
623         const RegisterID countRegister = regT1;
624         PatternTerm& term = state.term();
625         UChar ch = term.patternCharacter;
626 
627         move(Imm32(0), countRegister);
628 
629         Jump firstTimeDoNothing = jump();
630 
631         Label hardFail(this);
632         sub32(countRegister, index);
633         state.jumpToBacktrack(jump(), this);
634 
635         Label backtrackBegin(this);
636         loadFromFrame(term.frameLocation, countRegister);
637 
638         atEndOfInput().linkTo(hardFail, this);
639         branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
640         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
641             readCharacter(state.inputOffset(), character);
642             or32(Imm32(32), character);
643             branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
644         } else {
645             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
646             jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
647         }
648 
649         add32(Imm32(1), countRegister);
650         add32(Imm32(1), index);
651 
652         firstTimeDoNothing.link(this);
653         storeToFrame(countRegister, term.frameLocation);
654 
655         state.setBacktrackGenerated(backtrackBegin);
656     }
657 
generateCharacterClassSingle(TermGenerationState & state)658     void generateCharacterClassSingle(TermGenerationState& state)
659     {
660         const RegisterID character = regT0;
661         PatternTerm& term = state.term();
662 
663         JumpList matchDest;
664         readCharacter(state.inputOffset(), character);
665         matchCharacterClass(character, matchDest, term.characterClass);
666 
667         if (term.invertOrCapture)
668             state.jumpToBacktrack(matchDest, this);
669         else {
670             state.jumpToBacktrack(jump(), this);
671             matchDest.link(this);
672         }
673     }
674 
generateCharacterClassFixed(TermGenerationState & state)675     void generateCharacterClassFixed(TermGenerationState& state)
676     {
677         const RegisterID character = regT0;
678         const RegisterID countRegister = regT1;
679         PatternTerm& term = state.term();
680 
681         move(index, countRegister);
682         sub32(Imm32(term.quantityCount), countRegister);
683 
684         Label loop(this);
685         JumpList matchDest;
686         load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
687         matchCharacterClass(character, matchDest, term.characterClass);
688 
689         if (term.invertOrCapture)
690             state.jumpToBacktrack(matchDest, this);
691         else {
692             state.jumpToBacktrack(jump(), this);
693             matchDest.link(this);
694         }
695 
696         add32(Imm32(1), countRegister);
697         branch32(NotEqual, countRegister, index).linkTo(loop, this);
698     }
699 
generateCharacterClassGreedy(TermGenerationState & state)700     void generateCharacterClassGreedy(TermGenerationState& state)
701     {
702         const RegisterID character = regT0;
703         const RegisterID countRegister = regT1;
704         PatternTerm& term = state.term();
705 
706         move(Imm32(0), countRegister);
707 
708         JumpList failures;
709         Label loop(this);
710         failures.append(atEndOfInput());
711 
712         if (term.invertOrCapture) {
713             readCharacter(state.inputOffset(), character);
714             matchCharacterClass(character, failures, term.characterClass);
715         } else {
716             JumpList matchDest;
717             readCharacter(state.inputOffset(), character);
718             matchCharacterClass(character, matchDest, term.characterClass);
719             failures.append(jump());
720             matchDest.link(this);
721         }
722 
723         add32(Imm32(1), countRegister);
724         add32(Imm32(1), index);
725         branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
726         failures.append(jump());
727 
728         Label backtrackBegin(this);
729         loadFromFrame(term.frameLocation, countRegister);
730         state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
731         sub32(Imm32(1), countRegister);
732         sub32(Imm32(1), index);
733 
734         failures.link(this);
735 
736         storeToFrame(countRegister, term.frameLocation);
737 
738         state.setBacktrackGenerated(backtrackBegin);
739     }
740 
generateCharacterClassNonGreedy(TermGenerationState & state)741     void generateCharacterClassNonGreedy(TermGenerationState& state)
742     {
743         const RegisterID character = regT0;
744         const RegisterID countRegister = regT1;
745         PatternTerm& term = state.term();
746 
747         move(Imm32(0), countRegister);
748 
749         Jump firstTimeDoNothing = jump();
750 
751         Label hardFail(this);
752         sub32(countRegister, index);
753         state.jumpToBacktrack(jump(), this);
754 
755         Label backtrackBegin(this);
756         loadFromFrame(term.frameLocation, countRegister);
757 
758         atEndOfInput().linkTo(hardFail, this);
759         branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
760 
761         JumpList matchDest;
762         readCharacter(state.inputOffset(), character);
763         matchCharacterClass(character, matchDest, term.characterClass);
764 
765         if (term.invertOrCapture)
766             matchDest.linkTo(hardFail, this);
767         else {
768             jump(hardFail);
769             matchDest.link(this);
770         }
771 
772         add32(Imm32(1), countRegister);
773         add32(Imm32(1), index);
774 
775         firstTimeDoNothing.link(this);
776         storeToFrame(countRegister, term.frameLocation);
777 
778         state.setBacktrackGenerated(backtrackBegin);
779     }
780 
generateParenthesesDisjunction(PatternTerm & parenthesesTerm,TermGenerationState & state,unsigned alternativeFrameLocation)781     void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
782     {
783         ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
784         ASSERT(parenthesesTerm.quantityCount == 1);
785 
786         PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
787         unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
788 
789         if (disjunction->m_alternatives.size() == 1) {
790             state.resetAlternative();
791             ASSERT(state.alternativeValid());
792             PatternAlternative* alternative = state.alternative();
793             optimizeAlternative(alternative);
794 
795             int countToCheck = alternative->m_minimumSize - preCheckedCount;
796             if (countToCheck) {
797                 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
798 
799                 // FIXME: This is quite horrible.  The call to 'plantJumpToBacktrackIfExists'
800                 // will be forced to always trampoline into here, just to decrement the index.
801                 // Ick.
802                 Jump skip = jump();
803 
804                 Label backtrackBegin(this);
805                 sub32(Imm32(countToCheck), index);
806                 state.addBacktrackJump(jump());
807 
808                 skip.link(this);
809 
810                 state.setBacktrackGenerated(backtrackBegin);
811 
812                 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
813                 state.checkedTotal += countToCheck;
814             }
815 
816             for (state.resetTerm(); state.termValid(); state.nextTerm())
817                 generateTerm(state);
818 
819             state.checkedTotal -= countToCheck;
820         } else {
821             JumpList successes;
822 
823             for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
824 
825                 PatternAlternative* alternative = state.alternative();
826                 optimizeAlternative(alternative);
827 
828                 ASSERT(alternative->m_minimumSize >= preCheckedCount);
829                 int countToCheck = alternative->m_minimumSize - preCheckedCount;
830                 if (countToCheck) {
831                     state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
832                     state.checkedTotal += countToCheck;
833                 }
834 
835                 for (state.resetTerm(); state.termValid(); state.nextTerm())
836                     generateTerm(state);
837 
838                 // Matched an alternative.
839                 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
840                 successes.append(jump());
841 
842                 // Alternative did not match.
843                 Label backtrackLocation(this);
844 
845                 // Can we backtrack the alternative? - if so, do so.  If not, just fall through to the next one.
846                 state.plantJumpToBacktrackIfExists(this);
847 
848                 state.linkAlternativeBacktracks(this);
849 
850                 if (countToCheck) {
851                     sub32(Imm32(countToCheck), index);
852                     state.checkedTotal -= countToCheck;
853                 }
854 
855                 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
856             }
857             // We fall through to here when the last alternative fails.
858             // Add a backtrack out of here for the parenthese handling code to link up.
859             state.addBacktrackJump(jump());
860 
861             // Generate a trampoline for the parens code to backtrack to, to retry the
862             // next alternative.
863             state.setBacktrackGenerated(label());
864             loadFromFrameAndJump(alternativeFrameLocation);
865 
866             // FIXME: both of the above hooks are a little inefficient, in that you
867             // may end up trampolining here, just to trampoline back out to the
868             // parentheses code, or vice versa.  We can probably eliminate a jump
869             // by restructuring, but coding this way for now for simplicity during
870             // development.
871 
872             successes.link(this);
873         }
874     }
875 
generateParenthesesSingle(TermGenerationState & state)876     void generateParenthesesSingle(TermGenerationState& state)
877     {
878         const RegisterID indexTemporary = regT0;
879         PatternTerm& term = state.term();
880         PatternDisjunction* disjunction = term.parentheses.disjunction;
881         ASSERT(term.quantityCount == 1);
882 
883         unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
884 
885         unsigned parenthesesFrameLocation = term.frameLocation;
886         unsigned alternativeFrameLocation = parenthesesFrameLocation;
887         if (term.quantityType != QuantifierFixedCount)
888             alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
889 
890         // optimized case - no capture & no quantifier can be handled in a light-weight manner.
891         if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
892             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
893             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
894             // this expects that any backtracks back out of the parentheses will be in the
895             // parenthesesState's backTrackJumps vector, and that if they need backtracking
896             // they will have set an entry point on the parenthesesState's backtrackLabel.
897             state.propagateBacktrackingFrom(parenthesesState, this);
898         } else {
899             Jump nonGreedySkipParentheses;
900             Label nonGreedyTryParentheses;
901             if (term.quantityType == QuantifierGreedy)
902                 storeToFrame(Imm32(1), parenthesesFrameLocation);
903             else if (term.quantityType == QuantifierNonGreedy) {
904                 storeToFrame(Imm32(0), parenthesesFrameLocation);
905                 nonGreedySkipParentheses = jump();
906                 nonGreedyTryParentheses = label();
907                 storeToFrame(Imm32(1), parenthesesFrameLocation);
908             }
909 
910             // store the match start index
911             if (term.invertOrCapture) {
912                 int inputOffset = state.inputOffset() - preCheckedCount;
913                 if (inputOffset) {
914                     move(index, indexTemporary);
915                     add32(Imm32(inputOffset), indexTemporary);
916                     store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
917                 } else
918                     store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
919             }
920 
921             // generate the body of the parentheses
922             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
923             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
924 
925             // store the match end index
926             if (term.invertOrCapture) {
927                 int inputOffset = state.inputOffset();
928                 if (inputOffset) {
929                     move(index, indexTemporary);
930                     add32(Imm32(state.inputOffset()), indexTemporary);
931                     store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
932                 } else
933                     store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
934             }
935             Jump success = jump();
936 
937             // A failure AFTER the parens jumps here
938             Label backtrackFromAfterParens(this);
939 
940             if (term.quantityType == QuantifierGreedy) {
941                 // If this is zero we have now tested with both with and without the parens.
942                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
943                 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
944             } else if (term.quantityType == QuantifierNonGreedy) {
945                 // If this is zero we have now tested with both with and without the parens.
946                 loadFromFrame(parenthesesFrameLocation, indexTemporary);
947                 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
948             }
949 
950             parenthesesState.plantJumpToBacktrackIfExists(this);
951             // A failure WITHIN the parens jumps here
952             parenthesesState.linkAlternativeBacktracks(this);
953             if (term.invertOrCapture) {
954                 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
955                 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
956             }
957 
958             if (term.quantityType == QuantifierGreedy)
959                 storeToFrame(Imm32(0), parenthesesFrameLocation);
960             else
961                 state.jumpToBacktrack(jump(), this);
962 
963             state.setBacktrackGenerated(backtrackFromAfterParens);
964             if (term.quantityType == QuantifierNonGreedy)
965                 nonGreedySkipParentheses.link(this);
966             success.link(this);
967         }
968     }
969 
generateParentheticalAssertion(TermGenerationState & state)970     void generateParentheticalAssertion(TermGenerationState& state)
971     {
972         PatternTerm& term = state.term();
973         PatternDisjunction* disjunction = term.parentheses.disjunction;
974         ASSERT(term.quantityCount == 1);
975         ASSERT(term.quantityType == QuantifierFixedCount);
976 
977         unsigned parenthesesFrameLocation = term.frameLocation;
978         unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
979 
980         int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
981 
982         if (term.invertOrCapture) {
983             // Inverted case
984             storeToFrame(index, parenthesesFrameLocation);
985 
986             state.checkedTotal -= countCheckedAfterAssertion;
987             if (countCheckedAfterAssertion)
988                 sub32(Imm32(countCheckedAfterAssertion), index);
989 
990             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
991             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
992             // Success! - which means - Fail!
993             loadFromFrame(parenthesesFrameLocation, index);
994             state.jumpToBacktrack(jump(), this);
995 
996             // And fail means success.
997             parenthesesState.linkAlternativeBacktracks(this);
998             loadFromFrame(parenthesesFrameLocation, index);
999 
1000             state.checkedTotal += countCheckedAfterAssertion;
1001         } else {
1002             // Normal case
1003             storeToFrame(index, parenthesesFrameLocation);
1004 
1005             state.checkedTotal -= countCheckedAfterAssertion;
1006             if (countCheckedAfterAssertion)
1007                 sub32(Imm32(countCheckedAfterAssertion), index);
1008 
1009             TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1010             generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
1011             // Success! - which means - Success!
1012             loadFromFrame(parenthesesFrameLocation, index);
1013             Jump success = jump();
1014 
1015             parenthesesState.linkAlternativeBacktracks(this);
1016             loadFromFrame(parenthesesFrameLocation, index);
1017             state.jumpToBacktrack(jump(), this);
1018 
1019             success.link(this);
1020 
1021             state.checkedTotal += countCheckedAfterAssertion;
1022         }
1023     }
1024 
generateTerm(TermGenerationState & state)1025     void generateTerm(TermGenerationState& state)
1026     {
1027         PatternTerm& term = state.term();
1028 
1029         switch (term.type) {
1030         case PatternTerm::TypeAssertionBOL:
1031             generateAssertionBOL(state);
1032             break;
1033 
1034         case PatternTerm::TypeAssertionEOL:
1035             generateAssertionEOL(state);
1036             break;
1037 
1038         case PatternTerm::TypeAssertionWordBoundary:
1039             generateAssertionWordBoundary(state);
1040             break;
1041 
1042         case PatternTerm::TypePatternCharacter:
1043             switch (term.quantityType) {
1044             case QuantifierFixedCount:
1045                 if (term.quantityCount == 1) {
1046                     if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1047                         generatePatternCharacterPair(state);
1048                         state.nextTerm();
1049                     } else
1050                         generatePatternCharacterSingle(state);
1051                 } else
1052                     generatePatternCharacterFixed(state);
1053                 break;
1054             case QuantifierGreedy:
1055                 generatePatternCharacterGreedy(state);
1056                 break;
1057             case QuantifierNonGreedy:
1058                 generatePatternCharacterNonGreedy(state);
1059                 break;
1060             }
1061             break;
1062 
1063         case PatternTerm::TypeCharacterClass:
1064             switch (term.quantityType) {
1065             case QuantifierFixedCount:
1066                 if (term.quantityCount == 1)
1067                     generateCharacterClassSingle(state);
1068                 else
1069                     generateCharacterClassFixed(state);
1070                 break;
1071             case QuantifierGreedy:
1072                 generateCharacterClassGreedy(state);
1073                 break;
1074             case QuantifierNonGreedy:
1075                 generateCharacterClassNonGreedy(state);
1076                 break;
1077             }
1078             break;
1079 
1080         case PatternTerm::TypeBackReference:
1081             m_generationFailed = true;
1082             break;
1083 
1084         case PatternTerm::TypeForwardReference:
1085             break;
1086 
1087         case PatternTerm::TypeParenthesesSubpattern:
1088             if ((term.quantityCount == 1) && !term.parentheses.isCopy)
1089                 generateParenthesesSingle(state);
1090             else
1091                 m_generationFailed = true;
1092             break;
1093 
1094         case PatternTerm::TypeParentheticalAssertion:
1095             generateParentheticalAssertion(state);
1096             break;
1097         }
1098     }
1099 
generateDisjunction(PatternDisjunction * disjunction)1100     void generateDisjunction(PatternDisjunction* disjunction)
1101     {
1102         TermGenerationState state(disjunction, 0);
1103         state.resetAlternative();
1104 
1105         // Plant a check to see if there is sufficient input available to run the first alternative.
1106         // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1107         // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1108         // skipped this check.
1109 
1110         Label firstAlternative(this);
1111 
1112         // check availability for the next alternative
1113         int countCheckedForCurrentAlternative = 0;
1114         int countToCheckForFirstAlternative = 0;
1115         bool hasShorterAlternatives = false;
1116         JumpList notEnoughInputForPreviousAlternative;
1117 
1118         if (state.alternativeValid()) {
1119             PatternAlternative* alternative = state.alternative();
1120             countToCheckForFirstAlternative = alternative->m_minimumSize;
1121             state.checkedTotal += countToCheckForFirstAlternative;
1122             if (countToCheckForFirstAlternative)
1123                 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
1124             countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1125         }
1126 
1127         Label firstAlternativeInputChecked(this);
1128 
1129         while (state.alternativeValid()) {
1130             // Track whether any alternatives are shorter than the first one.
1131             hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1132 
1133             PatternAlternative* alternative = state.alternative();
1134             optimizeAlternative(alternative);
1135 
1136             for (state.resetTerm(); state.termValid(); state.nextTerm())
1137                 generateTerm(state);
1138 
1139             // If we get here, the alternative matched.
1140             if (m_pattern.m_body->m_callFrameSize)
1141                 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1142 
1143             ASSERT(index != returnRegister);
1144             if (m_pattern.m_body->m_hasFixedSize) {
1145                 move(index, returnRegister);
1146                 if (alternative->m_minimumSize)
1147                     sub32(Imm32(alternative->m_minimumSize), returnRegister);
1148             } else
1149                 pop(returnRegister);
1150             store32(index, Address(output, 4));
1151             store32(returnRegister, output);
1152 
1153             generateReturn();
1154 
1155             state.nextAlternative();
1156 
1157             // if there are any more alternatives, plant the check for input before looping.
1158             if (state.alternativeValid()) {
1159                 PatternAlternative* nextAlternative = state.alternative();
1160                 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1161 
1162                 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1163                     // If we get here, there the last input checked failed.
1164                     notEnoughInputForPreviousAlternative.link(this);
1165 
1166                     // Check if sufficent input available to run the next alternative
1167                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1168                     // We are now in the correct state to enter the next alternative; this add is only required
1169                     // to mirror and revert operation of the sub32, just below.
1170                     add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1171 
1172                     // If we get here, there the last input checked passed.
1173                     state.linkAlternativeBacktracks(this);
1174                     // No need to check if we can run the next alternative, since it is shorter -
1175                     // just update index.
1176                     sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
1177                 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1178                     // If we get here, there the last input checked failed.
1179                     // If there is insufficient input to run the current alternative, and the next alternative is longer,
1180                     // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1181                     // we had checked.
1182                     notEnoughInputForPreviousAlternative.link(this);
1183                     add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
1184                     notEnoughInputForPreviousAlternative.append(jump());
1185 
1186                     // The next alternative is longer than the current one; check the difference.
1187                     state.linkAlternativeBacktracks(this);
1188                     notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1189                 } else { // CASE 3: Both alternatives are the same length.
1190                     ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1191 
1192                     // If the next alterative is the same length as this one, then no need to check the input -
1193                     // if there was sufficent input to run the current alternative then there is sufficient
1194                     // input to run the next one; if not, there isn't.
1195                     state.linkAlternativeBacktracks(this);
1196                 }
1197 
1198                 state.checkedTotal -= countCheckedForCurrentAlternative;
1199                 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1200                 state.checkedTotal += countCheckedForCurrentAlternative;
1201             }
1202         }
1203 
1204         // If we get here, all Alternatives failed...
1205 
1206         state.checkedTotal -= countCheckedForCurrentAlternative;
1207 
1208         // How much more input need there be to be able to retry from the first alternative?
1209         // examples:
1210         //   /yarr_jit/ or /wrec|pcre/
1211         //     In these examples we need check for one more input before looping.
1212         //   /yarr_jit|pcre/
1213         //     In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1214         //     being four longer than the last alternative checked, and another +1 to effectively move
1215         //     the start position along by one).
1216         //   /yarr|rules/ or /wrec|notsomuch/
1217         //     In these examples, provided that there was sufficient input to have just been matching for
1218         //     the second alternative we can loop without checking for available input (since the second
1219         //     alternative is longer than the first).  In the latter example we need to decrement index
1220         //     (by 4) so the start position is only progressed by 1 from the last iteration.
1221         int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1222 
1223         // First, deal with the cases where there was sufficient input to try the last alternative.
1224         if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1225             state.linkAlternativeBacktracks(this);
1226         else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1227             state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
1228         else { // no need to check the input, but we do have some bookkeeping to do first.
1229             state.linkAlternativeBacktracks(this);
1230 
1231             // Where necessary update our preserved start position.
1232             if (!m_pattern.m_body->m_hasFixedSize) {
1233                 move(index, regT0);
1234                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1235                 poke(regT0, m_pattern.m_body->m_callFrameSize);
1236             }
1237 
1238             // Update index if necessary, and loop (without checking).
1239             if (incrementForNextIter)
1240                 add32(Imm32(incrementForNextIter), index);
1241             jump().linkTo(firstAlternativeInputChecked, this);
1242         }
1243 
1244         notEnoughInputForPreviousAlternative.link(this);
1245         // Update our idea of the start position, if we're tracking this.
1246         if (!m_pattern.m_body->m_hasFixedSize) {
1247             if (countCheckedForCurrentAlternative - 1) {
1248                 move(index, regT0);
1249                 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
1250                 poke(regT0, m_pattern.m_body->m_callFrameSize);
1251             } else
1252                 poke(index, m_pattern.m_body->m_callFrameSize);
1253         }
1254         // Check if there is sufficent input to run the first alternative again.
1255         jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
1256         // No - insufficent input to run the first alteranative, are there any other alternatives we
1257         // might need to check?  If so, the last check will have left the index incremented by
1258         // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1259         // LESS input is available, to have the effect of just progressing the start position by 1
1260         // from the last iteration.  If this check passes we can just jump up to the check associated
1261         // with the first alternative in the loop.  This is a bit sad, since we'll end up trying the
1262         // first alternative again, and this check will fail (otherwise the check planted just above
1263         // here would have passed).  This is a bit sad, however it saves trying to do something more
1264         // complex here in compilation, and in the common case we should end up coallescing the checks.
1265         //
1266         // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1267         // of the minimum-alterantive-lengths.  E.g. if I have two alternatives of length 200 and 150,
1268         // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1269         // is sufficient input to run either alternative (constantly failing).  If there had been only
1270         // one alternative, or if the shorter alternative had come first, we would have terminated
1271         // immediately. :-/
1272         if (hasShorterAlternatives)
1273             jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
1274         // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1275         // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1276         // but since we're about to return a failure this doesn't really matter!)
1277 
1278         unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1279         if (!m_pattern.m_body->m_hasFixedSize)
1280             ++frameSize;
1281         if (frameSize)
1282             addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
1283 
1284         move(Imm32(-1), returnRegister);
1285 
1286         generateReturn();
1287     }
1288 
generateEnter()1289     void generateEnter()
1290     {
1291 #if PLATFORM(X86_64)
1292         push(X86::ebp);
1293         move(stackPointerRegister, X86::ebp);
1294         push(X86::ebx);
1295 #elif PLATFORM(X86)
1296         push(X86::ebp);
1297         move(stackPointerRegister, X86::ebp);
1298         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1299         push(X86::ebx);
1300         push(X86::edi);
1301         push(X86::esi);
1302         // load output into edi (2 = saved ebp + return address).
1303     #if COMPILER(MSVC)
1304         loadPtr(Address(X86::ebp, 2 * sizeof(void*)), input);
1305         loadPtr(Address(X86::ebp, 3 * sizeof(void*)), index);
1306         loadPtr(Address(X86::ebp, 4 * sizeof(void*)), length);
1307         loadPtr(Address(X86::ebp, 5 * sizeof(void*)), output);
1308     #else
1309         loadPtr(Address(X86::ebp, 2 * sizeof(void*)), output);
1310     #endif
1311 #elif PLATFORM(ARM)
1312 #if !PLATFORM_ARM_ARCH(7)
1313         push(ARM::lr);
1314 #endif
1315         push(ARM::r4);
1316         push(ARM::r5);
1317         push(ARM::r6);
1318         move(ARM::r3, output);
1319 #endif
1320     }
1321 
generateReturn()1322     void generateReturn()
1323     {
1324 #if PLATFORM(X86_64)
1325         pop(X86::ebx);
1326         pop(X86::ebp);
1327 #elif PLATFORM(X86)
1328         pop(X86::esi);
1329         pop(X86::edi);
1330         pop(X86::ebx);
1331         pop(X86::ebp);
1332 #elif PLATFORM(ARM)
1333         pop(ARM::r6);
1334         pop(ARM::r5);
1335         pop(ARM::r4);
1336 #endif
1337         ret();
1338     }
1339 
1340 public:
RegexGenerator(RegexPattern & pattern)1341     RegexGenerator(RegexPattern& pattern)
1342         : m_pattern(pattern)
1343         , m_generationFailed(false)
1344     {
1345     }
1346 
generate()1347     void generate()
1348     {
1349         generateEnter();
1350 
1351         // TODO: do I really want this on the stack?
1352         if (!m_pattern.m_body->m_hasFixedSize)
1353             push(index);
1354 
1355         if (m_pattern.m_body->m_callFrameSize)
1356             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
1357 
1358         generateDisjunction(m_pattern.m_body);
1359     }
1360 
compile(JSGlobalData * globalData,RegexCodeBlock & jitObject)1361     void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1362     {
1363         generate();
1364 
1365         LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
1366 
1367         for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1368             patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
1369 
1370         jitObject.set(patchBuffer.finalizeCode());
1371     }
1372 
generationFailed()1373     bool generationFailed()
1374     {
1375         return m_generationFailed;
1376     }
1377 
1378 private:
1379     RegexPattern& m_pattern;
1380     Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1381     bool m_generationFailed;
1382 };
1383 
jitCompileRegex(JSGlobalData * globalData,RegexCodeBlock & jitObject,const UString & patternString,unsigned & numSubpatterns,const char * & error,bool ignoreCase,bool multiline)1384 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1385 {
1386     RegexPattern pattern(ignoreCase, multiline);
1387 
1388     if ((error = compileRegex(patternString, pattern)))
1389         return;
1390 
1391     numSubpatterns = pattern.m_numSubpatterns;
1392 
1393     RegexGenerator generator(pattern);
1394     generator.compile(globalData, jitObject);
1395 
1396     if (generator.generationFailed()) {
1397         JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1398         JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1399         jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
1400     }
1401 }
1402 
executeRegex(RegexCodeBlock & jitObject,const UChar * input,unsigned start,unsigned length,int * output,int outputArraySize)1403 int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize)
1404 {
1405     if (JSRegExp* fallback = jitObject.getFallback())
1406         return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0];
1407 
1408     return jitObject.execute(input, start, length, output);
1409 }
1410 
1411 }}
1412 
1413 #endif
1414 
1415 
1416 
1417 
1418 
1419