• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef YarrInterpreter_h
27 #define YarrInterpreter_h
28 
29 #include "YarrPattern.h"
30 #include <wtf/PassOwnPtr.h>
31 #include <wtf/unicode/Unicode.h>
32 
33 namespace WTF {
34 class BumpPointerAllocator;
35 }
36 using WTF::BumpPointerAllocator;
37 
38 namespace JSC { namespace Yarr {
39 
40 class ByteDisjunction;
41 
42 struct ByteTerm {
43     enum Type {
44         TypeBodyAlternativeBegin,
45         TypeBodyAlternativeDisjunction,
46         TypeBodyAlternativeEnd,
47         TypeAlternativeBegin,
48         TypeAlternativeDisjunction,
49         TypeAlternativeEnd,
50         TypeSubpatternBegin,
51         TypeSubpatternEnd,
52         TypeAssertionBOL,
53         TypeAssertionEOL,
54         TypeAssertionWordBoundary,
55         TypePatternCharacterOnce,
56         TypePatternCharacterFixed,
57         TypePatternCharacterGreedy,
58         TypePatternCharacterNonGreedy,
59         TypePatternCasedCharacterOnce,
60         TypePatternCasedCharacterFixed,
61         TypePatternCasedCharacterGreedy,
62         TypePatternCasedCharacterNonGreedy,
63         TypeCharacterClass,
64         TypeBackReference,
65         TypeParenthesesSubpattern,
66         TypeParenthesesSubpatternOnceBegin,
67         TypeParenthesesSubpatternOnceEnd,
68         TypeParenthesesSubpatternTerminalBegin,
69         TypeParenthesesSubpatternTerminalEnd,
70         TypeParentheticalAssertionBegin,
71         TypeParentheticalAssertionEnd,
72         TypeCheckInput,
73         TypeUncheckInput,
74     } type;
75     union {
76         struct {
77             union {
78                 UChar patternCharacter;
79                 struct {
80                     UChar lo;
81                     UChar hi;
82                 } casedCharacter;
83                 CharacterClass* characterClass;
84                 unsigned subpatternId;
85             };
86             union {
87                 ByteDisjunction* parenthesesDisjunction;
88                 unsigned parenthesesWidth;
89             };
90             QuantifierType quantityType;
91             unsigned quantityCount;
92         } atom;
93         struct {
94             int next;
95             int end;
96             bool onceThrough;
97         } alternative;
98         unsigned checkInputCount;
99     };
100     unsigned frameLocation;
101     bool m_capture : 1;
102     bool m_invert : 1;
103     int inputPosition;
104 
ByteTermByteTerm105     ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
106         : frameLocation(frameLocation)
107         , m_capture(false)
108         , m_invert(false)
109     {
110         switch (quantityType) {
111         case QuantifierFixedCount:
112             type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
113             break;
114         case QuantifierGreedy:
115             type = ByteTerm::TypePatternCharacterGreedy;
116             break;
117         case QuantifierNonGreedy:
118             type = ByteTerm::TypePatternCharacterNonGreedy;
119             break;
120         }
121 
122         atom.patternCharacter = ch;
123         atom.quantityType = quantityType;
124         atom.quantityCount = quantityCount;
125         inputPosition = inputPos;
126     }
127 
ByteTermByteTerm128     ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
129         : frameLocation(frameLocation)
130         , m_capture(false)
131         , m_invert(false)
132     {
133         switch (quantityType) {
134         case QuantifierFixedCount:
135             type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
136             break;
137         case QuantifierGreedy:
138             type = ByteTerm::TypePatternCasedCharacterGreedy;
139             break;
140         case QuantifierNonGreedy:
141             type = ByteTerm::TypePatternCasedCharacterNonGreedy;
142             break;
143         }
144 
145         atom.casedCharacter.lo = lo;
146         atom.casedCharacter.hi = hi;
147         atom.quantityType = quantityType;
148         atom.quantityCount = quantityCount;
149         inputPosition = inputPos;
150     }
151 
ByteTermByteTerm152     ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
153         : type(ByteTerm::TypeCharacterClass)
154         , m_capture(false)
155         , m_invert(invert)
156     {
157         atom.characterClass = characterClass;
158         atom.quantityType = QuantifierFixedCount;
159         atom.quantityCount = 1;
160         inputPosition = inputPos;
161     }
162 
ByteTermByteTerm163     ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
164         : type(type)
165         , m_capture(capture)
166         , m_invert(false)
167     {
168         atom.subpatternId = subpatternId;
169         atom.parenthesesDisjunction = parenthesesInfo;
170         atom.quantityType = QuantifierFixedCount;
171         atom.quantityCount = 1;
172         inputPosition = inputPos;
173     }
174 
175     ByteTerm(Type type, bool invert = false)
typeByteTerm176         : type(type)
177         , m_capture(false)
178         , m_invert(invert)
179     {
180         atom.quantityType = QuantifierFixedCount;
181         atom.quantityCount = 1;
182     }
183 
ByteTermByteTerm184     ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
185         : type(type)
186         , m_capture(capture)
187         , m_invert(invert)
188     {
189         atom.subpatternId = subpatternId;
190         atom.quantityType = QuantifierFixedCount;
191         atom.quantityCount = 1;
192         inputPosition = inputPos;
193     }
194 
BOLByteTerm195     static ByteTerm BOL(int inputPos)
196     {
197         ByteTerm term(TypeAssertionBOL);
198         term.inputPosition = inputPos;
199         return term;
200     }
201 
CheckInputByteTerm202     static ByteTerm CheckInput(unsigned count)
203     {
204         ByteTerm term(TypeCheckInput);
205         term.checkInputCount = count;
206         return term;
207     }
208 
UncheckInputByteTerm209     static ByteTerm UncheckInput(unsigned count)
210     {
211         ByteTerm term(TypeUncheckInput);
212         term.checkInputCount = count;
213         return term;
214     }
215 
EOLByteTerm216     static ByteTerm EOL(int inputPos)
217     {
218         ByteTerm term(TypeAssertionEOL);
219         term.inputPosition = inputPos;
220         return term;
221     }
222 
WordBoundaryByteTerm223     static ByteTerm WordBoundary(bool invert, int inputPos)
224     {
225         ByteTerm term(TypeAssertionWordBoundary, invert);
226         term.inputPosition = inputPos;
227         return term;
228     }
229 
BackReferenceByteTerm230     static ByteTerm BackReference(unsigned subpatternId, int inputPos)
231     {
232         return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
233     }
234 
BodyAlternativeBeginByteTerm235     static ByteTerm BodyAlternativeBegin(bool onceThrough)
236     {
237         ByteTerm term(TypeBodyAlternativeBegin);
238         term.alternative.next = 0;
239         term.alternative.end = 0;
240         term.alternative.onceThrough = onceThrough;
241         return term;
242     }
243 
BodyAlternativeDisjunctionByteTerm244     static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
245     {
246         ByteTerm term(TypeBodyAlternativeDisjunction);
247         term.alternative.next = 0;
248         term.alternative.end = 0;
249         term.alternative.onceThrough = onceThrough;
250         return term;
251     }
252 
BodyAlternativeEndByteTerm253     static ByteTerm BodyAlternativeEnd()
254     {
255         ByteTerm term(TypeBodyAlternativeEnd);
256         term.alternative.next = 0;
257         term.alternative.end = 0;
258         term.alternative.onceThrough = false;
259         return term;
260     }
261 
AlternativeBeginByteTerm262     static ByteTerm AlternativeBegin()
263     {
264         ByteTerm term(TypeAlternativeBegin);
265         term.alternative.next = 0;
266         term.alternative.end = 0;
267         term.alternative.onceThrough = false;
268         return term;
269     }
270 
AlternativeDisjunctionByteTerm271     static ByteTerm AlternativeDisjunction()
272     {
273         ByteTerm term(TypeAlternativeDisjunction);
274         term.alternative.next = 0;
275         term.alternative.end = 0;
276         term.alternative.onceThrough = false;
277         return term;
278     }
279 
AlternativeEndByteTerm280     static ByteTerm AlternativeEnd()
281     {
282         ByteTerm term(TypeAlternativeEnd);
283         term.alternative.next = 0;
284         term.alternative.end = 0;
285         term.alternative.onceThrough = false;
286         return term;
287     }
288 
SubpatternBeginByteTerm289     static ByteTerm SubpatternBegin()
290     {
291         return ByteTerm(TypeSubpatternBegin);
292     }
293 
SubpatternEndByteTerm294     static ByteTerm SubpatternEnd()
295     {
296         return ByteTerm(TypeSubpatternEnd);
297     }
298 
invertByteTerm299     bool invert()
300     {
301         return m_invert;
302     }
303 
captureByteTerm304     bool capture()
305     {
306         return m_capture;
307     }
308 };
309 
310 class ByteDisjunction {
311     WTF_MAKE_FAST_ALLOCATED;
312 public:
ByteDisjunction(unsigned numSubpatterns,unsigned frameSize)313     ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
314         : m_numSubpatterns(numSubpatterns)
315         , m_frameSize(frameSize)
316     {
317     }
318 
319     Vector<ByteTerm> terms;
320     unsigned m_numSubpatterns;
321     unsigned m_frameSize;
322 };
323 
324 struct BytecodePattern {
325     WTF_MAKE_FAST_ALLOCATED;
326 public:
BytecodePatternBytecodePattern327     BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
328         : m_body(body)
329         , m_ignoreCase(pattern.m_ignoreCase)
330         , m_multiline(pattern.m_multiline)
331         , m_containsBeginChars(pattern.m_containsBeginChars)
332         , m_allocator(allocator)
333     {
334         newlineCharacterClass = pattern.newlineCharacterClass();
335         wordcharCharacterClass = pattern.wordcharCharacterClass();
336 
337         m_allParenthesesInfo.append(allParenthesesInfo);
338         m_userCharacterClasses.append(pattern.m_userCharacterClasses);
339         // 'Steal' the YarrPattern's CharacterClasses!  We clear its
340         // array, so that it won't delete them on destruction.  We'll
341         // take responsibility for that.
342         pattern.m_userCharacterClasses.clear();
343 
344         m_beginChars.append(pattern.m_beginChars);
345     }
346 
~BytecodePatternBytecodePattern347     ~BytecodePattern()
348     {
349         deleteAllValues(m_allParenthesesInfo);
350         deleteAllValues(m_userCharacterClasses);
351     }
352 
353     OwnPtr<ByteDisjunction> m_body;
354     bool m_ignoreCase;
355     bool m_multiline;
356     bool m_containsBeginChars;
357     // Each BytecodePattern is associated with a RegExp, each RegExp is associated
358     // with a JSGlobalData.  Cache a pointer to out JSGlobalData's m_regExpAllocator.
359     BumpPointerAllocator* m_allocator;
360 
361     CharacterClass* newlineCharacterClass;
362     CharacterClass* wordcharCharacterClass;
363 
364     Vector<BeginChar> m_beginChars;
365 
366 private:
367     Vector<ByteDisjunction*> m_allParenthesesInfo;
368     Vector<CharacterClass*> m_userCharacterClasses;
369 };
370 
371 } } // namespace JSC::Yarr
372 
373 #endif // YarrInterpreter_h
374