• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef Parser_h
27 #define Parser_h
28 
29 #include <wtf/Platform.h>
30 
31 #if ENABLE(WREC)
32 
33 #include "Escapes.h"
34 #include "Quantifier.h"
35 #include "UString.h"
36 #include "WRECGenerator.h"
37 #include <wtf/ASCIICType.h>
38 
39 namespace JSC { namespace WREC {
40 
41     struct CharacterClass;
42 
43     class Parser {
44     typedef Generator::JumpList JumpList;
45     typedef Generator::ParenthesesType ParenthesesType;
46 
47     friend class SavedState;
48 
49     public:
Parser(const UString & pattern,bool ignoreCase,bool multiline)50         Parser(const UString& pattern, bool ignoreCase, bool multiline)
51             : m_generator(*this)
52             , m_data(pattern.data())
53             , m_size(pattern.size())
54             , m_ignoreCase(ignoreCase)
55             , m_multiline(multiline)
56         {
57             reset();
58         }
59 
generator()60         Generator& generator() { return m_generator; }
61 
ignoreCase()62         bool ignoreCase() const { return m_ignoreCase; }
multiline()63         bool multiline() const { return m_multiline; }
64 
recordSubpattern()65         void recordSubpattern() { ++m_numSubpatterns; }
numSubpatterns()66         unsigned numSubpatterns() const { return m_numSubpatterns; }
67 
error()68         const char* error() const { return m_error; }
syntaxError()69         const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; }
70 
parsePattern(JumpList & failures)71         void parsePattern(JumpList& failures)
72         {
73             reset();
74 
75             parseDisjunction(failures);
76 
77             if (peek() != EndOfPattern)
78                 setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it.
79         }
80 
81         void parseDisjunction(JumpList& failures);
82         void parseAlternative(JumpList& failures);
83         bool parseTerm(JumpList& failures);
84         bool parseNonCharacterEscape(JumpList& failures, const Escape&);
85         bool parseParentheses(JumpList& failures);
86         bool parseCharacterClass(JumpList& failures);
87         bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert);
88         bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId);
89 
90     private:
91         class SavedState {
92         public:
SavedState(Parser & parser)93             SavedState(Parser& parser)
94                 : m_parser(parser)
95                 , m_index(parser.m_index)
96             {
97             }
98 
restore()99             void restore()
100             {
101                 m_parser.m_index = m_index;
102             }
103 
104         private:
105             Parser& m_parser;
106             unsigned m_index;
107         };
108 
reset()109         void reset()
110         {
111             m_index = 0;
112             m_numSubpatterns = 0;
113             m_error = 0;
114         }
115 
setError(const char * error)116         void setError(const char* error)
117         {
118             if (m_error)
119                 return;
120             m_error = error;
121         }
122 
peek()123         int peek()
124         {
125             if (m_index >= m_size)
126                 return EndOfPattern;
127             return m_data[m_index];
128         }
129 
consume()130         int consume()
131         {
132             if (m_index >= m_size)
133                 return EndOfPattern;
134             return m_data[m_index++];
135         }
136 
peekIsDigit()137         bool peekIsDigit()
138         {
139             return WTF::isASCIIDigit(peek());
140         }
141 
peekDigit()142         unsigned peekDigit()
143         {
144             ASSERT(peekIsDigit());
145             return peek() - '0';
146         }
147 
consumeDigit()148         unsigned consumeDigit()
149         {
150             ASSERT(peekIsDigit());
151             return consume() - '0';
152         }
153 
consumeNumber()154         unsigned consumeNumber()
155         {
156             int n = consumeDigit();
157             while (peekIsDigit()) {
158                 n *= 10;
159                 n += consumeDigit();
160             }
161             return n;
162         }
163 
consumeHex(int count)164         int consumeHex(int count)
165         {
166             int n = 0;
167             while (count--) {
168                 if (!WTF::isASCIIHexDigit(peek()))
169                     return -1;
170                 n = (n << 4) | WTF::toASCIIHexValue(consume());
171             }
172             return n;
173         }
174 
consumeOctal()175         unsigned consumeOctal()
176         {
177             unsigned n = 0;
178             while (n < 32 && WTF::isASCIIOctalDigit(peek()))
179                 n = n * 8 + consumeDigit();
180             return n;
181         }
182 
183         ALWAYS_INLINE Quantifier consumeGreedyQuantifier();
184         Quantifier consumeQuantifier();
185         Escape consumeEscape(bool inCharacterClass);
186         ParenthesesType consumeParenthesesType();
187 
188         static const int EndOfPattern = -1;
189 
190         // Error messages.
191         static const char* QuantifierOutOfOrder;
192         static const char* QuantifierWithoutAtom;
193         static const char* ParenthesesUnmatched;
194         static const char* ParenthesesTypeInvalid;
195         static const char* ParenthesesNotSupported;
196         static const char* CharacterClassUnmatched;
197         static const char* CharacterClassOutOfOrder;
198         static const char* EscapeUnterminated;
199 
200         Generator m_generator;
201         const UChar* m_data;
202         unsigned m_size;
203         unsigned m_index;
204         bool m_ignoreCase;
205         bool m_multiline;
206         unsigned m_numSubpatterns;
207         const char* m_error;
208     };
209 
210 } } // namespace JSC::WREC
211 
212 #endif // ENABLE(WREC)
213 
214 #endif // Parser_h
215