1 /*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "config.h"
27 #include "IDBKeyPath.h"
28
29 #if ENABLE(INDEXED_DATABASE)
30
31 #include <wtf/ASCIICType.h>
32 #include <wtf/dtoa.h>
33
34 namespace WebCore {
35
36 class IDBKeyPathLexer {
37 public:
38 enum TokenType {
39 TokenLeftBracket,
40 TokenRightBracket,
41 TokenIdentifier,
42 TokenNumber,
43 TokenDot,
44 TokenEnd,
45 TokenError
46 };
47
IDBKeyPathLexer(const String & s)48 explicit IDBKeyPathLexer(const String& s)
49 : m_string(s)
50 , m_ptr(s.characters())
51 , m_end(s.characters() + s.length())
52 , m_currentTokenType(TokenError)
53 {
54 }
55
currentTokenType() const56 TokenType currentTokenType() const { return m_currentTokenType; }
57
nextTokenType()58 TokenType nextTokenType()
59 {
60 m_currentTokenType = lex(m_currentElement);
61 return m_currentTokenType;
62 }
63
currentElement()64 const IDBKeyPathElement& currentElement() { return m_currentElement; }
65
66 private:
67 TokenType lex(IDBKeyPathElement&);
68 TokenType lexIdentifier(IDBKeyPathElement&);
69 TokenType lexNumber(IDBKeyPathElement&);
70 IDBKeyPathElement m_currentElement;
71 String m_string;
72 const UChar* m_ptr;
73 const UChar* m_end;
74 TokenType m_currentTokenType;
75 };
76
lex(IDBKeyPathElement & element)77 IDBKeyPathLexer::TokenType IDBKeyPathLexer::lex(IDBKeyPathElement& element)
78 {
79 while (m_ptr < m_end && isASCIISpace(*m_ptr))
80 ++m_ptr;
81
82 if (m_ptr >= m_end)
83 return TokenEnd;
84
85 ASSERT(m_ptr < m_end);
86 switch (*m_ptr) {
87 case '[':
88 ++m_ptr;
89 return TokenLeftBracket;
90 case ']':
91 ++m_ptr;
92 return TokenRightBracket;
93 case '.':
94 ++m_ptr;
95 return TokenDot;
96 case '0':
97 case '1':
98 case '2':
99 case '3':
100 case '4':
101 case '5':
102 case '6':
103 case '7':
104 case '8':
105 case '9':
106 return lexNumber(element);
107 default:
108 return lexIdentifier(element);
109 }
110 return TokenError;
111 }
112
isSafeIdentifierStartCharacter(UChar c)113 static inline bool isSafeIdentifierStartCharacter(UChar c)
114 {
115 return isASCIIAlpha(c) || (c == '_') || (c == '$');
116 }
117
isSafeIdentifierCharacter(UChar c)118 static inline bool isSafeIdentifierCharacter(UChar c)
119 {
120 return isASCIIAlphanumeric(c) || (c == '_') || (c == '$');
121 }
122
lexIdentifier(IDBKeyPathElement & element)123 IDBKeyPathLexer::TokenType IDBKeyPathLexer::lexIdentifier(IDBKeyPathElement& element)
124 {
125 const UChar* start = m_ptr;
126 if (m_ptr < m_end && isSafeIdentifierStartCharacter(*m_ptr))
127 ++m_ptr;
128 else
129 return TokenError;
130
131 while (m_ptr < m_end && isSafeIdentifierCharacter(*m_ptr))
132 ++m_ptr;
133
134 element.type = IDBKeyPathElement::IsNamed;
135 element.identifier = String(start, m_ptr - start);
136 return TokenIdentifier;
137 }
138
lexNumber(IDBKeyPathElement & element)139 IDBKeyPathLexer::TokenType IDBKeyPathLexer::lexNumber(IDBKeyPathElement& element)
140 {
141 if (m_ptr >= m_end)
142 return TokenError;
143
144 const UChar* start = m_ptr;
145 // [0-9]*
146 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
147 ++m_ptr;
148
149 String numberAsString;
150 numberAsString = String(start, m_ptr - start);
151 bool ok = false;
152 unsigned number = numberAsString.toUIntStrict(&ok);
153 if (!ok)
154 return TokenError;
155
156 element.type = IDBKeyPathElement::IsIndexed;
157 element.index = number;
158 return TokenNumber;
159 }
160
IDBParseKeyPath(const String & keyPath,Vector<IDBKeyPathElement> & elements,IDBKeyPathParseError & error)161 void IDBParseKeyPath(const String& keyPath, Vector<IDBKeyPathElement>& elements, IDBKeyPathParseError& error)
162 {
163 // This is a simplified parser loosely based on LiteralParser.
164 // An IDBKeyPath is defined as a sequence of:
165 // identifierA{.identifierB{[numeric_value]}
166 // where "{}" represents an optional part
167 // The basic state machine is:
168 // Start => {Identifier, Array}
169 // Identifier => {Dot, Array, End}
170 // Array => {Start, Dot, End}
171 // Dot => {Identifier}
172 // It bails out as soon as it finds an error, but doesn't discard the bits it managed to parse.
173 enum ParserState { Identifier, Array, Dot, End };
174
175 IDBKeyPathLexer lexer(keyPath);
176 IDBKeyPathLexer::TokenType tokenType = lexer.nextTokenType();
177 ParserState state;
178 if (tokenType == IDBKeyPathLexer::TokenIdentifier)
179 state = Identifier;
180 else if (tokenType == IDBKeyPathLexer::TokenLeftBracket)
181 state = Array;
182 else if (tokenType == IDBKeyPathLexer::TokenEnd)
183 state = End;
184 else {
185 error = IDBKeyPathParseErrorStart;
186 return;
187 }
188
189 while (1) {
190 switch (state) {
191 case Identifier : {
192 IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
193 ASSERT(tokenType == IDBKeyPathLexer::TokenIdentifier);
194
195 IDBKeyPathElement element = lexer.currentElement();
196 ASSERT(element.type == IDBKeyPathElement::IsNamed);
197 elements.append(element);
198
199 tokenType = lexer.nextTokenType();
200 if (tokenType == IDBKeyPathLexer::TokenDot)
201 state = Dot;
202 else if (tokenType == IDBKeyPathLexer::TokenLeftBracket)
203 state = Array;
204 else if (tokenType == IDBKeyPathLexer::TokenEnd)
205 state = End;
206 else {
207 error = IDBKeyPathParseErrorIdentifier;
208 return;
209 }
210 break;
211 }
212 case Array : {
213 IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
214 ASSERT(tokenType == IDBKeyPathLexer::TokenLeftBracket);
215
216 tokenType = lexer.nextTokenType();
217 if (tokenType != IDBKeyPathLexer::TokenNumber) {
218 error = IDBKeyPathParseErrorArrayIndex;
219 return;
220 }
221
222 ASSERT(tokenType == IDBKeyPathLexer::TokenNumber);
223 IDBKeyPathElement element = lexer.currentElement();
224 ASSERT(element.type == IDBKeyPathElement::IsIndexed);
225 elements.append(element);
226
227 tokenType = lexer.nextTokenType();
228 if (tokenType != IDBKeyPathLexer::TokenRightBracket) {
229 error = IDBKeyPathParseErrorArrayIndex;
230 return;
231 }
232
233 tokenType = lexer.nextTokenType();
234 if (tokenType == IDBKeyPathLexer::TokenDot)
235 state = Dot;
236 else if (tokenType == IDBKeyPathLexer::TokenLeftBracket)
237 state = Array;
238 else if (tokenType == IDBKeyPathLexer::TokenEnd)
239 state = End;
240 else {
241 error = IDBKeyPathParseErrorAfterArray;
242 return;
243 }
244 break;
245 }
246 case Dot: {
247 IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
248 ASSERT(tokenType == IDBKeyPathLexer::TokenDot);
249
250 tokenType = lexer.nextTokenType();
251 if (tokenType != IDBKeyPathLexer::TokenIdentifier) {
252 error = IDBKeyPathParseErrorDot;
253 return;
254 }
255
256 state = Identifier;
257 break;
258 }
259 case End: {
260 error = IDBKeyPathParseErrorNone;
261 return;
262 }
263 }
264 }
265 }
266
267 } // namespace WebCore
268
269 #endif // ENABLE(INDEXED_DATABASE)
270