1 /*
2 * Copyright 2005 Maksim Orlovich <maksim@kde.org>
3 * Copyright (C) 2006 Apple Computer, Inc.
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29 #include "XPathParser.h"
30
31 #if ENABLE(XPATH)
32
33 #include "ExceptionCode.h"
34 #include "StringHash.h"
35 #include "XPathEvaluator.h"
36 #include "XPathException.h"
37 #include "XPathNSResolver.h"
38 #include "XPathStep.h"
39 #include <wtf/StdLibExtras.h>
40
41 int xpathyyparse(void*);
42
43 using namespace WTF;
44 using namespace Unicode;
45
46 namespace WebCore {
47 namespace XPath {
48
49 class LocationPath;
50
51 #include "XPathGrammar.h"
52
53 Parser* Parser::currentParser = 0;
54
55 enum XMLCat { NameStart, NameCont, NotPartOfName };
56
57 typedef HashMap<String, Step::Axis> AxisNamesMap;
58
charCat(UChar aChar)59 static XMLCat charCat(UChar aChar)
60 {
61 //### might need to add some special cases from the XML spec.
62
63 if (aChar == '_')
64 return NameStart;
65
66 if (aChar == '.' || aChar == '-')
67 return NameCont;
68 CharCategory category = Unicode::category(aChar);
69 if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
70 return NameStart;
71 if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
72 return NameCont;
73 return NotPartOfName;
74 }
75
setUpAxisNamesMap(AxisNamesMap & axisNames)76 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
77 {
78 struct AxisName {
79 const char* name;
80 Step::Axis axis;
81 };
82 const AxisName axisNameList[] = {
83 { "ancestor", Step::AncestorAxis },
84 { "ancestor-or-self", Step::AncestorOrSelfAxis },
85 { "attribute", Step::AttributeAxis },
86 { "child", Step::ChildAxis },
87 { "descendant", Step::DescendantAxis },
88 { "descendant-or-self", Step::DescendantOrSelfAxis },
89 { "following", Step::FollowingAxis },
90 { "following-sibling", Step::FollowingSiblingAxis },
91 { "namespace", Step::NamespaceAxis },
92 { "parent", Step::ParentAxis },
93 { "preceding", Step::PrecedingAxis },
94 { "preceding-sibling", Step::PrecedingSiblingAxis },
95 { "self", Step::SelfAxis }
96 };
97 for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
98 axisNames.set(axisNameList[i].name, axisNameList[i].axis);
99 }
100
isAxisName(const String & name,Step::Axis & type)101 static bool isAxisName(const String& name, Step::Axis& type)
102 {
103 DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
104
105 if (axisNames.isEmpty())
106 setUpAxisNamesMap(axisNames);
107
108 AxisNamesMap::iterator it = axisNames.find(name);
109 if (it == axisNames.end())
110 return false;
111 type = it->second;
112 return true;
113 }
114
isNodeTypeName(const String & name)115 static bool isNodeTypeName(const String& name)
116 {
117 DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
118 if (nodeTypeNames.isEmpty()) {
119 nodeTypeNames.add("comment");
120 nodeTypeNames.add("text");
121 nodeTypeNames.add("processing-instruction");
122 nodeTypeNames.add("node");
123 }
124 return nodeTypeNames.contains(name);
125 }
126
127 /* Returns whether the last parsed token matches the [32] Operator rule
128 * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
129 * the tokens.
130 */
isOperatorContext() const131 bool Parser::isOperatorContext() const
132 {
133 if (m_nextPos == 0)
134 return false;
135
136 switch (m_lastTokenType) {
137 case AND: case OR: case MULOP:
138 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
139 case EQOP: case RELOP:
140 case '@': case AXISNAME: case '(': case '[':
141 return false;
142 default:
143 return true;
144 }
145 }
146
skipWS()147 void Parser::skipWS()
148 {
149 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
150 ++m_nextPos;
151 }
152
makeTokenAndAdvance(int code,int advance)153 Token Parser::makeTokenAndAdvance(int code, int advance)
154 {
155 m_nextPos += advance;
156 return Token(code);
157 }
158
makeTokenAndAdvance(int code,NumericOp::Opcode val,int advance)159 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
160 {
161 m_nextPos += advance;
162 return Token(code, val);
163 }
164
makeTokenAndAdvance(int code,EqTestOp::Opcode val,int advance)165 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
166 {
167 m_nextPos += advance;
168 return Token(code, val);
169 }
170
171 // Returns next char if it's there and interesting, 0 otherwise
peekAheadHelper()172 char Parser::peekAheadHelper()
173 {
174 if (m_nextPos + 1 >= m_data.length())
175 return 0;
176 UChar next = m_data[m_nextPos + 1];
177 if (next >= 0xff)
178 return 0;
179 return next;
180 }
181
peekCurHelper()182 char Parser::peekCurHelper()
183 {
184 if (m_nextPos >= m_data.length())
185 return 0;
186 UChar next = m_data[m_nextPos];
187 if (next >= 0xff)
188 return 0;
189 return next;
190 }
191
lexString()192 Token Parser::lexString()
193 {
194 UChar delimiter = m_data[m_nextPos];
195 int startPos = m_nextPos + 1;
196
197 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
198 if (m_data[m_nextPos] == delimiter) {
199 String value = m_data.substring(startPos, m_nextPos - startPos);
200 if (value.isNull())
201 value = "";
202 ++m_nextPos; // Consume the char.
203 return Token(LITERAL, value);
204 }
205 }
206
207 // Ouch, went off the end -- report error.
208 return Token(XPATH_ERROR);
209 }
210
lexNumber()211 Token Parser::lexNumber()
212 {
213 int startPos = m_nextPos;
214 bool seenDot = false;
215
216 // Go until end or a non-digits character.
217 for (; m_nextPos < m_data.length(); ++m_nextPos) {
218 UChar aChar = m_data[m_nextPos];
219 if (aChar >= 0xff) break;
220
221 if (aChar < '0' || aChar > '9') {
222 if (aChar == '.' && !seenDot)
223 seenDot = true;
224 else
225 break;
226 }
227 }
228
229 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
230 }
231
lexNCName(String & name)232 bool Parser::lexNCName(String& name)
233 {
234 int startPos = m_nextPos;
235 if (m_nextPos >= m_data.length())
236 return false;
237
238 if (charCat(m_data[m_nextPos]) != NameStart)
239 return false;
240
241 // Keep going until we get a character that's not good for names.
242 for (; m_nextPos < m_data.length(); ++m_nextPos)
243 if (charCat(m_data[m_nextPos]) == NotPartOfName)
244 break;
245
246 name = m_data.substring(startPos, m_nextPos - startPos);
247 return true;
248 }
249
lexQName(String & name)250 bool Parser::lexQName(String& name)
251 {
252 String n1;
253 if (!lexNCName(n1))
254 return false;
255
256 skipWS();
257
258 // If the next character is :, what we just got it the prefix, if not,
259 // it's the whole thing.
260 if (peekAheadHelper() != ':') {
261 name = n1;
262 return true;
263 }
264
265 String n2;
266 if (!lexNCName(n2))
267 return false;
268
269 name = n1 + ":" + n2;
270 return true;
271 }
272
nextTokenInternal()273 Token Parser::nextTokenInternal()
274 {
275 skipWS();
276
277 if (m_nextPos >= m_data.length())
278 return Token(0);
279
280 char code = peekCurHelper();
281 switch (code) {
282 case '(': case ')': case '[': case ']':
283 case '@': case ',': case '|':
284 return makeTokenAndAdvance(code);
285 case '\'':
286 case '\"':
287 return lexString();
288 case '0': case '1': case '2': case '3': case '4':
289 case '5': case '6': case '7': case '8': case '9':
290 return lexNumber();
291 case '.': {
292 char next = peekAheadHelper();
293 if (next == '.')
294 return makeTokenAndAdvance(DOTDOT, 2);
295 if (next >= '0' && next <= '9')
296 return lexNumber();
297 return makeTokenAndAdvance('.');
298 }
299 case '/':
300 if (peekAheadHelper() == '/')
301 return makeTokenAndAdvance(SLASHSLASH, 2);
302 return makeTokenAndAdvance('/');
303 case '+':
304 return makeTokenAndAdvance(PLUS);
305 case '-':
306 return makeTokenAndAdvance(MINUS);
307 case '=':
308 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
309 case '!':
310 if (peekAheadHelper() == '=')
311 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
312 return Token(XPATH_ERROR);
313 case '<':
314 if (peekAheadHelper() == '=')
315 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
316 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
317 case '>':
318 if (peekAheadHelper() == '=')
319 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
320 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
321 case '*':
322 if (isOperatorContext())
323 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
324 ++m_nextPos;
325 return Token(NAMETEST, "*");
326 case '$': { // $ QName
327 m_nextPos++;
328 String name;
329 if (!lexQName(name))
330 return Token(XPATH_ERROR);
331 return Token(VARIABLEREFERENCE, name);
332 }
333 }
334
335 String name;
336 if (!lexNCName(name))
337 return Token(XPATH_ERROR);
338
339 skipWS();
340 // If we're in an operator context, check for any operator names
341 if (isOperatorContext()) {
342 if (name == "and") //### hash?
343 return Token(AND);
344 if (name == "or")
345 return Token(OR);
346 if (name == "mod")
347 return Token(MULOP, NumericOp::OP_Mod);
348 if (name == "div")
349 return Token(MULOP, NumericOp::OP_Div);
350 }
351
352 // See whether we are at a :
353 if (peekCurHelper() == ':') {
354 m_nextPos++;
355 // Any chance it's an axis name?
356 if (peekCurHelper() == ':') {
357 m_nextPos++;
358
359 //It might be an axis name.
360 Step::Axis axis;
361 if (isAxisName(name, axis))
362 return Token(AXISNAME, axis);
363 // Ugh, :: is only valid in axis names -> error
364 return Token(XPATH_ERROR);
365 }
366
367 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
368 skipWS();
369 if (peekCurHelper() == '*') {
370 m_nextPos++;
371 return Token(NAMETEST, name + ":*");
372 }
373
374 // Make a full qname.
375 String n2;
376 if (!lexNCName(n2))
377 return Token(XPATH_ERROR);
378
379 name = name + ":" + n2;
380 }
381
382 skipWS();
383 if (peekCurHelper() == '(') {
384 //note: we don't swallow the (here!
385
386 //either node type of function name
387 if (isNodeTypeName(name)) {
388 if (name == "processing-instruction")
389 return Token(PI, name);
390
391 return Token(NODETYPE, name);
392 }
393 //must be a function name.
394 return Token(FUNCTIONNAME, name);
395 }
396
397 // At this point, it must be NAMETEST.
398 return Token(NAMETEST, name);
399 }
400
nextToken()401 Token Parser::nextToken()
402 {
403 Token toRet = nextTokenInternal();
404 m_lastTokenType = toRet.type;
405 return toRet;
406 }
407
Parser()408 Parser::Parser()
409 {
410 reset(String());
411 }
412
reset(const String & data)413 void Parser::reset(const String& data)
414 {
415 m_nextPos = 0;
416 m_data = data;
417 m_lastTokenType = 0;
418
419 m_topExpr = 0;
420 m_gotNamespaceError = false;
421 }
422
lex(void * data)423 int Parser::lex(void* data)
424 {
425 YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
426 Token tok = nextToken();
427
428 switch (tok.type) {
429 case AXISNAME:
430 yylval->axis = tok.axis;
431 break;
432 case MULOP:
433 yylval->numop = tok.numop;
434 break;
435 case RELOP:
436 case EQOP:
437 yylval->eqop = tok.eqop;
438 break;
439 case NODETYPE:
440 case PI:
441 case FUNCTIONNAME:
442 case LITERAL:
443 case VARIABLEREFERENCE:
444 case NUMBER:
445 case NAMETEST:
446 yylval->str = new String(tok.str);
447 registerString(yylval->str);
448 break;
449 }
450
451 return tok.type;
452 }
453
expandQName(const String & qName,String & localName,String & namespaceURI)454 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
455 {
456 int colon = qName.find(':');
457 if (colon >= 0) {
458 if (!m_resolver)
459 return false;
460 namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
461 if (namespaceURI.isNull())
462 return false;
463 localName = qName.substring(colon + 1);
464 } else
465 localName = qName;
466
467 return true;
468 }
469
parseStatement(const String & statement,PassRefPtr<XPathNSResolver> resolver,ExceptionCode & ec)470 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
471 {
472 reset(statement);
473
474 m_resolver = resolver;
475
476 Parser* oldParser = currentParser;
477 currentParser = this;
478 int parseError = xpathyyparse(this);
479 currentParser = oldParser;
480
481 if (parseError) {
482 deleteAllValues(m_parseNodes);
483 m_parseNodes.clear();
484
485 HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
486 for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
487 deleteAllValues(**it);
488 delete *it;
489 }
490 m_predicateVectors.clear();
491
492 HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
493 for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
494 deleteAllValues(**it);
495 delete *it;
496 }
497 m_expressionVectors.clear();
498
499 deleteAllValues(m_strings);
500 m_strings.clear();
501
502 deleteAllValues(m_nodeTests);
503 m_nodeTests.clear();
504
505 m_topExpr = 0;
506
507 if (m_gotNamespaceError)
508 ec = NAMESPACE_ERR;
509 else
510 ec = XPathException::INVALID_EXPRESSION_ERR;
511 return 0;
512 }
513
514 ASSERT(m_parseNodes.size() == 1);
515 ASSERT(*m_parseNodes.begin() == m_topExpr);
516 ASSERT(m_expressionVectors.size() == 0);
517 ASSERT(m_predicateVectors.size() == 0);
518 ASSERT(m_strings.size() == 0);
519 ASSERT(m_nodeTests.size() == 0);
520
521 m_parseNodes.clear();
522 Expression* result = m_topExpr;
523 m_topExpr = 0;
524
525 return result;
526 }
527
registerParseNode(ParseNode * node)528 void Parser::registerParseNode(ParseNode* node)
529 {
530 if (node == 0)
531 return;
532
533 ASSERT(!m_parseNodes.contains(node));
534
535 m_parseNodes.add(node);
536 }
537
unregisterParseNode(ParseNode * node)538 void Parser::unregisterParseNode(ParseNode* node)
539 {
540 if (node == 0)
541 return;
542
543 ASSERT(m_parseNodes.contains(node));
544
545 m_parseNodes.remove(node);
546 }
547
registerPredicateVector(Vector<Predicate * > * vector)548 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
549 {
550 if (vector == 0)
551 return;
552
553 ASSERT(!m_predicateVectors.contains(vector));
554
555 m_predicateVectors.add(vector);
556 }
557
deletePredicateVector(Vector<Predicate * > * vector)558 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
559 {
560 if (vector == 0)
561 return;
562
563 ASSERT(m_predicateVectors.contains(vector));
564
565 m_predicateVectors.remove(vector);
566 delete vector;
567 }
568
569
registerExpressionVector(Vector<Expression * > * vector)570 void Parser::registerExpressionVector(Vector<Expression*>* vector)
571 {
572 if (vector == 0)
573 return;
574
575 ASSERT(!m_expressionVectors.contains(vector));
576
577 m_expressionVectors.add(vector);
578 }
579
deleteExpressionVector(Vector<Expression * > * vector)580 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
581 {
582 if (vector == 0)
583 return;
584
585 ASSERT(m_expressionVectors.contains(vector));
586
587 m_expressionVectors.remove(vector);
588 delete vector;
589 }
590
registerString(String * s)591 void Parser::registerString(String* s)
592 {
593 if (s == 0)
594 return;
595
596 ASSERT(!m_strings.contains(s));
597
598 m_strings.add(s);
599 }
600
deleteString(String * s)601 void Parser::deleteString(String* s)
602 {
603 if (s == 0)
604 return;
605
606 ASSERT(m_strings.contains(s));
607
608 m_strings.remove(s);
609 delete s;
610 }
611
registerNodeTest(Step::NodeTest * t)612 void Parser::registerNodeTest(Step::NodeTest* t)
613 {
614 if (t == 0)
615 return;
616
617 ASSERT(!m_nodeTests.contains(t));
618
619 m_nodeTests.add(t);
620 }
621
deleteNodeTest(Step::NodeTest * t)622 void Parser::deleteNodeTest(Step::NodeTest* t)
623 {
624 if (t == 0)
625 return;
626
627 ASSERT(m_nodeTests.contains(t));
628
629 m_nodeTests.remove(t);
630 delete t;
631 }
632
633 }
634 }
635
636 #endif // ENABLE(XPATH)
637