• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2005 Maksim Orlovich <maksim@kde.org>
3  * Copyright (C) 2006 Apple Computer, Inc.
4  * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "config.h"
29 #include "XPathParser.h"
30 
31 #if ENABLE(XPATH)
32 
33 #include "ExceptionCode.h"
34 #include "StringHash.h"
35 #include "XPathEvaluator.h"
36 #include "XPathException.h"
37 #include "XPathNSResolver.h"
38 #include "XPathStep.h"
39 #include <wtf/StdLibExtras.h>
40 
41 int xpathyyparse(void*);
42 
43 using namespace WTF;
44 using namespace Unicode;
45 
46 namespace WebCore {
47 namespace XPath {
48 
49 class LocationPath;
50 
51 #include "XPathGrammar.h"
52 
53 Parser* Parser::currentParser = 0;
54 
55 enum XMLCat { NameStart, NameCont, NotPartOfName };
56 
57 typedef HashMap<String, Step::Axis> AxisNamesMap;
58 
charCat(UChar aChar)59 static XMLCat charCat(UChar aChar)
60 {
61     //### might need to add some special cases from the XML spec.
62 
63     if (aChar == '_')
64         return NameStart;
65 
66     if (aChar == '.' || aChar == '-')
67         return NameCont;
68     CharCategory category = Unicode::category(aChar);
69     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
70         return NameStart;
71     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
72         return NameCont;
73     return NotPartOfName;
74 }
75 
setUpAxisNamesMap(AxisNamesMap & axisNames)76 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
77 {
78     struct AxisName {
79         const char* name;
80         Step::Axis axis;
81     };
82     const AxisName axisNameList[] = {
83         { "ancestor", Step::AncestorAxis },
84         { "ancestor-or-self", Step::AncestorOrSelfAxis },
85         { "attribute", Step::AttributeAxis },
86         { "child", Step::ChildAxis },
87         { "descendant", Step::DescendantAxis },
88         { "descendant-or-self", Step::DescendantOrSelfAxis },
89         { "following", Step::FollowingAxis },
90         { "following-sibling", Step::FollowingSiblingAxis },
91         { "namespace", Step::NamespaceAxis },
92         { "parent", Step::ParentAxis },
93         { "preceding", Step::PrecedingAxis },
94         { "preceding-sibling", Step::PrecedingSiblingAxis },
95         { "self", Step::SelfAxis }
96     };
97     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
98         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
99 }
100 
isAxisName(const String & name,Step::Axis & type)101 static bool isAxisName(const String& name, Step::Axis& type)
102 {
103     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
104 
105     if (axisNames.isEmpty())
106         setUpAxisNamesMap(axisNames);
107 
108     AxisNamesMap::iterator it = axisNames.find(name);
109     if (it == axisNames.end())
110         return false;
111     type = it->second;
112     return true;
113 }
114 
isNodeTypeName(const String & name)115 static bool isNodeTypeName(const String& name)
116 {
117     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
118     if (nodeTypeNames.isEmpty()) {
119         nodeTypeNames.add("comment");
120         nodeTypeNames.add("text");
121         nodeTypeNames.add("processing-instruction");
122         nodeTypeNames.add("node");
123     }
124     return nodeTypeNames.contains(name);
125 }
126 
127 /* Returns whether the last parsed token matches the [32] Operator rule
128  * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
129  * the tokens.
130  */
isOperatorContext() const131 bool Parser::isOperatorContext() const
132 {
133     if (m_nextPos == 0)
134         return false;
135 
136     switch (m_lastTokenType) {
137         case AND: case OR: case MULOP:
138         case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
139         case EQOP: case RELOP:
140         case '@': case AXISNAME:   case '(': case '[':
141             return false;
142         default:
143             return true;
144     }
145 }
146 
skipWS()147 void Parser::skipWS()
148 {
149     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
150         ++m_nextPos;
151 }
152 
makeTokenAndAdvance(int code,int advance)153 Token Parser::makeTokenAndAdvance(int code, int advance)
154 {
155     m_nextPos += advance;
156     return Token(code);
157 }
158 
makeTokenAndAdvance(int code,NumericOp::Opcode val,int advance)159 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
160 {
161     m_nextPos += advance;
162     return Token(code, val);
163 }
164 
makeTokenAndAdvance(int code,EqTestOp::Opcode val,int advance)165 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
166 {
167     m_nextPos += advance;
168     return Token(code, val);
169 }
170 
171 // Returns next char if it's there and interesting, 0 otherwise
peekAheadHelper()172 char Parser::peekAheadHelper()
173 {
174     if (m_nextPos + 1 >= m_data.length())
175         return 0;
176     UChar next = m_data[m_nextPos + 1];
177     if (next >= 0xff)
178         return 0;
179     return next;
180 }
181 
peekCurHelper()182 char Parser::peekCurHelper()
183 {
184     if (m_nextPos >= m_data.length())
185         return 0;
186     UChar next = m_data[m_nextPos];
187     if (next >= 0xff)
188         return 0;
189     return next;
190 }
191 
lexString()192 Token Parser::lexString()
193 {
194     UChar delimiter = m_data[m_nextPos];
195     int startPos = m_nextPos + 1;
196 
197     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
198         if (m_data[m_nextPos] == delimiter) {
199             String value = m_data.substring(startPos, m_nextPos - startPos);
200             if (value.isNull())
201                 value = "";
202             ++m_nextPos; // Consume the char.
203             return Token(LITERAL, value);
204         }
205     }
206 
207     // Ouch, went off the end -- report error.
208     return Token(XPATH_ERROR);
209 }
210 
lexNumber()211 Token Parser::lexNumber()
212 {
213     int startPos = m_nextPos;
214     bool seenDot = false;
215 
216     // Go until end or a non-digits character.
217     for (; m_nextPos < m_data.length(); ++m_nextPos) {
218         UChar aChar = m_data[m_nextPos];
219         if (aChar >= 0xff) break;
220 
221         if (aChar < '0' || aChar > '9') {
222             if (aChar == '.' && !seenDot)
223                 seenDot = true;
224             else
225                 break;
226         }
227     }
228 
229     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
230 }
231 
lexNCName(String & name)232 bool Parser::lexNCName(String& name)
233 {
234     int startPos = m_nextPos;
235     if (m_nextPos >= m_data.length())
236         return false;
237 
238     if (charCat(m_data[m_nextPos]) != NameStart)
239         return false;
240 
241     // Keep going until we get a character that's not good for names.
242     for (; m_nextPos < m_data.length(); ++m_nextPos)
243         if (charCat(m_data[m_nextPos]) == NotPartOfName)
244             break;
245 
246     name = m_data.substring(startPos, m_nextPos - startPos);
247     return true;
248 }
249 
lexQName(String & name)250 bool Parser::lexQName(String& name)
251 {
252     String n1;
253     if (!lexNCName(n1))
254         return false;
255 
256     skipWS();
257 
258     // If the next character is :, what we just got it the prefix, if not,
259     // it's the whole thing.
260     if (peekAheadHelper() != ':') {
261         name = n1;
262         return true;
263     }
264 
265     String n2;
266     if (!lexNCName(n2))
267         return false;
268 
269     name = n1 + ":" + n2;
270     return true;
271 }
272 
nextTokenInternal()273 Token Parser::nextTokenInternal()
274 {
275     skipWS();
276 
277     if (m_nextPos >= m_data.length())
278         return Token(0);
279 
280     char code = peekCurHelper();
281     switch (code) {
282         case '(': case ')': case '[': case ']':
283         case '@': case ',': case '|':
284             return makeTokenAndAdvance(code);
285         case '\'':
286         case '\"':
287             return lexString();
288         case '0': case '1': case '2': case '3': case '4':
289         case '5': case '6': case '7': case '8': case '9':
290             return lexNumber();
291         case '.': {
292             char next = peekAheadHelper();
293             if (next == '.')
294                 return makeTokenAndAdvance(DOTDOT, 2);
295             if (next >= '0' && next <= '9')
296                 return lexNumber();
297             return makeTokenAndAdvance('.');
298         }
299         case '/':
300             if (peekAheadHelper() == '/')
301                 return makeTokenAndAdvance(SLASHSLASH, 2);
302             return makeTokenAndAdvance('/');
303         case '+':
304             return makeTokenAndAdvance(PLUS);
305         case '-':
306             return makeTokenAndAdvance(MINUS);
307         case '=':
308             return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
309         case '!':
310             if (peekAheadHelper() == '=')
311                 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
312             return Token(XPATH_ERROR);
313         case '<':
314             if (peekAheadHelper() == '=')
315                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
316             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
317         case '>':
318             if (peekAheadHelper() == '=')
319                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
320             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
321         case '*':
322             if (isOperatorContext())
323                 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
324             ++m_nextPos;
325             return Token(NAMETEST, "*");
326         case '$': { // $ QName
327             m_nextPos++;
328             String name;
329             if (!lexQName(name))
330                 return Token(XPATH_ERROR);
331             return Token(VARIABLEREFERENCE, name);
332         }
333     }
334 
335     String name;
336     if (!lexNCName(name))
337         return Token(XPATH_ERROR);
338 
339     skipWS();
340     // If we're in an operator context, check for any operator names
341     if (isOperatorContext()) {
342         if (name == "and") //### hash?
343             return Token(AND);
344         if (name == "or")
345             return Token(OR);
346         if (name == "mod")
347             return Token(MULOP, NumericOp::OP_Mod);
348         if (name == "div")
349             return Token(MULOP, NumericOp::OP_Div);
350     }
351 
352     // See whether we are at a :
353     if (peekCurHelper() == ':') {
354         m_nextPos++;
355         // Any chance it's an axis name?
356         if (peekCurHelper() == ':') {
357             m_nextPos++;
358 
359             //It might be an axis name.
360             Step::Axis axis;
361             if (isAxisName(name, axis))
362                 return Token(AXISNAME, axis);
363             // Ugh, :: is only valid in axis names -> error
364             return Token(XPATH_ERROR);
365         }
366 
367         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
368         skipWS();
369         if (peekCurHelper() == '*') {
370             m_nextPos++;
371             return Token(NAMETEST, name + ":*");
372         }
373 
374         // Make a full qname.
375         String n2;
376         if (!lexNCName(n2))
377             return Token(XPATH_ERROR);
378 
379         name = name + ":" + n2;
380     }
381 
382     skipWS();
383     if (peekCurHelper() == '(') {
384         //note: we don't swallow the (here!
385 
386         //either node type of function name
387         if (isNodeTypeName(name)) {
388             if (name == "processing-instruction")
389                 return Token(PI, name);
390 
391             return Token(NODETYPE, name);
392         }
393         //must be a function name.
394         return Token(FUNCTIONNAME, name);
395     }
396 
397     // At this point, it must be NAMETEST.
398     return Token(NAMETEST, name);
399 }
400 
nextToken()401 Token Parser::nextToken()
402 {
403     Token toRet = nextTokenInternal();
404     m_lastTokenType = toRet.type;
405     return toRet;
406 }
407 
Parser()408 Parser::Parser()
409 {
410     reset(String());
411 }
412 
reset(const String & data)413 void Parser::reset(const String& data)
414 {
415     m_nextPos = 0;
416     m_data = data;
417     m_lastTokenType = 0;
418 
419     m_topExpr = 0;
420     m_gotNamespaceError = false;
421 }
422 
lex(void * data)423 int Parser::lex(void* data)
424 {
425     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
426     Token tok = nextToken();
427 
428     switch (tok.type) {
429         case AXISNAME:
430             yylval->axis = tok.axis;
431             break;
432         case MULOP:
433             yylval->numop = tok.numop;
434             break;
435         case RELOP:
436         case EQOP:
437             yylval->eqop = tok.eqop;
438             break;
439         case NODETYPE:
440         case PI:
441         case FUNCTIONNAME:
442         case LITERAL:
443         case VARIABLEREFERENCE:
444         case NUMBER:
445         case NAMETEST:
446             yylval->str = new String(tok.str);
447             registerString(yylval->str);
448             break;
449     }
450 
451     return tok.type;
452 }
453 
expandQName(const String & qName,String & localName,String & namespaceURI)454 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
455 {
456     int colon = qName.find(':');
457     if (colon >= 0) {
458         if (!m_resolver)
459             return false;
460         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
461         if (namespaceURI.isNull())
462             return false;
463         localName = qName.substring(colon + 1);
464     } else
465         localName = qName;
466 
467     return true;
468 }
469 
parseStatement(const String & statement,PassRefPtr<XPathNSResolver> resolver,ExceptionCode & ec)470 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
471 {
472     reset(statement);
473 
474     m_resolver = resolver;
475 
476     Parser* oldParser = currentParser;
477     currentParser = this;
478     int parseError = xpathyyparse(this);
479     currentParser = oldParser;
480 
481     if (parseError) {
482         deleteAllValues(m_parseNodes);
483         m_parseNodes.clear();
484 
485         HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
486         for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
487             deleteAllValues(**it);
488             delete *it;
489         }
490         m_predicateVectors.clear();
491 
492         HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
493         for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
494             deleteAllValues(**it);
495             delete *it;
496         }
497         m_expressionVectors.clear();
498 
499         deleteAllValues(m_strings);
500         m_strings.clear();
501 
502         deleteAllValues(m_nodeTests);
503         m_nodeTests.clear();
504 
505         m_topExpr = 0;
506 
507         if (m_gotNamespaceError)
508             ec = NAMESPACE_ERR;
509         else
510             ec = XPathException::INVALID_EXPRESSION_ERR;
511         return 0;
512     }
513 
514     ASSERT(m_parseNodes.size() == 1);
515     ASSERT(*m_parseNodes.begin() == m_topExpr);
516     ASSERT(m_expressionVectors.size() == 0);
517     ASSERT(m_predicateVectors.size() == 0);
518     ASSERT(m_strings.size() == 0);
519     ASSERT(m_nodeTests.size() == 0);
520 
521     m_parseNodes.clear();
522     Expression* result = m_topExpr;
523     m_topExpr = 0;
524 
525     return result;
526 }
527 
registerParseNode(ParseNode * node)528 void Parser::registerParseNode(ParseNode* node)
529 {
530     if (node == 0)
531         return;
532 
533     ASSERT(!m_parseNodes.contains(node));
534 
535     m_parseNodes.add(node);
536 }
537 
unregisterParseNode(ParseNode * node)538 void Parser::unregisterParseNode(ParseNode* node)
539 {
540     if (node == 0)
541         return;
542 
543     ASSERT(m_parseNodes.contains(node));
544 
545     m_parseNodes.remove(node);
546 }
547 
registerPredicateVector(Vector<Predicate * > * vector)548 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
549 {
550     if (vector == 0)
551         return;
552 
553     ASSERT(!m_predicateVectors.contains(vector));
554 
555     m_predicateVectors.add(vector);
556 }
557 
deletePredicateVector(Vector<Predicate * > * vector)558 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
559 {
560     if (vector == 0)
561         return;
562 
563     ASSERT(m_predicateVectors.contains(vector));
564 
565     m_predicateVectors.remove(vector);
566     delete vector;
567 }
568 
569 
registerExpressionVector(Vector<Expression * > * vector)570 void Parser::registerExpressionVector(Vector<Expression*>* vector)
571 {
572     if (vector == 0)
573         return;
574 
575     ASSERT(!m_expressionVectors.contains(vector));
576 
577     m_expressionVectors.add(vector);
578 }
579 
deleteExpressionVector(Vector<Expression * > * vector)580 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
581 {
582     if (vector == 0)
583         return;
584 
585     ASSERT(m_expressionVectors.contains(vector));
586 
587     m_expressionVectors.remove(vector);
588     delete vector;
589 }
590 
registerString(String * s)591 void Parser::registerString(String* s)
592 {
593     if (s == 0)
594         return;
595 
596     ASSERT(!m_strings.contains(s));
597 
598     m_strings.add(s);
599 }
600 
deleteString(String * s)601 void Parser::deleteString(String* s)
602 {
603     if (s == 0)
604         return;
605 
606     ASSERT(m_strings.contains(s));
607 
608     m_strings.remove(s);
609     delete s;
610 }
611 
registerNodeTest(Step::NodeTest * t)612 void Parser::registerNodeTest(Step::NodeTest* t)
613 {
614     if (t == 0)
615         return;
616 
617     ASSERT(!m_nodeTests.contains(t));
618 
619     m_nodeTests.add(t);
620 }
621 
deleteNodeTest(Step::NodeTest * t)622 void Parser::deleteNodeTest(Step::NodeTest* t)
623 {
624     if (t == 0)
625         return;
626 
627     ASSERT(m_nodeTests.contains(t));
628 
629     m_nodeTests.remove(t);
630     delete t;
631 }
632 
633 }
634 }
635 
636 #endif // ENABLE(XPATH)
637