• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * xpointer.c : Code to handle XML Pointer
 *
 * Base implementation was made according to the
 * W3C Candidate Recommendation 7 June 2000
 * http://www.w3.org/TR/2000/CR-xptr-20000607
 *
 * Added support for the element() scheme described in:
 * W3C Proposed Recommendation 13 November 2002
 * http://www.w3.org/TR/2002/PR-xptr-element-20021113/
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
16 
17 /* To avoid EBCDIC trouble when parsing on zOS */
18 #if defined(__MVS__)
19 #pragma convert("ISO8859-1")
20 #endif
21 
22 #define IN_LIBXML
23 #include "libxml.h"
24 
/*
 * TODO: better handling of error cases; the full expression should
 *       be parsed beforehand instead of being evaluated progressively.
 * TODO: Access into entity references is not supported yet; we need
 *       a way to pop out of entity references, since the parent is
 *       the entity declaration, not the reference.
 */
32 
33 #include <string.h>
34 #include <libxml/xpointer.h>
35 #include <libxml/xmlmemory.h>
36 #include <libxml/parserInternals.h>
37 #include <libxml/uri.h>
38 #include <libxml/xpath.h>
39 #include <libxml/xpathInternals.h>
40 #include <libxml/xmlerror.h>
41 
42 #ifdef LIBXML_XPTR_ENABLED
43 
44 /* Add support of the xmlns() xpointer scheme to initialize the namespaces */
45 #define XPTR_XMLNS_SCHEME
46 
47 #include "private/error.h"
48 #include "private/parser.h"
49 #include "private/xpath.h"
50 
51 /************************************************************************
52  *									*
53  *		Some factorized error routines				*
54  *									*
55  ************************************************************************/
56 
57 /**
58  * xmlXPtrErr:
59  * @ctxt:  an XPTR evaluation context
60  * @extra:  extra information
61  *
62  * Handle an XPointer error
63  */
64 static void LIBXML_ATTR_FORMAT(3,0)
xmlXPtrErr(xmlXPathParserContextPtr ctxt,int code,const char * msg,const xmlChar * extra)65 xmlXPtrErr(xmlXPathParserContextPtr ctxt, int code,
66            const char * msg, const xmlChar *extra)
67 {
68     xmlStructuredErrorFunc serror = NULL;
69     void *data = NULL;
70     xmlNodePtr node = NULL;
71     int res;
72 
73     if (ctxt == NULL)
74         return;
75     /* Only report the first error */
76     if (ctxt->error != 0)
77         return;
78 
79     ctxt->error = code;
80 
81     if (ctxt->context != NULL) {
82         xmlErrorPtr err = &ctxt->context->lastError;
83 
84         /* cleanup current last error */
85         xmlResetError(err);
86 
87         err->domain = XML_FROM_XPOINTER;
88         err->code = code;
89         err->level = XML_ERR_ERROR;
90         err->str1 = (char *) xmlStrdup(ctxt->base);
91         if (err->str1 == NULL) {
92             xmlXPathPErrMemory(ctxt);
93             return;
94         }
95         err->int1 = ctxt->cur - ctxt->base;
96         err->node = ctxt->context->debugNode;
97 
98         serror = ctxt->context->error;
99         data = ctxt->context->userData;
100         node = ctxt->context->debugNode;
101     }
102 
103     res = xmlRaiseError(serror, NULL, data, NULL, node,
104                         XML_FROM_XPOINTER, code, XML_ERR_ERROR, NULL, 0,
105                         (const char *) extra, (const char *) ctxt->base,
106                         NULL, ctxt->cur - ctxt->base, 0,
107                         msg, extra);
108     if (res < 0)
109         xmlXPathPErrMemory(ctxt);
110 }
111 
112 /************************************************************************
113  *									*
114  *		A few helper functions for child sequences		*
115  *									*
116  ************************************************************************/
117 
118 /**
119  * xmlXPtrGetNthChild:
120  * @cur:  the node
121  * @no:  the child number
122  *
123  * Returns the @no'th element child of @cur or NULL
124  */
125 static xmlNodePtr
xmlXPtrGetNthChild(xmlNodePtr cur,int no)126 xmlXPtrGetNthChild(xmlNodePtr cur, int no) {
127     int i;
128     if ((cur == NULL) || (cur->type == XML_NAMESPACE_DECL))
129 	return(cur);
130     cur = cur->children;
131     for (i = 0;i <= no;cur = cur->next) {
132 	if (cur == NULL)
133 	    return(cur);
134 	if ((cur->type == XML_ELEMENT_NODE) ||
135 	    (cur->type == XML_DOCUMENT_NODE) ||
136 	    (cur->type == XML_HTML_DOCUMENT_NODE)) {
137 	    i++;
138 	    if (i == no)
139 		break;
140 	}
141     }
142     return(cur);
143 }
144 
145 /************************************************************************
146  *									*
147  *			The parser					*
148  *									*
149  ************************************************************************/
150 
151 static void xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name);
152 
/*
 * Macros for accessing the expression being parsed. They are private to
 * this file and must not be exported.
 *
 * Dirty macros: every one of them expands to code that refers to a
 * variable named `ctxt`, so they can only be used where such a variable
 * (with a `cur` pointer into the expression) is in scope.
 *
 *   CUR         the xmlChar at the current position. This is a single
 *               8 bit value, no UTF-8 decoding is done, so it should
 *               only be compared against ASCII values.
 *   NXT(n)      the xmlChar @n positions after the current one. Same
 *               restriction as CUR: ASCII comparisons only.
 *   SKIP(n)     advance the current position by @n xmlChars. Only use
 *               it to skip ASCII strings that were already matched.
 *   CURRENT     same expansion as CUR.
 *   NEXT        advance by one xmlChar, unless the current xmlChar is
 *               the terminating 0, in which case the position is left
 *               untouched. Evaluates to the position before the move.
 *   SKIP_BLANKS advance past every xmlChar accepted by IS_BLANK_CH().
 *               It is a statement and needs a trailing semicolon.
 */

#define CUR (*ctxt->cur)
#define SKIP(val) (ctxt->cur += (val))
#define NXT(val) (ctxt->cur[(val)])

#define CURRENT (*ctxt->cur)
#define NEXT ((*ctxt->cur) ? ctxt->cur++ : ctxt->cur)

#define SKIP_BLANKS							\
    do {								\
        while (IS_BLANK_CH(*(ctxt->cur)))				\
            NEXT;							\
    } while (0)
184 
185 /*
186  * xmlXPtrGetChildNo:
187  * @ctxt:  the XPointer Parser context
188  * @index:  the child number
189  *
190  * Move the current node of the nodeset on the stack to the
191  * given child if found
192  */
193 static void
xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt,int indx)194 xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt, int indx) {
195     xmlNodePtr cur = NULL;
196     xmlXPathObjectPtr obj;
197     xmlNodeSetPtr oldset;
198 
199     CHECK_TYPE(XPATH_NODESET);
200     obj = valuePop(ctxt);
201     oldset = obj->nodesetval;
202     if ((indx <= 0) || (oldset == NULL) || (oldset->nodeNr != 1)) {
203 	xmlXPathFreeObject(obj);
204 	valuePush(ctxt, xmlXPathNewNodeSet(NULL));
205 	return;
206     }
207     cur = xmlXPtrGetNthChild(oldset->nodeTab[0], indx);
208     if (cur == NULL) {
209 	xmlXPathFreeObject(obj);
210 	valuePush(ctxt, xmlXPathNewNodeSet(NULL));
211 	return;
212     }
213     oldset->nodeTab[0] = cur;
214     valuePush(ctxt, obj);
215 }
216 
217 /**
218  * xmlXPtrEvalXPtrPart:
219  * @ctxt:  the XPointer Parser context
220  * @name:  the preparsed Scheme for the XPtrPart
221  *
222  * XPtrPart ::= 'xpointer' '(' XPtrExpr ')'
223  *            | Scheme '(' SchemeSpecificExpr ')'
224  *
225  * Scheme   ::=  NCName - 'xpointer' [VC: Non-XPointer schemes]
226  *
227  * SchemeSpecificExpr ::= StringWithBalancedParens
228  *
229  * StringWithBalancedParens ::=
230  *              [^()]* ('(' StringWithBalancedParens ')' [^()]*)*
231  *              [VC: Parenthesis escaping]
232  *
233  * XPtrExpr ::= Expr [VC: Parenthesis escaping]
234  *
235  * VC: Parenthesis escaping:
236  *   The end of an XPointer part is signaled by the right parenthesis ")"
237  *   character that is balanced with the left parenthesis "(" character
238  *   that began the part. Any unbalanced parenthesis character inside the
239  *   expression, even within literals, must be escaped with a circumflex (^)
240  *   character preceding it. If the expression contains any literal
241  *   occurrences of the circumflex, each must be escaped with an additional
242  *   circumflex (that is, ^^). If the unescaped parentheses in the expression
243  *   are not balanced, a syntax error results.
244  *
245  * Parse and evaluate an XPtrPart. Basically it generates the unescaped
246  * string and if the scheme is 'xpointer' it will call the XPath interpreter.
247  *
248  * TODO: there is no new scheme registration mechanism
249  */
250 
251 static void
xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt,xmlChar * name)252 xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt, xmlChar *name) {
253     xmlChar *buffer, *cur;
254     int len;
255     int level;
256 
257     if (name == NULL)
258     name = xmlXPathParseName(ctxt);
259     if (name == NULL)
260 	XP_ERROR(XPATH_EXPR_ERROR);
261 
262     if (CUR != '(') {
263         xmlFree(name);
264 	XP_ERROR(XPATH_EXPR_ERROR);
265     }
266     NEXT;
267     level = 1;
268 
269     len = xmlStrlen(ctxt->cur);
270     len++;
271     buffer = xmlMalloc(len);
272     if (buffer == NULL) {
273         xmlXPathPErrMemory(ctxt);
274         xmlFree(name);
275 	return;
276     }
277 
278     cur = buffer;
279     while (CUR != 0) {
280 	if (CUR == ')') {
281 	    level--;
282 	    if (level == 0) {
283 		NEXT;
284 		break;
285 	    }
286 	} else if (CUR == '(') {
287 	    level++;
288 	} else if (CUR == '^') {
289             if ((NXT(1) == ')') || (NXT(1) == '(') || (NXT(1) == '^')) {
290                 NEXT;
291             }
292 	}
293         *cur++ = CUR;
294 	NEXT;
295     }
296     *cur = 0;
297 
298     if ((level != 0) && (CUR == 0)) {
299         xmlFree(name);
300 	xmlFree(buffer);
301 	XP_ERROR(XPTR_SYNTAX_ERROR);
302     }
303 
304     if (xmlStrEqual(name, (xmlChar *) "xpointer") ||
305         xmlStrEqual(name, (xmlChar *) "xpath1")) {
306 	const xmlChar *oldBase = ctxt->base;
307 	const xmlChar *oldCur = ctxt->cur;
308 
309 	ctxt->cur = ctxt->base = buffer;
310 	/*
311 	 * To evaluate an xpointer scheme element (4.3) we need:
312 	 *   context initialized to the root
313 	 *   context position initialized to 1
314 	 *   context size initialized to 1
315 	 */
316 	ctxt->context->node = (xmlNodePtr)ctxt->context->doc;
317 	ctxt->context->proximityPosition = 1;
318 	ctxt->context->contextSize = 1;
319 	xmlXPathEvalExpr(ctxt);
320 	ctxt->base = oldBase;
321         ctxt->cur = oldCur;
322     } else if (xmlStrEqual(name, (xmlChar *) "element")) {
323 	const xmlChar *oldBase = ctxt->base;
324 	const xmlChar *oldCur = ctxt->cur;
325 	xmlChar *name2;
326 
327 	ctxt->cur = ctxt->base = buffer;
328 	if (buffer[0] == '/') {
329 	    xmlXPathRoot(ctxt);
330 	    xmlXPtrEvalChildSeq(ctxt, NULL);
331 	} else {
332 	    name2 = xmlXPathParseName(ctxt);
333 	    if (name2 == NULL) {
334                 ctxt->base = oldBase;
335                 ctxt->cur = oldCur;
336 		xmlFree(buffer);
337                 xmlFree(name);
338 		XP_ERROR(XPATH_EXPR_ERROR);
339 	    }
340 	    xmlXPtrEvalChildSeq(ctxt, name2);
341 	}
342 	ctxt->base = oldBase;
343         ctxt->cur = oldCur;
344 #ifdef XPTR_XMLNS_SCHEME
345     } else if (xmlStrEqual(name, (xmlChar *) "xmlns")) {
346 	const xmlChar *oldBase = ctxt->base;
347 	const xmlChar *oldCur = ctxt->cur;
348 	xmlChar *prefix;
349 
350 	ctxt->cur = ctxt->base = buffer;
351         prefix = xmlXPathParseNCName(ctxt);
352 	if (prefix == NULL) {
353             ctxt->base = oldBase;
354             ctxt->cur = oldCur;
355 	    xmlFree(buffer);
356 	    xmlFree(name);
357 	    XP_ERROR(XPTR_SYNTAX_ERROR);
358 	}
359 	SKIP_BLANKS;
360 	if (CUR != '=') {
361             ctxt->base = oldBase;
362             ctxt->cur = oldCur;
363 	    xmlFree(prefix);
364 	    xmlFree(buffer);
365 	    xmlFree(name);
366 	    XP_ERROR(XPTR_SYNTAX_ERROR);
367 	}
368 	NEXT;
369 	SKIP_BLANKS;
370 
371 	if (xmlXPathRegisterNs(ctxt->context, prefix, ctxt->cur) < 0)
372             xmlXPathPErrMemory(ctxt);
373         ctxt->base = oldBase;
374         ctxt->cur = oldCur;
375 	xmlFree(prefix);
376 #endif /* XPTR_XMLNS_SCHEME */
377     } else {
378         xmlXPtrErr(ctxt, XML_XPTR_UNKNOWN_SCHEME,
379 		   "unsupported scheme '%s'\n", name);
380     }
381     xmlFree(buffer);
382     xmlFree(name);
383 }
384 
385 /**
386  * xmlXPtrEvalFullXPtr:
387  * @ctxt:  the XPointer Parser context
388  * @name:  the preparsed Scheme for the first XPtrPart
389  *
390  * FullXPtr ::= XPtrPart (S? XPtrPart)*
391  *
392  * As the specs says:
393  * -----------
394  * When multiple XPtrParts are provided, they must be evaluated in
395  * left-to-right order. If evaluation of one part fails, the nexti
396  * is evaluated. The following conditions cause XPointer part failure:
397  *
398  * - An unknown scheme
399  * - A scheme that does not locate any sub-resource present in the resource
400  * - A scheme that is not applicable to the media type of the resource
401  *
402  * The XPointer application must consume a failed XPointer part and
403  * attempt to evaluate the next one, if any. The result of the first
404  * XPointer part whose evaluation succeeds is taken to be the fragment
405  * located by the XPointer as a whole. If all the parts fail, the result
406  * for the XPointer as a whole is a sub-resource error.
407  * -----------
408  *
409  * Parse and evaluate a Full XPtr i.e. possibly a cascade of XPath based
410  * expressions or other schemes.
411  */
412 static void
xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt,xmlChar * name)413 xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt, xmlChar *name) {
414     if (name == NULL)
415     name = xmlXPathParseName(ctxt);
416     if (name == NULL)
417 	XP_ERROR(XPATH_EXPR_ERROR);
418     while (name != NULL) {
419 	ctxt->error = XPATH_EXPRESSION_OK;
420 	xmlXPtrEvalXPtrPart(ctxt, name);
421 
422 	/* in case of syntax error, break here */
423 	if ((ctxt->error != XPATH_EXPRESSION_OK) &&
424             (ctxt->error != XML_XPTR_UNKNOWN_SCHEME))
425 	    return;
426 
427 	/*
428 	 * If the returned value is a non-empty nodeset
429 	 * or location set, return here.
430 	 */
431 	if (ctxt->value != NULL) {
432 	    xmlXPathObjectPtr obj = ctxt->value;
433 
434 	    switch (obj->type) {
435 		case XPATH_NODESET: {
436 		    xmlNodeSetPtr loc = ctxt->value->nodesetval;
437 		    if ((loc != NULL) && (loc->nodeNr > 0))
438 			return;
439 		    break;
440 		}
441 		default:
442 		    break;
443 	    }
444 
445 	    /*
446 	     * Evaluating to improper values is equivalent to
447 	     * a sub-resource error, clean-up the stack
448 	     */
449 	    do {
450 		obj = valuePop(ctxt);
451 		if (obj != NULL) {
452 		    xmlXPathFreeObject(obj);
453 		}
454 	    } while (obj != NULL);
455 	}
456 
457 	/*
458 	 * Is there another XPointer part.
459 	 */
460 	SKIP_BLANKS;
461 	name = xmlXPathParseName(ctxt);
462     }
463 }
464 
465 /**
466  * xmlXPtrEvalChildSeq:
467  * @ctxt:  the XPointer Parser context
468  * @name:  a possible ID name of the child sequence
469  *
470  *  ChildSeq ::= '/1' ('/' [0-9]*)*
471  *             | Name ('/' [0-9]*)+
472  *
473  * Parse and evaluate a Child Sequence. This routine also handle the
474  * case of a Bare Name used to get a document ID.
475  */
476 static void
xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt,xmlChar * name)477 xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name) {
478     /*
479      * XPointer don't allow by syntax to address in multirooted trees
480      * this might prove useful in some cases, warn about it.
481      */
482     if ((name == NULL) && (CUR == '/') && (NXT(1) != '1')) {
483         xmlXPtrErr(ctxt, XML_XPTR_CHILDSEQ_START,
484 		   "warning: ChildSeq not starting by /1\n", NULL);
485     }
486 
487     if (name != NULL) {
488 	valuePush(ctxt, xmlXPathNewString(name));
489 	xmlFree(name);
490 	xmlXPathIdFunction(ctxt, 1);
491 	CHECK_ERROR;
492     }
493 
494     while (CUR == '/') {
495 	int child = 0, overflow = 0;
496 	NEXT;
497 
498 	while ((CUR >= '0') && (CUR <= '9')) {
499             int d = CUR - '0';
500             if (child > INT_MAX / 10)
501                 overflow = 1;
502             else
503                 child *= 10;
504             if (child > INT_MAX - d)
505                 overflow = 1;
506             else
507                 child += d;
508 	    NEXT;
509 	}
510         if (overflow)
511             child = 0;
512 	xmlXPtrGetChildNo(ctxt, child);
513     }
514 }
515 
516 
517 /**
518  * xmlXPtrEvalXPointer:
519  * @ctxt:  the XPointer Parser context
520  *
521  *  XPointer ::= Name
522  *             | ChildSeq
523  *             | FullXPtr
524  *
525  * Parse and evaluate an XPointer
526  */
527 static void
xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt)528 xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt) {
529     if (ctxt->valueTab == NULL) {
530 	/* Allocate the value stack */
531 	ctxt->valueTab = (xmlXPathObjectPtr *)
532 			 xmlMalloc(10 * sizeof(xmlXPathObjectPtr));
533 	if (ctxt->valueTab == NULL) {
534 	    xmlXPathPErrMemory(ctxt);
535 	    return;
536 	}
537 	ctxt->valueNr = 0;
538 	ctxt->valueMax = 10;
539 	ctxt->value = NULL;
540     }
541     SKIP_BLANKS;
542     if (CUR == '/') {
543 	xmlXPathRoot(ctxt);
544         xmlXPtrEvalChildSeq(ctxt, NULL);
545     } else {
546 	xmlChar *name;
547 
548 	name = xmlXPathParseName(ctxt);
549 	if (name == NULL)
550 	    XP_ERROR(XPATH_EXPR_ERROR);
551 	if (CUR == '(') {
552 	    xmlXPtrEvalFullXPtr(ctxt, name);
553 	    /* Short evaluation */
554 	    return;
555 	} else {
556 	    /* this handle both Bare Names and Child Sequences */
557 	    xmlXPtrEvalChildSeq(ctxt, name);
558 	}
559     }
560     SKIP_BLANKS;
561     if (CUR != 0)
562 	XP_ERROR(XPATH_EXPR_ERROR);
563 }
564 
565 
566 /************************************************************************
567  *									*
568  *			General routines				*
569  *									*
570  ************************************************************************/
571 
572 /**
573  * xmlXPtrNewContext:
574  * @doc:  the XML document
575  * @here:  the node that directly contains the XPointer being evaluated or NULL
576  * @origin:  the element from which a user or program initiated traversal of
577  *           the link, or NULL.
578  *
579  * Create a new XPointer context
580  *
581  * Returns the xmlXPathContext just allocated.
582  */
583 xmlXPathContextPtr
xmlXPtrNewContext(xmlDocPtr doc,xmlNodePtr here,xmlNodePtr origin)584 xmlXPtrNewContext(xmlDocPtr doc, xmlNodePtr here, xmlNodePtr origin) {
585     xmlXPathContextPtr ret;
586     (void) here;
587     (void) origin;
588 
589     ret = xmlXPathNewContext(doc);
590     if (ret == NULL)
591 	return(ret);
592 
593     return(ret);
594 }
595 
596 /**
597  * xmlXPtrEval:
598  * @str:  the XPointer expression
599  * @ctx:  the XPointer context
600  *
601  * Evaluate the XPath Location Path in the given context.
602  *
603  * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL.
604  *         the caller has to free the object.
605  */
606 xmlXPathObjectPtr
xmlXPtrEval(const xmlChar * str,xmlXPathContextPtr ctx)607 xmlXPtrEval(const xmlChar *str, xmlXPathContextPtr ctx) {
608     xmlXPathParserContextPtr ctxt;
609     xmlXPathObjectPtr res = NULL, tmp;
610     xmlXPathObjectPtr init = NULL;
611     int stack = 0;
612 
613     xmlInitParser();
614 
615     if ((ctx == NULL) || (str == NULL))
616 	return(NULL);
617 
618     xmlResetError(&ctx->lastError);
619 
620     ctxt = xmlXPathNewParserContext(str, ctx);
621     if (ctxt == NULL) {
622         xmlXPathErrMemory(ctx);
623 	return(NULL);
624     }
625     xmlXPtrEvalXPointer(ctxt);
626     if (ctx->lastError.code != XML_ERR_OK)
627         goto error;
628 
629     if ((ctxt->value != NULL) &&
630 	(ctxt->value->type != XPATH_NODESET)) {
631         xmlXPtrErr(ctxt, XML_XPTR_EVAL_FAILED,
632 		"xmlXPtrEval: evaluation failed to return a node set\n",
633 		   NULL);
634     } else {
635 	res = valuePop(ctxt);
636     }
637 
638     do {
639         tmp = valuePop(ctxt);
640 	if (tmp != NULL) {
641 	    if (tmp != init) {
642 		if (tmp->type == XPATH_NODESET) {
643 		    /*
644 		     * Evaluation may push a root nodeset which is unused
645 		     */
646 		    xmlNodeSetPtr set;
647 		    set = tmp->nodesetval;
648 		    if ((set == NULL) || (set->nodeNr != 1) ||
649 			(set->nodeTab[0] != (xmlNodePtr) ctx->doc))
650 			stack++;
651 		} else
652 		    stack++;
653 	    }
654 	    xmlXPathFreeObject(tmp);
655         }
656     } while (tmp != NULL);
657     if (stack != 0) {
658         xmlXPtrErr(ctxt, XML_XPTR_EXTRA_OBJECTS,
659 		   "xmlXPtrEval: object(s) left on the eval stack\n",
660 		   NULL);
661     }
662     if (ctx->lastError.code != XML_ERR_OK) {
663 	xmlXPathFreeObject(res);
664 	res = NULL;
665     }
666 
667 error:
668     xmlXPathFreeParserContext(ctxt);
669     return(res);
670 }
671 
672 #endif
673 
674