1 /*
2 * xpointer.c : Code to handle XML Pointer
3 *
 * Base implementation was made according to the
5 * W3C Candidate Recommendation 7 June 2000
6 * http://www.w3.org/TR/2000/CR-xptr-20000607
7 *
8 * Added support for the element() scheme described in:
9 * W3C Proposed Recommendation 13 November 2002
10 * http://www.w3.org/TR/2002/PR-xptr-element-20021113/
11 *
12 * See Copyright for the status of this software.
13 *
14 * daniel@veillard.com
15 */
16
17 /* To avoid EBCDIC trouble when parsing on zOS */
18 #if defined(__MVS__)
19 #pragma convert("ISO8859-1")
20 #endif
21
22 #define IN_LIBXML
23 #include "libxml.h"
24
25 /*
26 * TODO: better handling of error cases, the full expression should
27 * be parsed beforehand instead of a progressive evaluation
28 * TODO: Access into entities references are not supported now ...
29 * need a start to be able to pop out of entities refs since
30 * parent is the entity declaration, not the ref.
31 */
32
33 #include <string.h>
34 #include <libxml/xpointer.h>
35 #include <libxml/xmlmemory.h>
36 #include <libxml/parserInternals.h>
37 #include <libxml/uri.h>
38 #include <libxml/xpath.h>
39 #include <libxml/xpathInternals.h>
40 #include <libxml/xmlerror.h>
41
42 #ifdef LIBXML_XPTR_ENABLED
43
44 /* Add support of the xmlns() xpointer scheme to initialize the namespaces */
45 #define XPTR_XMLNS_SCHEME
46
47 #include "private/error.h"
48 #include "private/parser.h"
49 #include "private/xpath.h"
50
51 /************************************************************************
52 * *
53 * Some factorized error routines *
54 * *
55 ************************************************************************/
56
57 /**
58 * xmlXPtrErr:
59 * @ctxt: an XPTR evaluation context
60 * @extra: extra information
61 *
62 * Handle an XPointer error
63 */
64 static void LIBXML_ATTR_FORMAT(3,0)
xmlXPtrErr(xmlXPathParserContextPtr ctxt,int code,const char * msg,const xmlChar * extra)65 xmlXPtrErr(xmlXPathParserContextPtr ctxt, int code,
66 const char * msg, const xmlChar *extra)
67 {
68 xmlStructuredErrorFunc serror = NULL;
69 void *data = NULL;
70 xmlNodePtr node = NULL;
71 int res;
72
73 if (ctxt == NULL)
74 return;
75 /* Only report the first error */
76 if (ctxt->error != 0)
77 return;
78
79 ctxt->error = code;
80
81 if (ctxt->context != NULL) {
82 xmlErrorPtr err = &ctxt->context->lastError;
83
84 /* cleanup current last error */
85 xmlResetError(err);
86
87 err->domain = XML_FROM_XPOINTER;
88 err->code = code;
89 err->level = XML_ERR_ERROR;
90 err->str1 = (char *) xmlStrdup(ctxt->base);
91 if (err->str1 == NULL) {
92 xmlXPathPErrMemory(ctxt);
93 return;
94 }
95 err->int1 = ctxt->cur - ctxt->base;
96 err->node = ctxt->context->debugNode;
97
98 serror = ctxt->context->error;
99 data = ctxt->context->userData;
100 node = ctxt->context->debugNode;
101 }
102
103 res = xmlRaiseError(serror, NULL, data, NULL, node,
104 XML_FROM_XPOINTER, code, XML_ERR_ERROR, NULL, 0,
105 (const char *) extra, (const char *) ctxt->base,
106 NULL, ctxt->cur - ctxt->base, 0,
107 msg, extra);
108 if (res < 0)
109 xmlXPathPErrMemory(ctxt);
110 }
111
112 /************************************************************************
113 * *
114 * A few helper functions for child sequences *
115 * *
116 ************************************************************************/
117
118 /**
119 * xmlXPtrGetNthChild:
120 * @cur: the node
121 * @no: the child number
122 *
123 * Returns the @no'th element child of @cur or NULL
124 */
125 static xmlNodePtr
xmlXPtrGetNthChild(xmlNodePtr cur,int no)126 xmlXPtrGetNthChild(xmlNodePtr cur, int no) {
127 int i;
128 if ((cur == NULL) || (cur->type == XML_NAMESPACE_DECL))
129 return(cur);
130 cur = cur->children;
131 for (i = 0;i <= no;cur = cur->next) {
132 if (cur == NULL)
133 return(cur);
134 if ((cur->type == XML_ELEMENT_NODE) ||
135 (cur->type == XML_DOCUMENT_NODE) ||
136 (cur->type == XML_HTML_DOCUMENT_NODE)) {
137 i++;
138 if (i == no)
139 break;
140 }
141 }
142 return(cur);
143 }
144
145 /************************************************************************
146 * *
147 * The parser *
148 * *
149 ************************************************************************/
150
151 static void xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name);
152
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros: each of them expands to code working on a variable named
 * "ctxt", which has to be in scope wherever the macro is used.
 *
 * All of them operate on single xmlChar units. None of them decodes
 * multi-byte UTF-8 sequences, so they should only be used to compare
 * against, or to step over, ASCII characters.
 *
 *   CUR          the current xmlChar.
 *   CURRENT      same value as CUR.
 *   NXT(n)       the xmlChar found n positions after the current one;
 *                no bound check is made.
 *   SKIP(n)      advance the current position by n xmlChar; the
 *                terminating 0 is not checked for.
 *   NEXT         advance by one xmlChar unless the current one is 0, so
 *                the position never moves past the terminator. The
 *                expression evaluates to the position held before the step.
 *   SKIP_BLANKS  advance past every xmlChar accepted by IS_BLANK_CH.
 *                Expands to a single statement, to be followed by ';'.
 */

#define CUR (*ctxt->cur)
#define SKIP(val) (ctxt->cur += (val))
#define NXT(val) (ctxt->cur[(val)])

#define SKIP_BLANKS							\
    do {								\
        while (IS_BLANK_CH(*(ctxt->cur))) NEXT;				\
    } while (0)

#define CURRENT (*ctxt->cur)
#define NEXT ((*ctxt->cur) ? ctxt->cur++: ctxt->cur)
184
185 /*
186 * xmlXPtrGetChildNo:
187 * @ctxt: the XPointer Parser context
188 * @index: the child number
189 *
190 * Move the current node of the nodeset on the stack to the
191 * given child if found
192 */
193 static void
xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt,int indx)194 xmlXPtrGetChildNo(xmlXPathParserContextPtr ctxt, int indx) {
195 xmlNodePtr cur = NULL;
196 xmlXPathObjectPtr obj;
197 xmlNodeSetPtr oldset;
198
199 CHECK_TYPE(XPATH_NODESET);
200 obj = valuePop(ctxt);
201 oldset = obj->nodesetval;
202 if ((indx <= 0) || (oldset == NULL) || (oldset->nodeNr != 1)) {
203 xmlXPathFreeObject(obj);
204 valuePush(ctxt, xmlXPathNewNodeSet(NULL));
205 return;
206 }
207 cur = xmlXPtrGetNthChild(oldset->nodeTab[0], indx);
208 if (cur == NULL) {
209 xmlXPathFreeObject(obj);
210 valuePush(ctxt, xmlXPathNewNodeSet(NULL));
211 return;
212 }
213 oldset->nodeTab[0] = cur;
214 valuePush(ctxt, obj);
215 }
216
217 /**
218 * xmlXPtrEvalXPtrPart:
219 * @ctxt: the XPointer Parser context
220 * @name: the preparsed Scheme for the XPtrPart
221 *
222 * XPtrPart ::= 'xpointer' '(' XPtrExpr ')'
223 * | Scheme '(' SchemeSpecificExpr ')'
224 *
225 * Scheme ::= NCName - 'xpointer' [VC: Non-XPointer schemes]
226 *
227 * SchemeSpecificExpr ::= StringWithBalancedParens
228 *
229 * StringWithBalancedParens ::=
230 * [^()]* ('(' StringWithBalancedParens ')' [^()]*)*
231 * [VC: Parenthesis escaping]
232 *
233 * XPtrExpr ::= Expr [VC: Parenthesis escaping]
234 *
235 * VC: Parenthesis escaping:
236 * The end of an XPointer part is signaled by the right parenthesis ")"
237 * character that is balanced with the left parenthesis "(" character
238 * that began the part. Any unbalanced parenthesis character inside the
239 * expression, even within literals, must be escaped with a circumflex (^)
240 * character preceding it. If the expression contains any literal
241 * occurrences of the circumflex, each must be escaped with an additional
242 * circumflex (that is, ^^). If the unescaped parentheses in the expression
243 * are not balanced, a syntax error results.
244 *
245 * Parse and evaluate an XPtrPart. Basically it generates the unescaped
246 * string and if the scheme is 'xpointer' it will call the XPath interpreter.
247 *
248 * TODO: there is no new scheme registration mechanism
249 */
250
251 static void
xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt,xmlChar * name)252 xmlXPtrEvalXPtrPart(xmlXPathParserContextPtr ctxt, xmlChar *name) {
253 xmlChar *buffer, *cur;
254 int len;
255 int level;
256
257 if (name == NULL)
258 name = xmlXPathParseName(ctxt);
259 if (name == NULL)
260 XP_ERROR(XPATH_EXPR_ERROR);
261
262 if (CUR != '(') {
263 xmlFree(name);
264 XP_ERROR(XPATH_EXPR_ERROR);
265 }
266 NEXT;
267 level = 1;
268
269 len = xmlStrlen(ctxt->cur);
270 len++;
271 buffer = xmlMalloc(len);
272 if (buffer == NULL) {
273 xmlXPathPErrMemory(ctxt);
274 xmlFree(name);
275 return;
276 }
277
278 cur = buffer;
279 while (CUR != 0) {
280 if (CUR == ')') {
281 level--;
282 if (level == 0) {
283 NEXT;
284 break;
285 }
286 } else if (CUR == '(') {
287 level++;
288 } else if (CUR == '^') {
289 if ((NXT(1) == ')') || (NXT(1) == '(') || (NXT(1) == '^')) {
290 NEXT;
291 }
292 }
293 *cur++ = CUR;
294 NEXT;
295 }
296 *cur = 0;
297
298 if ((level != 0) && (CUR == 0)) {
299 xmlFree(name);
300 xmlFree(buffer);
301 XP_ERROR(XPTR_SYNTAX_ERROR);
302 }
303
304 if (xmlStrEqual(name, (xmlChar *) "xpointer") ||
305 xmlStrEqual(name, (xmlChar *) "xpath1")) {
306 const xmlChar *oldBase = ctxt->base;
307 const xmlChar *oldCur = ctxt->cur;
308
309 ctxt->cur = ctxt->base = buffer;
310 /*
311 * To evaluate an xpointer scheme element (4.3) we need:
312 * context initialized to the root
313 * context position initialized to 1
314 * context size initialized to 1
315 */
316 ctxt->context->node = (xmlNodePtr)ctxt->context->doc;
317 ctxt->context->proximityPosition = 1;
318 ctxt->context->contextSize = 1;
319 xmlXPathEvalExpr(ctxt);
320 ctxt->base = oldBase;
321 ctxt->cur = oldCur;
322 } else if (xmlStrEqual(name, (xmlChar *) "element")) {
323 const xmlChar *oldBase = ctxt->base;
324 const xmlChar *oldCur = ctxt->cur;
325 xmlChar *name2;
326
327 ctxt->cur = ctxt->base = buffer;
328 if (buffer[0] == '/') {
329 xmlXPathRoot(ctxt);
330 xmlXPtrEvalChildSeq(ctxt, NULL);
331 } else {
332 name2 = xmlXPathParseName(ctxt);
333 if (name2 == NULL) {
334 ctxt->base = oldBase;
335 ctxt->cur = oldCur;
336 xmlFree(buffer);
337 xmlFree(name);
338 XP_ERROR(XPATH_EXPR_ERROR);
339 }
340 xmlXPtrEvalChildSeq(ctxt, name2);
341 }
342 ctxt->base = oldBase;
343 ctxt->cur = oldCur;
344 #ifdef XPTR_XMLNS_SCHEME
345 } else if (xmlStrEqual(name, (xmlChar *) "xmlns")) {
346 const xmlChar *oldBase = ctxt->base;
347 const xmlChar *oldCur = ctxt->cur;
348 xmlChar *prefix;
349
350 ctxt->cur = ctxt->base = buffer;
351 prefix = xmlXPathParseNCName(ctxt);
352 if (prefix == NULL) {
353 ctxt->base = oldBase;
354 ctxt->cur = oldCur;
355 xmlFree(buffer);
356 xmlFree(name);
357 XP_ERROR(XPTR_SYNTAX_ERROR);
358 }
359 SKIP_BLANKS;
360 if (CUR != '=') {
361 ctxt->base = oldBase;
362 ctxt->cur = oldCur;
363 xmlFree(prefix);
364 xmlFree(buffer);
365 xmlFree(name);
366 XP_ERROR(XPTR_SYNTAX_ERROR);
367 }
368 NEXT;
369 SKIP_BLANKS;
370
371 if (xmlXPathRegisterNs(ctxt->context, prefix, ctxt->cur) < 0)
372 xmlXPathPErrMemory(ctxt);
373 ctxt->base = oldBase;
374 ctxt->cur = oldCur;
375 xmlFree(prefix);
376 #endif /* XPTR_XMLNS_SCHEME */
377 } else {
378 xmlXPtrErr(ctxt, XML_XPTR_UNKNOWN_SCHEME,
379 "unsupported scheme '%s'\n", name);
380 }
381 xmlFree(buffer);
382 xmlFree(name);
383 }
384
385 /**
386 * xmlXPtrEvalFullXPtr:
387 * @ctxt: the XPointer Parser context
388 * @name: the preparsed Scheme for the first XPtrPart
389 *
390 * FullXPtr ::= XPtrPart (S? XPtrPart)*
391 *
392 * As the specs says:
393 * -----------
394 * When multiple XPtrParts are provided, they must be evaluated in
395 * left-to-right order. If evaluation of one part fails, the nexti
396 * is evaluated. The following conditions cause XPointer part failure:
397 *
398 * - An unknown scheme
399 * - A scheme that does not locate any sub-resource present in the resource
400 * - A scheme that is not applicable to the media type of the resource
401 *
402 * The XPointer application must consume a failed XPointer part and
403 * attempt to evaluate the next one, if any. The result of the first
404 * XPointer part whose evaluation succeeds is taken to be the fragment
405 * located by the XPointer as a whole. If all the parts fail, the result
406 * for the XPointer as a whole is a sub-resource error.
407 * -----------
408 *
409 * Parse and evaluate a Full XPtr i.e. possibly a cascade of XPath based
410 * expressions or other schemes.
411 */
412 static void
xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt,xmlChar * name)413 xmlXPtrEvalFullXPtr(xmlXPathParserContextPtr ctxt, xmlChar *name) {
414 if (name == NULL)
415 name = xmlXPathParseName(ctxt);
416 if (name == NULL)
417 XP_ERROR(XPATH_EXPR_ERROR);
418 while (name != NULL) {
419 ctxt->error = XPATH_EXPRESSION_OK;
420 xmlXPtrEvalXPtrPart(ctxt, name);
421
422 /* in case of syntax error, break here */
423 if ((ctxt->error != XPATH_EXPRESSION_OK) &&
424 (ctxt->error != XML_XPTR_UNKNOWN_SCHEME))
425 return;
426
427 /*
428 * If the returned value is a non-empty nodeset
429 * or location set, return here.
430 */
431 if (ctxt->value != NULL) {
432 xmlXPathObjectPtr obj = ctxt->value;
433
434 switch (obj->type) {
435 case XPATH_NODESET: {
436 xmlNodeSetPtr loc = ctxt->value->nodesetval;
437 if ((loc != NULL) && (loc->nodeNr > 0))
438 return;
439 break;
440 }
441 default:
442 break;
443 }
444
445 /*
446 * Evaluating to improper values is equivalent to
447 * a sub-resource error, clean-up the stack
448 */
449 do {
450 obj = valuePop(ctxt);
451 if (obj != NULL) {
452 xmlXPathFreeObject(obj);
453 }
454 } while (obj != NULL);
455 }
456
457 /*
458 * Is there another XPointer part.
459 */
460 SKIP_BLANKS;
461 name = xmlXPathParseName(ctxt);
462 }
463 }
464
465 /**
466 * xmlXPtrEvalChildSeq:
467 * @ctxt: the XPointer Parser context
468 * @name: a possible ID name of the child sequence
469 *
470 * ChildSeq ::= '/1' ('/' [0-9]*)*
471 * | Name ('/' [0-9]*)+
472 *
473 * Parse and evaluate a Child Sequence. This routine also handle the
474 * case of a Bare Name used to get a document ID.
475 */
476 static void
xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt,xmlChar * name)477 xmlXPtrEvalChildSeq(xmlXPathParserContextPtr ctxt, xmlChar *name) {
478 /*
479 * XPointer don't allow by syntax to address in multirooted trees
480 * this might prove useful in some cases, warn about it.
481 */
482 if ((name == NULL) && (CUR == '/') && (NXT(1) != '1')) {
483 xmlXPtrErr(ctxt, XML_XPTR_CHILDSEQ_START,
484 "warning: ChildSeq not starting by /1\n", NULL);
485 }
486
487 if (name != NULL) {
488 valuePush(ctxt, xmlXPathNewString(name));
489 xmlFree(name);
490 xmlXPathIdFunction(ctxt, 1);
491 CHECK_ERROR;
492 }
493
494 while (CUR == '/') {
495 int child = 0, overflow = 0;
496 NEXT;
497
498 while ((CUR >= '0') && (CUR <= '9')) {
499 int d = CUR - '0';
500 if (child > INT_MAX / 10)
501 overflow = 1;
502 else
503 child *= 10;
504 if (child > INT_MAX - d)
505 overflow = 1;
506 else
507 child += d;
508 NEXT;
509 }
510 if (overflow)
511 child = 0;
512 xmlXPtrGetChildNo(ctxt, child);
513 }
514 }
515
516
517 /**
518 * xmlXPtrEvalXPointer:
519 * @ctxt: the XPointer Parser context
520 *
521 * XPointer ::= Name
522 * | ChildSeq
523 * | FullXPtr
524 *
525 * Parse and evaluate an XPointer
526 */
527 static void
xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt)528 xmlXPtrEvalXPointer(xmlXPathParserContextPtr ctxt) {
529 if (ctxt->valueTab == NULL) {
530 /* Allocate the value stack */
531 ctxt->valueTab = (xmlXPathObjectPtr *)
532 xmlMalloc(10 * sizeof(xmlXPathObjectPtr));
533 if (ctxt->valueTab == NULL) {
534 xmlXPathPErrMemory(ctxt);
535 return;
536 }
537 ctxt->valueNr = 0;
538 ctxt->valueMax = 10;
539 ctxt->value = NULL;
540 }
541 SKIP_BLANKS;
542 if (CUR == '/') {
543 xmlXPathRoot(ctxt);
544 xmlXPtrEvalChildSeq(ctxt, NULL);
545 } else {
546 xmlChar *name;
547
548 name = xmlXPathParseName(ctxt);
549 if (name == NULL)
550 XP_ERROR(XPATH_EXPR_ERROR);
551 if (CUR == '(') {
552 xmlXPtrEvalFullXPtr(ctxt, name);
553 /* Short evaluation */
554 return;
555 } else {
556 /* this handle both Bare Names and Child Sequences */
557 xmlXPtrEvalChildSeq(ctxt, name);
558 }
559 }
560 SKIP_BLANKS;
561 if (CUR != 0)
562 XP_ERROR(XPATH_EXPR_ERROR);
563 }
564
565
566 /************************************************************************
567 * *
568 * General routines *
569 * *
570 ************************************************************************/
571
572 /**
573 * xmlXPtrNewContext:
574 * @doc: the XML document
575 * @here: the node that directly contains the XPointer being evaluated or NULL
576 * @origin: the element from which a user or program initiated traversal of
577 * the link, or NULL.
578 *
579 * Create a new XPointer context
580 *
581 * Returns the xmlXPathContext just allocated.
582 */
583 xmlXPathContextPtr
xmlXPtrNewContext(xmlDocPtr doc,xmlNodePtr here,xmlNodePtr origin)584 xmlXPtrNewContext(xmlDocPtr doc, xmlNodePtr here, xmlNodePtr origin) {
585 xmlXPathContextPtr ret;
586 (void) here;
587 (void) origin;
588
589 ret = xmlXPathNewContext(doc);
590 if (ret == NULL)
591 return(ret);
592
593 return(ret);
594 }
595
596 /**
597 * xmlXPtrEval:
598 * @str: the XPointer expression
599 * @ctx: the XPointer context
600 *
601 * Evaluate the XPath Location Path in the given context.
602 *
603 * Returns the xmlXPathObjectPtr resulting from the evaluation or NULL.
604 * the caller has to free the object.
605 */
606 xmlXPathObjectPtr
xmlXPtrEval(const xmlChar * str,xmlXPathContextPtr ctx)607 xmlXPtrEval(const xmlChar *str, xmlXPathContextPtr ctx) {
608 xmlXPathParserContextPtr ctxt;
609 xmlXPathObjectPtr res = NULL, tmp;
610 xmlXPathObjectPtr init = NULL;
611 int stack = 0;
612
613 xmlInitParser();
614
615 if ((ctx == NULL) || (str == NULL))
616 return(NULL);
617
618 xmlResetError(&ctx->lastError);
619
620 ctxt = xmlXPathNewParserContext(str, ctx);
621 if (ctxt == NULL) {
622 xmlXPathErrMemory(ctx);
623 return(NULL);
624 }
625 xmlXPtrEvalXPointer(ctxt);
626 if (ctx->lastError.code != XML_ERR_OK)
627 goto error;
628
629 if ((ctxt->value != NULL) &&
630 (ctxt->value->type != XPATH_NODESET)) {
631 xmlXPtrErr(ctxt, XML_XPTR_EVAL_FAILED,
632 "xmlXPtrEval: evaluation failed to return a node set\n",
633 NULL);
634 } else {
635 res = valuePop(ctxt);
636 }
637
638 do {
639 tmp = valuePop(ctxt);
640 if (tmp != NULL) {
641 if (tmp != init) {
642 if (tmp->type == XPATH_NODESET) {
643 /*
644 * Evaluation may push a root nodeset which is unused
645 */
646 xmlNodeSetPtr set;
647 set = tmp->nodesetval;
648 if ((set == NULL) || (set->nodeNr != 1) ||
649 (set->nodeTab[0] != (xmlNodePtr) ctx->doc))
650 stack++;
651 } else
652 stack++;
653 }
654 xmlXPathFreeObject(tmp);
655 }
656 } while (tmp != NULL);
657 if (stack != 0) {
658 xmlXPtrErr(ctxt, XML_XPTR_EXTRA_OBJECTS,
659 "xmlXPtrEval: object(s) left on the eval stack\n",
660 NULL);
661 }
662 if (ctx->lastError.code != XML_ERR_OK) {
663 xmlXPathFreeObject(res);
664 res = NULL;
665 }
666
667 error:
668 xmlXPathFreeParserContext(ctxt);
669 return(res);
670 }
671
672 #endif
673
674