• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <limits.h>
15 #include <string.h>
16 
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/xmlerror.h>
20 
21 #include "private/error.h"
22 
23 /**
24  * MAX_URI_LENGTH:
25  *
26  * The definition of the URI regexp in the above RFC has no size limit
27  * In practice they are usually relatively short except for the
28  * data URI scheme as defined in RFC 2397. Even for data URI the usual
29  * maximum size before hitting random practical limits is around 64 KB
30  * and 4KB is usually a maximum admitted limit for proper operations.
31  * The value below is more a security limit than anything else and
32  * really should never be hit by 'normal' operations
33  * Set to 1 MByte in 2012, this is only enforced on output
34  */
#define MAX_URI_LENGTH (1024 * 1024)

/* Port value of a freshly created URI: no port has been set. */
#define PORT_EMPTY           0
/* Port marker stored when "//" was followed by an empty server and no port. */
#define PORT_EMPTY_SERVER   (-1)
39 
40 static void
xmlURIErrMemory(const char * extra)41 xmlURIErrMemory(const char *extra)
42 {
43     if (extra)
44         __xmlRaiseError(NULL, NULL, NULL,
45                         NULL, NULL, XML_FROM_URI,
46                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
47                         extra, NULL, NULL, 0, 0,
48                         "Memory allocation failed : %s\n", extra);
49     else
50         __xmlRaiseError(NULL, NULL, NULL,
51                         NULL, NULL, XML_FROM_URI,
52                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
53                         NULL, NULL, NULL, 0, 0,
54                         "Memory allocation failed\n");
55 }
56 
57 static void xmlCleanURI(xmlURIPtr uri);
58 
/*
 * Character classes from RFC 2396, kept for the legacy handling code.
 * Every macro takes a character value, except IS_UNWISE which takes a
 * pointer to the character to test.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9"; any earlier IS_DIGIT definition is replaced */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * Takes a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
126 
/*
 * Advance the pointer variable p to the next character: three bytes when
 * it points at a '%' (start of an escape sequence), one byte otherwise.
 * p must be a modifiable lvalue; callers are expected to have validated
 * the escape sequence beforehand.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s through xmlStrndup, as a char pointer. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
144 
145 /************************************************************************
146  *									*
147  *                         RFC 3986 parser				*
148  *									*
149  ************************************************************************/
150 
/*
 * RFC 3986 character classes. Every macro takes a POINTER to the
 * character to classify and may evaluate it several times.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The argument is parenthesized before the offset is added so that
 * expression arguments (e.g. a conditional) expand correctly. The two
 * following bytes are only read when the first one is '%'.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
200 
201 /**
202  * xmlParse3986Scheme:
203  * @uri:  pointer to an URI structure
204  * @str:  pointer to the string to analyze
205  *
206  * Parse an URI scheme
207  *
208  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
209  *
210  * Returns 0 or the error code
211  */
212 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)213 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
214     const char *cur;
215 
216     if (str == NULL)
217 	return(-1);
218 
219     cur = *str;
220     if (!ISA_ALPHA(cur))
221 	return(2);
222     cur++;
223     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
224            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
225     if (uri != NULL) {
226 	if (uri->scheme != NULL) xmlFree(uri->scheme);
227 	uri->scheme = STRNDUP(*str, cur - *str);
228     }
229     *str = cur;
230     return(0);
231 }
232 
233 /**
234  * xmlParse3986Fragment:
235  * @uri:  pointer to an URI structure
236  * @str:  pointer to the string to analyze
237  *
238  * Parse the fragment part of an URI
239  *
240  * fragment      = *( pchar / "/" / "?" )
241  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
242  *       in the fragment identifier but this is used very broadly for
243  *       xpointer scheme selection, so we are allowing it here to not break
244  *       for example all the DocBook processing chains.
245  *
246  * Returns 0 or the error code
247  */
248 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)249 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
250 {
251     const char *cur;
252 
253     if (str == NULL)
254         return (-1);
255 
256     cur = *str;
257 
258     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
259            (*cur == '[') || (*cur == ']') ||
260            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261         NEXT(cur);
262     if (uri != NULL) {
263         if (uri->fragment != NULL)
264             xmlFree(uri->fragment);
265 	if (uri->cleanup & 2)
266 	    uri->fragment = STRNDUP(*str, cur - *str);
267 	else
268 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
269     }
270     *str = cur;
271     return (0);
272 }
273 
274 /**
275  * xmlParse3986Query:
276  * @uri:  pointer to an URI structure
277  * @str:  pointer to the string to analyze
278  *
279  * Parse the query part of an URI
280  *
281  * query = *( pchar / "/" / "?" )
282  *
283  * Returns 0 or the error code
284  */
285 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)286 xmlParse3986Query(xmlURIPtr uri, const char **str)
287 {
288     const char *cur;
289 
290     if (str == NULL)
291         return (-1);
292 
293     cur = *str;
294 
295     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
296            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
297         NEXT(cur);
298     if (uri != NULL) {
299         if (uri->query != NULL)
300             xmlFree(uri->query);
301 	if (uri->cleanup & 2)
302 	    uri->query = STRNDUP(*str, cur - *str);
303 	else
304 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
305 
306 	/* Save the raw bytes of the query as well.
307 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
308 	 */
309 	if (uri->query_raw != NULL)
310 	    xmlFree (uri->query_raw);
311 	uri->query_raw = STRNDUP (*str, cur - *str);
312     }
313     *str = cur;
314     return (0);
315 }
316 
317 /**
318  * xmlParse3986Port:
319  * @uri:  pointer to an URI structure
320  * @str:  the string to analyze
321  *
322  * Parse a port part and fills in the appropriate fields
323  * of the @uri structure
324  *
325  * port          = *DIGIT
326  *
327  * Returns 0 or the error code
328  */
329 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)330 xmlParse3986Port(xmlURIPtr uri, const char **str)
331 {
332     const char *cur = *str;
333     int port = 0;
334 
335     if (ISA_DIGIT(cur)) {
336 	while (ISA_DIGIT(cur)) {
337             int digit = *cur - '0';
338 
339             if (port > INT_MAX / 10)
340                 return(1);
341             port *= 10;
342             if (port > INT_MAX - digit)
343                 return(1);
344 	    port += digit;
345 
346 	    cur++;
347 	}
348 	if (uri != NULL)
349 	    uri->port = port;
350 	*str = cur;
351 	return(0);
352     }
353     return(1);
354 }
355 
356 /**
357  * xmlParse3986Userinfo:
358  * @uri:  pointer to an URI structure
359  * @str:  the string to analyze
360  *
361  * Parse an user information part and fills in the appropriate fields
362  * of the @uri structure
363  *
364  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
365  *
366  * Returns 0 or the error code
367  */
368 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)369 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
370 {
371     const char *cur;
372 
373     cur = *str;
374     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
375            ISA_SUB_DELIM(cur) || (*cur == ':'))
376 	NEXT(cur);
377     if (*cur == '@') {
378 	if (uri != NULL) {
379 	    if (uri->user != NULL) xmlFree(uri->user);
380 	    if (uri->cleanup & 2)
381 		uri->user = STRNDUP(*str, cur - *str);
382 	    else
383 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
384 	}
385 	*str = cur;
386 	return(0);
387     }
388     return(1);
389 }
390 
391 /**
392  * xmlParse3986DecOctet:
393  * @str:  the string to analyze
394  *
395  *    dec-octet     = DIGIT                 ; 0-9
396  *                  / %x31-39 DIGIT         ; 10-99
397  *                  / "1" 2DIGIT            ; 100-199
398  *                  / "2" %x30-34 DIGIT     ; 200-249
399  *                  / "25" %x30-35          ; 250-255
400  *
401  * Skip a dec-octet.
402  *
403  * Returns 0 if found and skipped, 1 otherwise
404  */
static int
xmlParse3986DecOctet(const char **str) {
    /*
     * Skip one dec-octet (0-255, no leading zero on multi-digit values).
     * On success *str is advanced past the octet and 0 is returned; on
     * failure 1 is returned and *str is left untouched.
     */
    const char *cur = *str;
    char c0, c1, c2;

    c0 = cur[0];
    if ((c0 < '0') || (c0 > '9'))
        return(1);

    c1 = cur[1];
    if ((c1 < '0') || (c1 > '9')) {
        /* single digit: 0-9 */
        *str = cur + 1;
        return(0);
    }

    /* cur[1] is a digit, hence not the terminator, so cur[2] is readable */
    c2 = cur[2];
    if ((c2 < '0') || (c2 > '9')) {
        /* two digits: 10-99, a leading zero is not allowed */
        if (c0 == '0')
            return(1);
        *str = cur + 2;
        return(0);
    }

    /*
     * three digits: 100-199, 200-249 or 250-255; the last case has to
     * bound the THIRD digit, otherwise 256-259 would be accepted
     */
    if ((c0 == '1') ||
        ((c0 == '2') && (c1 <= '4')) ||
        ((c0 == '2') && (c1 == '5') && (c2 <= '5'))) {
        *str = cur + 3;
        return(0);
    }
    return(1);
}
428 /**
429  * xmlParse3986Host:
430  * @uri:  pointer to an URI structure
431  * @str:  the string to analyze
432  *
433  * Parse an host part and fills in the appropriate fields
434  * of the @uri structure
435  *
436  * host          = IP-literal / IPv4address / reg-name
437  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
438  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
439  * reg-name      = *( unreserved / pct-encoded / sub-delims )
440  *
441  * Returns 0 or the error code
442  */
443 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)444 xmlParse3986Host(xmlURIPtr uri, const char **str)
445 {
446     const char *cur = *str;
447     const char *host;
448 
449     host = cur;
450     /*
451      * IPv6 and future addressing scheme are enclosed between brackets
452      */
453     if (*cur == '[') {
454         cur++;
455 	while ((*cur != ']') && (*cur != 0))
456 	    cur++;
457 	if (*cur != ']')
458 	    return(1);
459 	cur++;
460 	goto found;
461     }
462     /*
463      * try to parse an IPv4
464      */
465     if (ISA_DIGIT(cur)) {
466         if (xmlParse3986DecOctet(&cur) != 0)
467 	    goto not_ipv4;
468 	if (*cur != '.')
469 	    goto not_ipv4;
470 	cur++;
471         if (xmlParse3986DecOctet(&cur) != 0)
472 	    goto not_ipv4;
473 	if (*cur != '.')
474 	    goto not_ipv4;
475         if (xmlParse3986DecOctet(&cur) != 0)
476 	    goto not_ipv4;
477 	if (*cur != '.')
478 	    goto not_ipv4;
479         if (xmlParse3986DecOctet(&cur) != 0)
480 	    goto not_ipv4;
481 	goto found;
482 not_ipv4:
483         cur = *str;
484     }
485     /*
486      * then this should be a hostname which can be empty
487      */
488     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
489         NEXT(cur);
490 found:
491     if (uri != NULL) {
492 	if (uri->authority != NULL) xmlFree(uri->authority);
493 	uri->authority = NULL;
494 	if (uri->server != NULL) xmlFree(uri->server);
495 	if (cur != host) {
496 	    if (uri->cleanup & 2)
497 		uri->server = STRNDUP(host, cur - host);
498 	    else
499 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
500 	} else
501 	    uri->server = NULL;
502     }
503     *str = cur;
504     return(0);
505 }
506 
507 /**
508  * xmlParse3986Authority:
509  * @uri:  pointer to an URI structure
510  * @str:  the string to analyze
511  *
512  * Parse an authority part and fills in the appropriate fields
513  * of the @uri structure
514  *
515  * authority     = [ userinfo "@" ] host [ ":" port ]
516  *
517  * Returns 0 or the error code
518  */
519 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)520 xmlParse3986Authority(xmlURIPtr uri, const char **str)
521 {
522     const char *cur;
523     int ret;
524 
525     cur = *str;
526     /*
527      * try to parse an userinfo and check for the trailing @
528      */
529     ret = xmlParse3986Userinfo(uri, &cur);
530     if ((ret != 0) || (*cur != '@'))
531         cur = *str;
532     else
533         cur++;
534     ret = xmlParse3986Host(uri, &cur);
535     if (ret != 0) return(ret);
536     if (*cur == ':') {
537         cur++;
538         ret = xmlParse3986Port(uri, &cur);
539 	if (ret != 0) return(ret);
540     }
541     *str = cur;
542     return(0);
543 }
544 
545 /**
546  * xmlParse3986Segment:
547  * @str:  the string to analyze
548  * @forbid: an optional forbidden character
549  * @empty: allow an empty segment
550  *
551  * Parse a segment and advance @str past it; no URI structure
552  * is involved, the characters are only skipped
553  *
554  * segment       = *pchar
555  * segment-nz    = 1*pchar
556  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
557  *               ; non-zero-length segment without any colon ":"
558  *
559  * Returns 0 or the error code
560  */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *p = *str;

    /* Nothing that can start a segment: fine only if empty is allowed. */
    if (!ISA_PCHAR(p))
        return (empty ? 0 : 1);

    /* Skip pchars until the forbidden character or a non-pchar. */
    while (ISA_PCHAR(p)) {
        if (*p == forbid)
            break;
        NEXT(p);
    }
    *str = p;
    return (0);
}
577 
578 /**
579  * xmlParse3986PathAbEmpty:
580  * @uri:  pointer to an URI structure
581  * @str:  the string to analyze
582  *
583  * Parse an path absolute or empty and fills in the appropriate fields
584  * of the @uri structure
585  *
586  * path-abempty  = *( "/" segment )
587  *
588  * Returns 0 or the error code
589  */
590 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)591 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
592 {
593     const char *cur;
594     int ret;
595 
596     cur = *str;
597 
598     while (*cur == '/') {
599         cur++;
600 	ret = xmlParse3986Segment(&cur, 0, 1);
601 	if (ret != 0) return(ret);
602     }
603     if (uri != NULL) {
604 	if (uri->path != NULL) xmlFree(uri->path);
605         if (*str != cur) {
606             if (uri->cleanup & 2)
607                 uri->path = STRNDUP(*str, cur - *str);
608             else
609                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
610         } else {
611             uri->path = NULL;
612         }
613     }
614     *str = cur;
615     return (0);
616 }
617 
618 /**
619  * xmlParse3986PathAbsolute:
620  * @uri:  pointer to an URI structure
621  * @str:  the string to analyze
622  *
623  * Parse an path absolute and fills in the appropriate fields
624  * of the @uri structure
625  *
626  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
627  *
628  * Returns 0 or the error code
629  */
630 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)631 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
632 {
633     const char *cur;
634     int ret;
635 
636     cur = *str;
637 
638     if (*cur != '/')
639         return(1);
640     cur++;
641     ret = xmlParse3986Segment(&cur, 0, 0);
642     if (ret == 0) {
643 	while (*cur == '/') {
644 	    cur++;
645 	    ret = xmlParse3986Segment(&cur, 0, 1);
646 	    if (ret != 0) return(ret);
647 	}
648     }
649     if (uri != NULL) {
650 	if (uri->path != NULL) xmlFree(uri->path);
651         if (cur != *str) {
652             if (uri->cleanup & 2)
653                 uri->path = STRNDUP(*str, cur - *str);
654             else
655                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
656         } else {
657             uri->path = NULL;
658         }
659     }
660     *str = cur;
661     return (0);
662 }
663 
664 /**
665  * xmlParse3986PathRootless:
666  * @uri:  pointer to an URI structure
667  * @str:  the string to analyze
668  *
669  * Parse an path without root and fills in the appropriate fields
670  * of the @uri structure
671  *
672  * path-rootless = segment-nz *( "/" segment )
673  *
674  * Returns 0 or the error code
675  */
676 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)677 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
678 {
679     const char *cur;
680     int ret;
681 
682     cur = *str;
683 
684     ret = xmlParse3986Segment(&cur, 0, 0);
685     if (ret != 0) return(ret);
686     while (*cur == '/') {
687         cur++;
688 	ret = xmlParse3986Segment(&cur, 0, 1);
689 	if (ret != 0) return(ret);
690     }
691     if (uri != NULL) {
692 	if (uri->path != NULL) xmlFree(uri->path);
693         if (cur != *str) {
694             if (uri->cleanup & 2)
695                 uri->path = STRNDUP(*str, cur - *str);
696             else
697                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
698         } else {
699             uri->path = NULL;
700         }
701     }
702     *str = cur;
703     return (0);
704 }
705 
706 /**
707  * xmlParse3986PathNoScheme:
708  * @uri:  pointer to an URI structure
709  * @str:  the string to analyze
710  *
711  * Parse an path which is not a scheme and fills in the appropriate fields
712  * of the @uri structure
713  *
714  * path-noscheme = segment-nz-nc *( "/" segment )
715  *
716  * Returns 0 or the error code
717  */
718 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)719 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
720 {
721     const char *cur;
722     int ret;
723 
724     cur = *str;
725 
726     ret = xmlParse3986Segment(&cur, ':', 0);
727     if (ret != 0) return(ret);
728     while (*cur == '/') {
729         cur++;
730 	ret = xmlParse3986Segment(&cur, 0, 1);
731 	if (ret != 0) return(ret);
732     }
733     if (uri != NULL) {
734 	if (uri->path != NULL) xmlFree(uri->path);
735         if (cur != *str) {
736             if (uri->cleanup & 2)
737                 uri->path = STRNDUP(*str, cur - *str);
738             else
739                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
740         } else {
741             uri->path = NULL;
742         }
743     }
744     *str = cur;
745     return (0);
746 }
747 
748 /**
749  * xmlParse3986HierPart:
750  * @uri:  pointer to an URI structure
751  * @str:  the string to analyze
752  *
753  * Parse an hierarchical part and fills in the appropriate fields
754  * of the @uri structure
755  *
756  * hier-part     = "//" authority path-abempty
757  *                / path-absolute
758  *                / path-rootless
759  *                / path-empty
760  *
761  * Returns 0 or the error code
762  */
763 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)764 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
765 {
766     const char *cur;
767     int ret;
768 
769     cur = *str;
770 
771     if ((*cur == '/') && (*(cur + 1) == '/')) {
772         cur += 2;
773 	ret = xmlParse3986Authority(uri, &cur);
774 	if (ret != 0) return(ret);
775         /*
776          * An empty server is marked with a special URI value.
777          */
778 	if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
779 	    uri->port = PORT_EMPTY_SERVER;
780 	ret = xmlParse3986PathAbEmpty(uri, &cur);
781 	if (ret != 0) return(ret);
782 	*str = cur;
783 	return(0);
784     } else if (*cur == '/') {
785         ret = xmlParse3986PathAbsolute(uri, &cur);
786 	if (ret != 0) return(ret);
787     } else if (ISA_PCHAR(cur)) {
788         ret = xmlParse3986PathRootless(uri, &cur);
789 	if (ret != 0) return(ret);
790     } else {
791 	/* path-empty is effectively empty */
792 	if (uri != NULL) {
793 	    if (uri->path != NULL) xmlFree(uri->path);
794 	    uri->path = NULL;
795 	}
796     }
797     *str = cur;
798     return (0);
799 }
800 
801 /**
802  * xmlParse3986RelativeRef:
803  * @uri:  pointer to an URI structure
804  * @str:  the string to analyze
805  *
806  * Parse an URI string and fills in the appropriate fields
807  * of the @uri structure
808  *
809  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
810  * relative-part = "//" authority path-abempty
811  *               / path-absolute
812  *               / path-noscheme
813  *               / path-empty
814  *
815  * Returns 0 or the error code
816  */
817 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)818 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
819     int ret;
820 
821     if ((*str == '/') && (*(str + 1) == '/')) {
822         str += 2;
823 	ret = xmlParse3986Authority(uri, &str);
824 	if (ret != 0) return(ret);
825 	ret = xmlParse3986PathAbEmpty(uri, &str);
826 	if (ret != 0) return(ret);
827     } else if (*str == '/') {
828 	ret = xmlParse3986PathAbsolute(uri, &str);
829 	if (ret != 0) return(ret);
830     } else if (ISA_PCHAR(str)) {
831         ret = xmlParse3986PathNoScheme(uri, &str);
832 	if (ret != 0) return(ret);
833     } else {
834 	/* path-empty is effectively empty */
835 	if (uri != NULL) {
836 	    if (uri->path != NULL) xmlFree(uri->path);
837 	    uri->path = NULL;
838 	}
839     }
840 
841     if (*str == '?') {
842 	str++;
843 	ret = xmlParse3986Query(uri, &str);
844 	if (ret != 0) return(ret);
845     }
846     if (*str == '#') {
847 	str++;
848 	ret = xmlParse3986Fragment(uri, &str);
849 	if (ret != 0) return(ret);
850     }
851     if (*str != 0) {
852 	xmlCleanURI(uri);
853 	return(1);
854     }
855     return(0);
856 }
857 
858 
859 /**
860  * xmlParse3986URI:
861  * @uri:  pointer to an URI structure
862  * @str:  the string to analyze
863  *
864  * Parse an URI string and fills in the appropriate fields
865  * of the @uri structure
866  *
867  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
868  *
869  * Returns 0 or the error code
870  */
871 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)872 xmlParse3986URI(xmlURIPtr uri, const char *str) {
873     int ret;
874 
875     ret = xmlParse3986Scheme(uri, &str);
876     if (ret != 0) return(ret);
877     if (*str != ':') {
878 	return(1);
879     }
880     str++;
881     ret = xmlParse3986HierPart(uri, &str);
882     if (ret != 0) return(ret);
883     if (*str == '?') {
884 	str++;
885 	ret = xmlParse3986Query(uri, &str);
886 	if (ret != 0) return(ret);
887     }
888     if (*str == '#') {
889 	str++;
890 	ret = xmlParse3986Fragment(uri, &str);
891 	if (ret != 0) return(ret);
892     }
893     if (*str != 0) {
894 	xmlCleanURI(uri);
895 	return(1);
896     }
897     return(0);
898 }
899 
900 /**
901  * xmlParse3986URIReference:
902  * @uri:  pointer to an URI structure
903  * @str:  the string to analyze
904  *
905  * Parse an URI reference string and fills in the appropriate fields
906  * of the @uri structure
907  *
908  * URI-reference = URI / relative-ref
909  *
910  * Returns 0 or the error code
911  */
912 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)913 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
914     int ret;
915 
916     if (str == NULL)
917 	return(-1);
918     xmlCleanURI(uri);
919 
920     /*
921      * Try first to parse absolute refs, then fallback to relative if
922      * it fails.
923      */
924     ret = xmlParse3986URI(uri, str);
925     if (ret != 0) {
926 	xmlCleanURI(uri);
927         ret = xmlParse3986RelativeRef(uri, str);
928 	if (ret != 0) {
929 	    xmlCleanURI(uri);
930 	    return(ret);
931 	}
932     }
933     return(0);
934 }
935 
936 /**
937  * xmlParseURI:
938  * @str:  the URI string to analyze
939  *
940  * Parse an URI based on RFC 3986
941  *
942  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
943  *
944  * Returns a newly built xmlURIPtr or NULL in case of error
945  */
946 xmlURIPtr
xmlParseURI(const char * str)947 xmlParseURI(const char *str) {
948     xmlURIPtr uri;
949     int ret;
950 
951     if (str == NULL)
952 	return(NULL);
953     uri = xmlCreateURI();
954     if (uri != NULL) {
955 	ret = xmlParse3986URIReference(uri, str);
956         if (ret) {
957 	    xmlFreeURI(uri);
958 	    return(NULL);
959 	}
960     }
961     return(uri);
962 }
963 
964 /**
965  * xmlParseURIReference:
966  * @uri:  pointer to an URI structure
967  * @str:  the string to analyze
968  *
969  * Parse an URI reference string based on RFC 3986 and fills in the
970  * appropriate fields of the @uri structure
971  *
972  * URI-reference = URI / relative-ref
973  *
974  * Returns 0 or the error code
975  */
976 int
xmlParseURIReference(xmlURIPtr uri,const char * str)977 xmlParseURIReference(xmlURIPtr uri, const char *str) {
978     return(xmlParse3986URIReference(uri, str));
979 }
980 
981 /**
982  * xmlParseURIRaw:
983  * @str:  the URI string to analyze
984  * @raw:  if 1 unescaping of URI pieces are disabled
985  *
986  * Parse an URI but allows to keep intact the original fragments.
987  *
988  * URI-reference = URI / relative-ref
989  *
990  * Returns a newly built xmlURIPtr or NULL in case of error
991  */
992 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)993 xmlParseURIRaw(const char *str, int raw) {
994     xmlURIPtr uri;
995     int ret;
996 
997     if (str == NULL)
998 	return(NULL);
999     uri = xmlCreateURI();
1000     if (uri != NULL) {
1001         if (raw) {
1002 	    uri->cleanup |= 2;
1003 	}
1004 	ret = xmlParseURIReference(uri, str);
1005         if (ret) {
1006 	    xmlFreeURI(uri);
1007 	    return(NULL);
1008 	}
1009     }
1010     return(uri);
1011 }
1012 
1013 /************************************************************************
1014  *									*
1015  *			Generic URI structure functions			*
1016  *									*
1017  ************************************************************************/
1018 
1019 /**
1020  * xmlCreateURI:
1021  *
1022  * Simply creates an empty xmlURI
1023  *
1024  * Returns the new structure or NULL in case of error
1025  */
1026 xmlURIPtr
xmlCreateURI(void)1027 xmlCreateURI(void) {
1028     xmlURIPtr ret;
1029 
1030     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1031     if (ret == NULL) {
1032         xmlURIErrMemory("creating URI structure\n");
1033 	return(NULL);
1034     }
1035     memset(ret, 0, sizeof(xmlURI));
1036     ret->port = PORT_EMPTY;
1037     return(ret);
1038 }
1039 
1040 /**
1041  * xmlSaveUriRealloc:
1042  *
1043  * Function to handle properly a reallocation when saving an URI
1044  * Also imposes some limit on the length of an URI string output
1045  */
1046 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1047 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1048     xmlChar *temp;
1049     int tmp;
1050 
1051     if (*max > MAX_URI_LENGTH) {
1052         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1053         return(NULL);
1054     }
1055     tmp = *max * 2;
1056     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1057     if (temp == NULL) {
1058         xmlURIErrMemory("saving URI\n");
1059         return(NULL);
1060     }
1061     *max = tmp;
1062     return(temp);
1063 }
1064 
1065 /**
1066  * xmlSaveUri:
1067  * @uri:  pointer to an xmlURI
1068  *
1069  * Save the URI as an escaped string
1070  *
1071  * Returns a new string (to be deallocated by caller)
1072  */
1073 xmlChar *
xmlSaveUri(xmlURIPtr uri)1074 xmlSaveUri(xmlURIPtr uri) {
1075     xmlChar *ret = NULL;
1076     xmlChar *temp;
1077     const char *p;
1078     int len;
1079     int max;
1080 
1081     if (uri == NULL) return(NULL);
1082 
1083 
1084     max = 80;
1085     ret = (xmlChar *) xmlMallocAtomic(max + 1);
1086     if (ret == NULL) {
1087         xmlURIErrMemory("saving URI\n");
1088 	return(NULL);
1089     }
1090     len = 0;
1091 
1092     if (uri->scheme != NULL) {
1093 	p = uri->scheme;
1094 	while (*p != 0) {
1095 	    if (len >= max) {
1096                 temp = xmlSaveUriRealloc(ret, &max);
1097                 if (temp == NULL) goto mem_error;
1098 		ret = temp;
1099 	    }
1100 	    ret[len++] = *p++;
1101 	}
1102 	if (len >= max) {
1103             temp = xmlSaveUriRealloc(ret, &max);
1104             if (temp == NULL) goto mem_error;
1105             ret = temp;
1106 	}
1107 	ret[len++] = ':';
1108     }
1109     if (uri->opaque != NULL) {
1110 	p = uri->opaque;
1111 	while (*p != 0) {
1112 	    if (len + 3 >= max) {
1113                 temp = xmlSaveUriRealloc(ret, &max);
1114                 if (temp == NULL) goto mem_error;
1115                 ret = temp;
1116 	    }
1117 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1118 		ret[len++] = *p++;
1119 	    else {
1120 		int val = *(unsigned char *)p++;
1121 		int hi = val / 0x10, lo = val % 0x10;
1122 		ret[len++] = '%';
1123 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1124 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1125 	    }
1126 	}
1127     } else {
1128 	if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1129 	    if (len + 3 >= max) {
1130                 temp = xmlSaveUriRealloc(ret, &max);
1131                 if (temp == NULL) goto mem_error;
1132                 ret = temp;
1133 	    }
1134 	    ret[len++] = '/';
1135 	    ret[len++] = '/';
1136 	    if (uri->user != NULL) {
1137 		p = uri->user;
1138 		while (*p != 0) {
1139 		    if (len + 3 >= max) {
1140                         temp = xmlSaveUriRealloc(ret, &max);
1141                         if (temp == NULL) goto mem_error;
1142                         ret = temp;
1143 		    }
1144 		    if ((IS_UNRESERVED(*(p))) ||
1145 			((*(p) == ';')) || ((*(p) == ':')) ||
1146 			((*(p) == '&')) || ((*(p) == '=')) ||
1147 			((*(p) == '+')) || ((*(p) == '$')) ||
1148 			((*(p) == ',')))
1149 			ret[len++] = *p++;
1150 		    else {
1151 			int val = *(unsigned char *)p++;
1152 			int hi = val / 0x10, lo = val % 0x10;
1153 			ret[len++] = '%';
1154 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1155 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1156 		    }
1157 		}
1158 		if (len + 3 >= max) {
1159                     temp = xmlSaveUriRealloc(ret, &max);
1160                     if (temp == NULL) goto mem_error;
1161                     ret = temp;
1162 		}
1163 		ret[len++] = '@';
1164 	    }
1165 	    if (uri->server != NULL) {
1166 		p = uri->server;
1167 		while (*p != 0) {
1168 		    if (len >= max) {
1169 			temp = xmlSaveUriRealloc(ret, &max);
1170 			if (temp == NULL) goto mem_error;
1171 			ret = temp;
1172 		    }
1173                     /* TODO: escaping? */
1174 		    ret[len++] = (xmlChar) *p++;
1175 		}
1176 	    }
1177             if (uri->port > 0) {
1178                 if (len + 10 >= max) {
1179                     temp = xmlSaveUriRealloc(ret, &max);
1180                     if (temp == NULL) goto mem_error;
1181                     ret = temp;
1182                 }
1183                 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1184             }
1185 	} else if (uri->authority != NULL) {
1186 	    if (len + 3 >= max) {
1187                 temp = xmlSaveUriRealloc(ret, &max);
1188                 if (temp == NULL) goto mem_error;
1189                 ret = temp;
1190 	    }
1191 	    ret[len++] = '/';
1192 	    ret[len++] = '/';
1193 	    p = uri->authority;
1194 	    while (*p != 0) {
1195 		if (len + 3 >= max) {
1196                     temp = xmlSaveUriRealloc(ret, &max);
1197                     if (temp == NULL) goto mem_error;
1198                     ret = temp;
1199 		}
1200 		if ((IS_UNRESERVED(*(p))) ||
1201                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1202                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1203                     ((*(p) == '=')) || ((*(p) == '+')))
1204 		    ret[len++] = *p++;
1205 		else {
1206 		    int val = *(unsigned char *)p++;
1207 		    int hi = val / 0x10, lo = val % 0x10;
1208 		    ret[len++] = '%';
1209 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1210 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1211 		}
1212 	    }
1213 	} else if (uri->scheme != NULL) {
1214 	    if (len + 3 >= max) {
1215                 temp = xmlSaveUriRealloc(ret, &max);
1216                 if (temp == NULL) goto mem_error;
1217                 ret = temp;
1218 	    }
1219 	}
1220 	if (uri->path != NULL) {
1221 	    p = uri->path;
1222 	    /*
1223 	     * the colon in file:///d: should not be escaped or
1224 	     * Windows accesses fail later.
1225 	     */
1226 	    if ((uri->scheme != NULL) &&
1227 		(p[0] == '/') &&
1228 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1229 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1230 		(p[2] == ':') &&
1231 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1232 		if (len + 3 >= max) {
1233                     temp = xmlSaveUriRealloc(ret, &max);
1234                     if (temp == NULL) goto mem_error;
1235                     ret = temp;
1236 		}
1237 		ret[len++] = *p++;
1238 		ret[len++] = *p++;
1239 		ret[len++] = *p++;
1240 	    }
1241 	    while (*p != 0) {
1242 		if (len + 3 >= max) {
1243                     temp = xmlSaveUriRealloc(ret, &max);
1244                     if (temp == NULL) goto mem_error;
1245                     ret = temp;
1246 		}
1247 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1248                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1249 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1250 	            ((*(p) == ',')))
1251 		    ret[len++] = *p++;
1252 		else {
1253 		    int val = *(unsigned char *)p++;
1254 		    int hi = val / 0x10, lo = val % 0x10;
1255 		    ret[len++] = '%';
1256 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1257 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1258 		}
1259 	    }
1260 	}
1261 	if (uri->query_raw != NULL) {
1262 	    if (len + 1 >= max) {
1263                 temp = xmlSaveUriRealloc(ret, &max);
1264                 if (temp == NULL) goto mem_error;
1265                 ret = temp;
1266 	    }
1267 	    ret[len++] = '?';
1268 	    p = uri->query_raw;
1269 	    while (*p != 0) {
1270 		if (len + 1 >= max) {
1271                     temp = xmlSaveUriRealloc(ret, &max);
1272                     if (temp == NULL) goto mem_error;
1273                     ret = temp;
1274 		}
1275 		ret[len++] = *p++;
1276 	    }
1277 	} else if (uri->query != NULL) {
1278 	    if (len + 3 >= max) {
1279                 temp = xmlSaveUriRealloc(ret, &max);
1280                 if (temp == NULL) goto mem_error;
1281                 ret = temp;
1282 	    }
1283 	    ret[len++] = '?';
1284 	    p = uri->query;
1285 	    while (*p != 0) {
1286 		if (len + 3 >= max) {
1287                     temp = xmlSaveUriRealloc(ret, &max);
1288                     if (temp == NULL) goto mem_error;
1289                     ret = temp;
1290 		}
1291 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1292 		    ret[len++] = *p++;
1293 		else {
1294 		    int val = *(unsigned char *)p++;
1295 		    int hi = val / 0x10, lo = val % 0x10;
1296 		    ret[len++] = '%';
1297 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1298 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1299 		}
1300 	    }
1301 	}
1302     }
1303     if (uri->fragment != NULL) {
1304 	if (len + 3 >= max) {
1305             temp = xmlSaveUriRealloc(ret, &max);
1306             if (temp == NULL) goto mem_error;
1307             ret = temp;
1308 	}
1309 	ret[len++] = '#';
1310 	p = uri->fragment;
1311 	while (*p != 0) {
1312 	    if (len + 3 >= max) {
1313                 temp = xmlSaveUriRealloc(ret, &max);
1314                 if (temp == NULL) goto mem_error;
1315                 ret = temp;
1316 	    }
1317 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1318 		ret[len++] = *p++;
1319 	    else {
1320 		int val = *(unsigned char *)p++;
1321 		int hi = val / 0x10, lo = val % 0x10;
1322 		ret[len++] = '%';
1323 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1324 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1325 	    }
1326 	}
1327     }
1328     if (len >= max) {
1329         temp = xmlSaveUriRealloc(ret, &max);
1330         if (temp == NULL) goto mem_error;
1331         ret = temp;
1332     }
1333     ret[len] = 0;
1334     return(ret);
1335 
1336 mem_error:
1337     xmlFree(ret);
1338     return(NULL);
1339 }
1340 
1341 /**
1342  * xmlPrintURI:
1343  * @stream:  a FILE* for the output
1344  * @uri:  pointer to an xmlURI
1345  *
1346  * Prints the URI in the stream @stream.
1347  */
1348 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1349 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1350     xmlChar *out;
1351 
1352     out = xmlSaveUri(uri);
1353     if (out != NULL) {
1354 	fprintf(stream, "%s", (char *) out);
1355 	xmlFree(out);
1356     }
1357 }
1358 
1359 /**
1360  * xmlCleanURI:
1361  * @uri:  pointer to an xmlURI
1362  *
1363  * Make sure the xmlURI struct is free of content
1364  */
1365 static void
xmlCleanURI(xmlURIPtr uri)1366 xmlCleanURI(xmlURIPtr uri) {
1367     if (uri == NULL) return;
1368 
1369     if (uri->scheme != NULL) xmlFree(uri->scheme);
1370     uri->scheme = NULL;
1371     if (uri->server != NULL) xmlFree(uri->server);
1372     uri->server = NULL;
1373     if (uri->user != NULL) xmlFree(uri->user);
1374     uri->user = NULL;
1375     if (uri->path != NULL) xmlFree(uri->path);
1376     uri->path = NULL;
1377     if (uri->fragment != NULL) xmlFree(uri->fragment);
1378     uri->fragment = NULL;
1379     if (uri->opaque != NULL) xmlFree(uri->opaque);
1380     uri->opaque = NULL;
1381     if (uri->authority != NULL) xmlFree(uri->authority);
1382     uri->authority = NULL;
1383     if (uri->query != NULL) xmlFree(uri->query);
1384     uri->query = NULL;
1385     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1386     uri->query_raw = NULL;
1387 }
1388 
1389 /**
1390  * xmlFreeURI:
1391  * @uri:  pointer to an xmlURI
1392  *
1393  * Free up the xmlURI struct
1394  */
1395 void
xmlFreeURI(xmlURIPtr uri)1396 xmlFreeURI(xmlURIPtr uri) {
1397     if (uri == NULL) return;
1398 
1399     if (uri->scheme != NULL) xmlFree(uri->scheme);
1400     if (uri->server != NULL) xmlFree(uri->server);
1401     if (uri->user != NULL) xmlFree(uri->user);
1402     if (uri->path != NULL) xmlFree(uri->path);
1403     if (uri->fragment != NULL) xmlFree(uri->fragment);
1404     if (uri->opaque != NULL) xmlFree(uri->opaque);
1405     if (uri->authority != NULL) xmlFree(uri->authority);
1406     if (uri->query != NULL) xmlFree(uri->query);
1407     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1408     xmlFree(uri);
1409 }
1410 
1411 /************************************************************************
1412  *									*
1413  *			Helper functions				*
1414  *									*
1415  ************************************************************************/
1416 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of consecutive '/' inside
 * the path are collapsed to a single '/' as part of the first pass.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is always at the start of a segment at the top of the loop.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining separator */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The regions overlap, so copy byte by byte instead of strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the separator(s) preceding "cur". */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /* There is no previous segment only if nothing precedes "cur" or
         * everything before it is slashes.  Reaching "path" on a non-slash
         * byte means the previous segment is a single character starting
         * the buffer, which must still be re-examined.
         */
        if ((segp == path) && ((cur == path) || (segp[0] == '/')))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1605 
/*
 * is_hex:
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    int isDigit = (c >= '0') && (c <= '9');
    int isLower = (c >= 'a') && (c <= 'f');
    int isUpper = (c >= 'A') && (c <= 'F');

    return(isDigit || isLower || isUpper);
}
1613 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI.
 * Every "%XX" triplet with two hexadecimal digits that lies entirely
 * within the @len bytes is replaced by the single byte it encodes
 * (no character-encoding conversion); all other bytes are copied
 * unchanged. The result is NUL terminated and is never longer than
 * the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated and must
 * be freed by the caller; otherwise the output is written to @target.
 *
 * Returns the unescaped string (@target or the new buffer), or NULL
 * only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    in = str;
    out = ret;
    while (len > 0) {
        if ((len <= 2) || (*in != '%') ||
            (!is_hex(in[1])) || (!is_hex(in[2]))) {
            /* plain byte, or an incomplete / malformed escape */
            *out++ = *in++;
            len--;
        } else {
            int value = 0;
            int i;

            /* fold the two hex digits following '%' into one byte */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                if ((digit >= '0') && (digit <= '9'))
                    value = value * 16 + (digit - '0');
                else if ((digit >= 'a') && (digit <= 'f'))
                    value = value * 16 + (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value = value * 16 + (digit - 'A') + 10;
            }
            in += 3;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) value;
        }
    }
    *out = 0;
    return(ret);
}
1677 
1678 /**
1679  * xmlURIEscapeStr:
1680  * @str:  string to escape
1681  * @list: exception list string of chars not to escape
1682  *
1683  * This routine escapes a string to hex, ignoring reserved characters
1684  * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1685  *
1686  * Returns a new escaped string or NULL in case of error.
1687  */
1688 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1689 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1690     xmlChar *ret, ch;
1691     xmlChar *temp;
1692     const xmlChar *in;
1693     int len, out;
1694 
1695     if (str == NULL)
1696 	return(NULL);
1697     if (str[0] == 0)
1698 	return(xmlStrdup(str));
1699     len = xmlStrlen(str);
1700     if (!(len > 0)) return(NULL);
1701 
1702     len += 20;
1703     ret = (xmlChar *) xmlMallocAtomic(len);
1704     if (ret == NULL) {
1705         xmlURIErrMemory("escaping URI value\n");
1706 	return(NULL);
1707     }
1708     in = (const xmlChar *) str;
1709     out = 0;
1710     while(*in != 0) {
1711 	if (len - out <= 3) {
1712             temp = xmlSaveUriRealloc(ret, &len);
1713 	    if (temp == NULL) {
1714                 xmlURIErrMemory("escaping URI value\n");
1715 		xmlFree(ret);
1716 		return(NULL);
1717 	    }
1718 	    ret = temp;
1719 	}
1720 
1721 	ch = *in;
1722 
1723 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1724 	    unsigned char val;
1725 	    ret[out++] = '%';
1726 	    val = ch >> 4;
1727 	    if (val <= 9)
1728 		ret[out++] = '0' + val;
1729 	    else
1730 		ret[out++] = 'A' + val - 0xA;
1731 	    val = ch & 0xF;
1732 	    if (val <= 9)
1733 		ret[out++] = '0' + val;
1734 	    else
1735 		ret[out++] = 'A' + val - 0xA;
1736 	    in++;
1737 	} else {
1738 	    ret[out++] = *in++;
1739 	}
1740 
1741     }
1742     ret[out] = 0;
1743     return(ret);
1744 }
1745 
1746 /**
1747  * xmlURIEscape:
1748  * @str:  the string of the URI to escape
1749  *
1750  * Escaping routine, does not do validity checks !
1751  * It will try to escape the chars needing this, but this is heuristic
1752  * based it's impossible to be sure.
1753  *
1754  * Returns an copy of the string, but escaped
1755  *
1756  * 25 May 2001
1757  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1758  * according to RFC2396.
1759  *   - Carl Douglas
1760  */
1761 xmlChar *
xmlURIEscape(const xmlChar * str)1762 xmlURIEscape(const xmlChar * str)
1763 {
1764     xmlChar *ret, *segment = NULL;
1765     xmlURIPtr uri;
1766     int ret2;
1767 
1768     if (str == NULL)
1769         return (NULL);
1770 
1771     uri = xmlCreateURI();
1772     if (uri != NULL) {
1773 	/*
1774 	 * Allow escaping errors in the unescaped form
1775 	 */
1776         uri->cleanup = 1;
1777         ret2 = xmlParseURIReference(uri, (const char *)str);
1778         if (ret2) {
1779             xmlFreeURI(uri);
1780             return (NULL);
1781         }
1782     }
1783 
1784     if (!uri)
1785         return NULL;
1786 
1787     ret = NULL;
1788 
1789 #define NULLCHK(p) if(!p) { \
1790          xmlURIErrMemory("escaping URI value\n"); \
1791          xmlFreeURI(uri); \
1792          xmlFree(ret); \
1793          return NULL; } \
1794 
1795     if (uri->scheme) {
1796         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1797         NULLCHK(segment)
1798         ret = xmlStrcat(ret, segment);
1799         ret = xmlStrcat(ret, BAD_CAST ":");
1800         xmlFree(segment);
1801     }
1802 
1803     if (uri->authority) {
1804         segment =
1805             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1806         NULLCHK(segment)
1807         ret = xmlStrcat(ret, BAD_CAST "//");
1808         ret = xmlStrcat(ret, segment);
1809         xmlFree(segment);
1810     }
1811 
1812     if (uri->user) {
1813         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1814         NULLCHK(segment)
1815         ret = xmlStrcat(ret,BAD_CAST "//");
1816         ret = xmlStrcat(ret, segment);
1817         ret = xmlStrcat(ret, BAD_CAST "@");
1818         xmlFree(segment);
1819     }
1820 
1821     if (uri->server) {
1822         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1823         NULLCHK(segment)
1824         if (uri->user == NULL)
1825             ret = xmlStrcat(ret, BAD_CAST "//");
1826         ret = xmlStrcat(ret, segment);
1827         xmlFree(segment);
1828     }
1829 
1830     if (uri->port > 0) {
1831         xmlChar port[11];
1832 
1833         snprintf((char *) port, 11, "%d", uri->port);
1834         ret = xmlStrcat(ret, BAD_CAST ":");
1835         ret = xmlStrcat(ret, port);
1836     }
1837 
1838     if (uri->path) {
1839         segment =
1840             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1841         NULLCHK(segment)
1842         ret = xmlStrcat(ret, segment);
1843         xmlFree(segment);
1844     }
1845 
1846     if (uri->query_raw) {
1847         ret = xmlStrcat(ret, BAD_CAST "?");
1848         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1849     }
1850     else if (uri->query) {
1851         segment =
1852             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1853         NULLCHK(segment)
1854         ret = xmlStrcat(ret, BAD_CAST "?");
1855         ret = xmlStrcat(ret, segment);
1856         xmlFree(segment);
1857     }
1858 
1859     if (uri->opaque) {
1860         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1861         NULLCHK(segment)
1862         ret = xmlStrcat(ret, segment);
1863         xmlFree(segment);
1864     }
1865 
1866     if (uri->fragment) {
1867         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1868         NULLCHK(segment)
1869         ret = xmlStrcat(ret, BAD_CAST "#");
1870         ret = xmlStrcat(ret, segment);
1871         xmlFree(segment);
1872     }
1873 
1874     xmlFreeURI(uri);
1875 #undef NULLCHK
1876 
1877     return (ret);
1878 }
1879 
1880 /************************************************************************
1881  *									*
1882  *			Public functions				*
1883  *									*
1884  ************************************************************************/
1885 
1886 /**
1887  * xmlBuildURI:
1888  * @URI:  the URI instance found in the document
1889  * @base:  the base value
1890  *
1891  * Computes the final URI of the reference done by checking that
1892  * the given URI is valid, and building the final URI using the
1893  * base URI. This is processed according to section 5.2 of the
1894  * RFC 2396
1895  *
1896  * 5.2. Resolving Relative References to Absolute Form
1897  *
1898  * Returns a new URI string (to be freed by the caller) or NULL in case
1899  *         of error.
1900  */
1901 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1902 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1903     xmlChar *val = NULL;
1904     int ret, len, indx, cur, out;
1905     xmlURIPtr ref = NULL;
1906     xmlURIPtr bas = NULL;
1907     xmlURIPtr res = NULL;
1908 
1909     /*
1910      * 1) The URI reference is parsed into the potential four components and
1911      *    fragment identifier, as described in Section 4.3.
1912      *
1913      *    NOTE that a completely empty URI is treated by modern browsers
1914      *    as a reference to "." rather than as a synonym for the current
1915      *    URI.  Should we do that here?
1916      */
1917     if (URI == NULL)
1918 	ret = -1;
1919     else {
1920 	if (*URI) {
1921 	    ref = xmlCreateURI();
1922 	    if (ref == NULL)
1923 		goto done;
1924 	    ret = xmlParseURIReference(ref, (const char *) URI);
1925 	}
1926 	else
1927 	    ret = 0;
1928     }
1929     if (ret != 0)
1930 	goto done;
1931     if ((ref != NULL) && (ref->scheme != NULL)) {
1932 	/*
1933 	 * The URI is absolute don't modify.
1934 	 */
1935 	val = xmlStrdup(URI);
1936 	goto done;
1937     }
1938     if (base == NULL)
1939 	ret = -1;
1940     else {
1941 	bas = xmlCreateURI();
1942 	if (bas == NULL)
1943 	    goto done;
1944 	ret = xmlParseURIReference(bas, (const char *) base);
1945     }
1946     if (ret != 0) {
1947 	if (ref)
1948 	    val = xmlSaveUri(ref);
1949 	goto done;
1950     }
1951     if (ref == NULL) {
1952 	/*
1953 	 * the base fragment must be ignored
1954 	 */
1955 	if (bas->fragment != NULL) {
1956 	    xmlFree(bas->fragment);
1957 	    bas->fragment = NULL;
1958 	}
1959 	val = xmlSaveUri(bas);
1960 	goto done;
1961     }
1962 
1963     /*
1964      * 2) If the path component is empty and the scheme, authority, and
1965      *    query components are undefined, then it is a reference to the
1966      *    current document and we are done.  Otherwise, the reference URI's
1967      *    query and fragment components are defined as found (or not found)
1968      *    within the URI reference and not inherited from the base URI.
1969      *
1970      *    NOTE that in modern browsers, the parsing differs from the above
1971      *    in the following aspect:  the query component is allowed to be
1972      *    defined while still treating this as a reference to the current
1973      *    document.
1974      */
1975     res = xmlCreateURI();
1976     if (res == NULL)
1977 	goto done;
1978     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1979 	((ref->authority == NULL) && (ref->server == NULL) &&
1980          (ref->port == PORT_EMPTY))) {
1981 	if (bas->scheme != NULL)
1982 	    res->scheme = xmlMemStrdup(bas->scheme);
1983 	if (bas->authority != NULL)
1984 	    res->authority = xmlMemStrdup(bas->authority);
1985 	else {
1986 	    if (bas->server != NULL)
1987 		res->server = xmlMemStrdup(bas->server);
1988 	    if (bas->user != NULL)
1989 		res->user = xmlMemStrdup(bas->user);
1990 	    res->port = bas->port;
1991 	}
1992 	if (bas->path != NULL)
1993 	    res->path = xmlMemStrdup(bas->path);
1994 	if (ref->query_raw != NULL)
1995 	    res->query_raw = xmlMemStrdup (ref->query_raw);
1996 	else if (ref->query != NULL)
1997 	    res->query = xmlMemStrdup(ref->query);
1998 	else if (bas->query_raw != NULL)
1999 	    res->query_raw = xmlMemStrdup(bas->query_raw);
2000 	else if (bas->query != NULL)
2001 	    res->query = xmlMemStrdup(bas->query);
2002 	if (ref->fragment != NULL)
2003 	    res->fragment = xmlMemStrdup(ref->fragment);
2004 	goto step_7;
2005     }
2006 
2007     /*
2008      * 3) If the scheme component is defined, indicating that the reference
2009      *    starts with a scheme name, then the reference is interpreted as an
2010      *    absolute URI and we are done.  Otherwise, the reference URI's
2011      *    scheme is inherited from the base URI's scheme component.
2012      */
2013     if (ref->scheme != NULL) {
2014 	val = xmlSaveUri(ref);
2015 	goto done;
2016     }
2017     if (bas->scheme != NULL)
2018 	res->scheme = xmlMemStrdup(bas->scheme);
2019 
2020     if (ref->query_raw != NULL)
2021 	res->query_raw = xmlMemStrdup(ref->query_raw);
2022     else if (ref->query != NULL)
2023 	res->query = xmlMemStrdup(ref->query);
2024     if (ref->fragment != NULL)
2025 	res->fragment = xmlMemStrdup(ref->fragment);
2026 
2027     /*
2028      * 4) If the authority component is defined, then the reference is a
2029      *    network-path and we skip to step 7.  Otherwise, the reference
2030      *    URI's authority is inherited from the base URI's authority
2031      *    component, which will also be undefined if the URI scheme does not
2032      *    use an authority component.
2033      */
2034     if ((ref->authority != NULL) || (ref->server != NULL) ||
2035          (ref->port != PORT_EMPTY)) {
2036 	if (ref->authority != NULL)
2037 	    res->authority = xmlMemStrdup(ref->authority);
2038 	else {
2039             if (ref->server != NULL)
2040                 res->server = xmlMemStrdup(ref->server);
2041 	    if (ref->user != NULL)
2042 		res->user = xmlMemStrdup(ref->user);
2043             res->port = ref->port;
2044 	}
2045 	if (ref->path != NULL)
2046 	    res->path = xmlMemStrdup(ref->path);
2047 	goto step_7;
2048     }
2049     if (bas->authority != NULL)
2050 	res->authority = xmlMemStrdup(bas->authority);
2051     else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2052 	if (bas->server != NULL)
2053 	    res->server = xmlMemStrdup(bas->server);
2054 	if (bas->user != NULL)
2055 	    res->user = xmlMemStrdup(bas->user);
2056 	res->port = bas->port;
2057     }
2058 
2059     /*
2060      * 5) If the path component begins with a slash character ("/"), then
2061      *    the reference is an absolute-path and we skip to step 7.
2062      */
2063     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2064 	res->path = xmlMemStrdup(ref->path);
2065 	goto step_7;
2066     }
2067 
2068 
2069     /*
2070      * 6) If this step is reached, then we are resolving a relative-path
2071      *    reference.  The relative path needs to be merged with the base
2072      *    URI's path.  Although there are many ways to do this, we will
2073      *    describe a simple method using a separate string buffer.
2074      *
2075      * Allocate a buffer large enough for the result string.
2076      */
2077     len = 2; /* extra / and 0 */
2078     if (ref->path != NULL)
2079 	len += strlen(ref->path);
2080     if (bas->path != NULL)
2081 	len += strlen(bas->path);
2082     res->path = (char *) xmlMallocAtomic(len);
2083     if (res->path == NULL) {
2084         xmlURIErrMemory("resolving URI against base\n");
2085 	goto done;
2086     }
2087     res->path[0] = 0;
2088 
2089     /*
2090      * a) All but the last segment of the base URI's path component is
2091      *    copied to the buffer.  In other words, any characters after the
2092      *    last (right-most) slash character, if any, are excluded.
2093      */
2094     cur = 0;
2095     out = 0;
2096     if (bas->path != NULL) {
2097 	while (bas->path[cur] != 0) {
2098 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2099 		cur++;
2100 	    if (bas->path[cur] == 0)
2101 		break;
2102 
2103 	    cur++;
2104 	    while (out < cur) {
2105 		res->path[out] = bas->path[out];
2106 		out++;
2107 	    }
2108 	}
2109     }
2110     res->path[out] = 0;
2111 
2112     /*
2113      * b) The reference's path component is appended to the buffer
2114      *    string.
2115      */
2116     if (ref->path != NULL && ref->path[0] != 0) {
2117 	indx = 0;
2118 	/*
2119 	 * Ensure the path includes a '/'
2120 	 */
2121 	if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2122 	    res->path[out++] = '/';
2123 	while (ref->path[indx] != 0) {
2124 	    res->path[out++] = ref->path[indx++];
2125 	}
2126     }
2127     res->path[out] = 0;
2128 
2129     /*
2130      * Steps c) to h) are really path normalization steps
2131      */
2132     xmlNormalizeURIPath(res->path);
2133 
2134 step_7:
2135 
2136     /*
2137      * 7) The resulting URI components, including any inherited from the
2138      *    base URI, are recombined to give the absolute form of the URI
2139      *    reference.
2140      */
2141     val = xmlSaveUri(res);
2142 
2143 done:
2144     if (ref != NULL)
2145 	xmlFreeURI(ref);
2146     if (bas != NULL)
2147 	xmlFreeURI(bas);
2148     if (res != NULL)
2149 	xmlFreeURI(res);
2150     return(val);
2151 }
2152 
2153 /**
2154  * xmlBuildRelativeURI:
2155  * @URI:  the URI reference under consideration
2156  * @base:  the base value
2157  *
2158  * Expresses the URI of the reference in terms relative to the
2159  * base.  Some examples of this operation include:
2160  *     base = "http://site1.com/docs/book1.html"
2161  *        URI input                        URI returned
2162  *     docs/pic1.gif                    pic1.gif
2163  *     docs/img/pic1.gif                img/pic1.gif
2164  *     img/pic1.gif                     ../img/pic1.gif
2165  *     http://site1.com/docs/pic1.gif   pic1.gif
2166  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2167  *
2168  *     base = "docs/book1.html"
2169  *        URI input                        URI returned
2170  *     docs/pic1.gif                    pic1.gif
2171  *     docs/img/pic1.gif                img/pic1.gif
2172  *     img/pic1.gif                     ../img/pic1.gif
2173  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2174  *
2175  *
2176  * Note: if the URI reference is really weird or complicated, it may be
2177  *       worthwhile to first convert it into a "nice" one by calling
2178  *       xmlBuildURI (using 'base') before calling this routine,
2179  *       since this routine (for reasonable efficiency) assumes URI has
2180  *       already been through some validation.
2181  *
2182  * Returns a new URI string (to be freed by the caller) or NULL in case
2183  * error.
2184  */
2185 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2186 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2187 {
2188     xmlChar *val = NULL;
2189     int ret;
2190     int ix;
2191     int nbslash = 0;
2192     int len;
2193     xmlURIPtr ref = NULL;
2194     xmlURIPtr bas = NULL;
2195     xmlChar *bptr, *uptr, *vptr;
2196     int remove_path = 0;
2197 
2198     if ((URI == NULL) || (*URI == 0))
2199 	return NULL;
2200 
2201     /*
2202      * First parse URI into a standard form
2203      */
2204     ref = xmlCreateURI ();
2205     if (ref == NULL)
2206 	return NULL;
2207     /* If URI not already in "relative" form */
2208     if (URI[0] != '.') {
2209 	ret = xmlParseURIReference (ref, (const char *) URI);
2210 	if (ret != 0)
2211 	    goto done;		/* Error in URI, return NULL */
2212     } else
2213 	ref->path = (char *)xmlStrdup(URI);
2214 
2215     /*
2216      * Next parse base into the same standard form
2217      */
2218     if ((base == NULL) || (*base == 0)) {
2219 	val = xmlStrdup (URI);
2220 	goto done;
2221     }
2222     bas = xmlCreateURI ();
2223     if (bas == NULL)
2224 	goto done;
2225     if (base[0] != '.') {
2226 	ret = xmlParseURIReference (bas, (const char *) base);
2227 	if (ret != 0)
2228 	    goto done;		/* Error in base, return NULL */
2229     } else
2230 	bas->path = (char *)xmlStrdup(base);
2231 
2232     /*
2233      * If the scheme / server on the URI differs from the base,
2234      * just return the URI
2235      */
2236     if ((ref->scheme != NULL) &&
2237 	((bas->scheme == NULL) ||
2238 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2239 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2240          (bas->port != ref->port))) {
2241 	val = xmlStrdup (URI);
2242 	goto done;
2243     }
2244     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2245 	val = xmlStrdup(BAD_CAST "");
2246 	goto done;
2247     }
2248     if (bas->path == NULL) {
2249 	val = xmlStrdup((xmlChar *)ref->path);
2250 	goto done;
2251     }
2252     if (ref->path == NULL) {
2253         ref->path = (char *) "/";
2254 	remove_path = 1;
2255     }
2256 
2257     /*
2258      * At this point (at last!) we can compare the two paths
2259      *
2260      * First we take care of the special case where either of the
2261      * two path components may be missing (bug 316224)
2262      */
2263     bptr = (xmlChar *)bas->path;
2264     {
2265         xmlChar *rptr = (xmlChar *) ref->path;
2266         int pos = 0;
2267 
2268         /*
2269          * Next we compare the two strings and find where they first differ
2270          */
2271 	if ((*rptr == '.') && (rptr[1] == '/'))
2272             rptr += 2;
2273 	if ((*bptr == '.') && (bptr[1] == '/'))
2274             bptr += 2;
2275 	else if ((*bptr == '/') && (*rptr != '/'))
2276 	    bptr++;
2277 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2278 	    pos++;
2279 
2280 	if (bptr[pos] == rptr[pos]) {
2281 	    val = xmlStrdup(BAD_CAST "");
2282 	    goto done;		/* (I can't imagine why anyone would do this) */
2283 	}
2284 
2285 	/*
2286 	 * In URI, "back up" to the last '/' encountered.  This will be the
2287 	 * beginning of the "unique" suffix of URI
2288 	 */
2289 	ix = pos;
2290 	for (; ix > 0; ix--) {
2291 	    if (rptr[ix - 1] == '/')
2292 		break;
2293 	}
2294 	uptr = (xmlChar *)&rptr[ix];
2295 
2296 	/*
2297 	 * In base, count the number of '/' from the differing point
2298 	 */
2299 	for (; bptr[ix] != 0; ix++) {
2300 	    if (bptr[ix] == '/')
2301 		nbslash++;
2302 	}
2303 
2304 	/*
2305 	 * e.g: URI="foo/" base="foo/bar" -> "./"
2306 	 */
2307 	if (nbslash == 0 && !uptr[0]) {
2308 	    val = xmlStrdup(BAD_CAST "./");
2309 	    goto done;
2310 	}
2311 
2312 	len = xmlStrlen (uptr) + 1;
2313     }
2314 
2315     if (nbslash == 0) {
2316 	if (uptr != NULL)
2317 	    /* exception characters from xmlSaveUri */
2318 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2319 	goto done;
2320     }
2321 
2322     /*
2323      * Allocate just enough space for the returned string -
2324      * length of the remainder of the URI, plus enough space
2325      * for the "../" groups, plus one for the terminator
2326      */
2327     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2328     if (val == NULL) {
2329         xmlURIErrMemory("building relative URI\n");
2330 	goto done;
2331     }
2332     vptr = val;
2333     /*
2334      * Put in as many "../" as needed
2335      */
2336     for (; nbslash>0; nbslash--) {
2337 	*vptr++ = '.';
2338 	*vptr++ = '.';
2339 	*vptr++ = '/';
2340     }
2341     /*
2342      * Finish up with the end of the URI
2343      */
2344     if (uptr != NULL) {
2345         if ((vptr > val) && (len > 0) &&
2346 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2347 	    memcpy (vptr, uptr + 1, len - 1);
2348 	    vptr[len - 2] = 0;
2349 	} else {
2350 	    memcpy (vptr, uptr, len);
2351 	    vptr[len - 1] = 0;
2352 	}
2353     } else {
2354 	vptr[len - 1] = 0;
2355     }
2356 
2357     /* escape the freshly-built path */
2358     vptr = val;
2359 	/* exception characters from xmlSaveUri */
2360     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2361     xmlFree(vptr);
2362 
2363 done:
2364     /*
2365      * Free the working variables
2366      */
2367     if (remove_path != 0)
2368         ref->path = NULL;
2369     if (ref != NULL)
2370 	xmlFreeURI (ref);
2371     if (bas != NULL)
2372 	xmlFreeURI (bas);
2373 
2374     return val;
2375 }
2376 
2377 /**
2378  * xmlCanonicPath:
2379  * @path:  the resource locator in a filesystem notation
2380  *
2381  * Constructs a canonic path from the specified path.
2382  *
2383  * Returns a new canonic path, or a duplicate of the path parameter if the
2384  * construction fails. The caller is responsible for freeing the memory occupied
2385  * by the returned string. If there is insufficient memory available, or the
2386  * argument is NULL, the function returns NULL.
2387  */
2388 #define IS_WINDOWS_PATH(p)					\
2389 	((p != NULL) &&						\
2390 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2391 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2392 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2393 xmlChar *
xmlCanonicPath(const xmlChar * path)2394 xmlCanonicPath(const xmlChar *path)
2395 {
2396 /*
2397  * For Windows implementations, additional work needs to be done to
2398  * replace backslashes in pathnames with "forward slashes"
2399  */
2400 #if defined(_WIN32)
2401     int len = 0;
2402     char *p = NULL;
2403 #endif
2404     xmlURIPtr uri;
2405     xmlChar *ret;
2406     const xmlChar *absuri;
2407 
2408     if (path == NULL)
2409 	return(NULL);
2410 
2411 #if defined(_WIN32)
2412     /*
2413      * We must not change the backslashes to slashes if the the path
2414      * starts with \\?\
2415      * Those paths can be up to 32k characters long.
2416      * Was added specifically for OpenOffice, those paths can't be converted
2417      * to URIs anyway.
2418      */
2419     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2420         (path[3] == '\\') )
2421 	return xmlStrdup((const xmlChar *) path);
2422 #endif
2423 
2424 	/* sanitize filename starting with // so it can be used as URI */
2425     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2426         path++;
2427 
2428     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2429 	xmlFreeURI(uri);
2430 	return xmlStrdup(path);
2431     }
2432 
2433     /* Check if this is an "absolute uri" */
2434     absuri = xmlStrstr(path, BAD_CAST "://");
2435     if (absuri != NULL) {
2436         int l, j;
2437 	unsigned char c;
2438 	xmlChar *escURI;
2439 
2440         /*
2441 	 * this looks like an URI where some parts have not been
2442 	 * escaped leading to a parsing problem.  Check that the first
2443 	 * part matches a protocol.
2444 	 */
2445 	l = absuri - path;
2446 	/* Bypass if first part (part before the '://') is > 20 chars */
2447 	if ((l <= 0) || (l > 20))
2448 	    goto path_processing;
2449 	/* Bypass if any non-alpha characters are present in first part */
2450 	for (j = 0;j < l;j++) {
2451 	    c = path[j];
2452 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2453 	        goto path_processing;
2454 	}
2455 
2456 	/* Escape all except the characters specified in the supplied path */
2457         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2458 	if (escURI != NULL) {
2459 	    /* Try parsing the escaped path */
2460 	    uri = xmlParseURI((const char *) escURI);
2461 	    /* If successful, return the escaped string */
2462 	    if (uri != NULL) {
2463 	        xmlFreeURI(uri);
2464 		return escURI;
2465 	    }
2466             xmlFree(escURI);
2467 	}
2468     }
2469 
2470 path_processing:
2471 /* For Windows implementations, replace backslashes with 'forward slashes' */
2472 #if defined(_WIN32)
2473     /*
2474      * Create a URI structure
2475      */
2476     uri = xmlCreateURI();
2477     if (uri == NULL) {		/* Guard against 'out of memory' */
2478         return(NULL);
2479     }
2480 
2481     len = xmlStrlen(path);
2482     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2483         /* make the scheme 'file' */
2484 	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2485 	/* allocate space for leading '/' + path + string terminator */
2486 	uri->path = xmlMallocAtomic(len + 2);
2487 	if (uri->path == NULL) {
2488 	    xmlFreeURI(uri);	/* Guard against 'out of memory' */
2489 	    return(NULL);
2490 	}
2491 	/* Put in leading '/' plus path */
2492 	uri->path[0] = '/';
2493 	p = uri->path + 1;
2494 	strncpy(p, (char *) path, len + 1);
2495     } else {
2496 	uri->path = (char *) xmlStrdup(path);
2497 	if (uri->path == NULL) {
2498 	    xmlFreeURI(uri);
2499 	    return(NULL);
2500 	}
2501 	p = uri->path;
2502     }
2503     /* Now change all occurrences of '\' to '/' */
2504     while (*p != '\0') {
2505 	if (*p == '\\')
2506 	    *p = '/';
2507 	p++;
2508     }
2509 
2510     if (uri->scheme == NULL) {
2511 	ret = xmlStrdup((const xmlChar *) uri->path);
2512     } else {
2513 	ret = xmlSaveUri(uri);
2514     }
2515 
2516     xmlFreeURI(uri);
2517 #else
2518     ret = xmlStrdup((const xmlChar *) path);
2519 #endif
2520     return(ret);
2521 }
2522 
2523 /**
2524  * xmlPathToURI:
2525  * @path:  the resource locator in a filesystem notation
2526  *
2527  * Constructs an URI expressing the existing path
2528  *
2529  * Returns a new URI, or a duplicate of the path parameter if the
2530  * construction fails. The caller is responsible for freeing the memory
2531  * occupied by the returned string. If there is insufficient memory available,
2532  * or the argument is NULL, the function returns NULL.
2533  */
2534 xmlChar *
xmlPathToURI(const xmlChar * path)2535 xmlPathToURI(const xmlChar *path)
2536 {
2537     xmlURIPtr uri;
2538     xmlURI temp;
2539     xmlChar *ret, *cal;
2540 
2541     if (path == NULL)
2542         return(NULL);
2543 
2544     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2545 	xmlFreeURI(uri);
2546 	return xmlStrdup(path);
2547     }
2548     cal = xmlCanonicPath(path);
2549     if (cal == NULL)
2550         return(NULL);
2551 #if defined(_WIN32)
2552     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2553        If 'cal' is a valid URI already then we are done here, as continuing would make
2554        it invalid. */
2555     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2556 	xmlFreeURI(uri);
2557 	return cal;
2558     }
2559     /* 'cal' can contain a relative path with backslashes. If that is processed
2560        by xmlSaveURI, they will be escaped and the external entity loader machinery
2561        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2562     ret = cal;
2563     while (*ret != '\0') {
2564 	if (*ret == '\\')
2565 	    *ret = '/';
2566 	ret++;
2567     }
2568 #endif
2569     memset(&temp, 0, sizeof(temp));
2570     temp.path = (char *) cal;
2571     ret = xmlSaveUri(&temp);
2572     xmlFree(cal);
2573     return(ret);
2574 }
2575