• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
11 #define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20 
21 static void xmlCleanURI(xmlURIPtr uri);
22 
/*
 * Character classes inherited from RFC 2396 and still used by the legacy
 * handling code.  Every macro takes the character itself, except IS_UNWISE
 * which takes a pointer to the character.  Arguments are evaluated more
 * than once, so they must be free of side effects.
 */

/* lowalpha = "a" .. "z" */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/* upalpha = "A" .. "Z" */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/* alpha = lowalpha | upalpha */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* digit = "0" .. "9"; any IS_DIGIT defined earlier is replaced */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/* alphanum = alpha | digit */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 * This implementation additionally accepts "[" and "]".
 * Takes a pointer to the character to test.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/* unreserved = alphanum | mark */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
90 
/*
 * Advance the pointer p to the next character: a '%' is treated as the
 * start of a three character escape sequence and skipped as a unit, any
 * other character advances p by one.
 *
 * p must be a modifiable lvalue and is evaluated more than once.  The
 * macro does not check that two characters actually follow the '%'.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
96 
97 /*
98  * Productions from the spec.
99  *
100  *    authority     = server | reg_name
101  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
102  *                        ";" | ":" | "@" | "&" | "=" | "+" )
103  *
104  * path          = [ abs_path | opaque_part ]
105  */
106 
/*
 * Wrapper around xmlStrndup() taking and returning plain char pointers.
 * The whole expansion is parenthesized so that the cast cannot be split
 * from the call by operators applied at the point of use.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
108 
109 /************************************************************************
110  *									*
111  *                         RFC 3986 parser				*
112  *									*
113  ************************************************************************/
114 
/*
 * Basic character tests used by the RFC 3986 parser.  Each macro takes a
 * pointer and examines the character it points to; the argument is
 * evaluated more than once.
 *
 *    DIGIT  = "0" .. "9"
 *    ALPHA  = "a" .. "z" / "A" .. "Z"
 *    HEXDIG = DIGIT / "a" .. "f" / "A" .. "F"
 */
#define ISA_DIGIT(p) (('0' <= *(p)) && (*(p) <= '9'))
#define ISA_ALPHA(p)                                                    \
    ((('a' <= *(p)) && (*(p) <= 'z')) ||                                \
     (('A' <= *(p)) && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) ||                                                    \
     (('a' <= *(p)) && (*(p) <= 'f')) ||                                \
     (('A' <= *(p)) && (*(p) <= 'F')))
121 
/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 *
 * Takes a pointer to the character to test.  The apostrophe is part of
 * the production and must be accepted like the other sub-delims.
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||	\
       ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||		\
       ((*(p) == ';')) || ((*(p) == '=')))
131 
/*
 * Remaining RFC 3986 character classes.  Each macro takes a pointer to the
 * character to test and evaluates its argument more than once.
 */

/*    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@" */
#define ISA_GEN_DELIM(p)						\
    ((*(p) == ':') || (*(p) == '/') || (*(p) == '?') ||			\
     (*(p) == '#') || (*(p) == '[') || (*(p) == ']') ||			\
     (*(p) == '@'))

/*    reserved      = gen-delims / sub-delims */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" */
#define ISA_UNRESERVED(p)						\
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) ||				\
     (*(p) == '-') || (*(p) == '.') || (*(p) == '_') || (*(p) == '~'))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 * The two following characters are only examined when the first is '%'.
 */
#define ISA_PCT_ENCODED(p)						\
    ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))

/*    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@" */
#define ISA_PCHAR(p)							\
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
     (*(p) == ':') || (*(p) == '@'))
164 
165 /**
166  * xmlParse3986Scheme:
167  * @uri:  pointer to an URI structure
168  * @str:  pointer to the string to analyze
169  *
170  * Parse an URI scheme
171  *
172  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173  *
174  * Returns 0 or the error code
175  */
176 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)177 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178     const char *cur;
179 
180     if (str == NULL)
181 	return(-1);
182 
183     cur = *str;
184     if (!ISA_ALPHA(cur))
185 	return(2);
186     cur++;
187     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189     if (uri != NULL) {
190 	if (uri->scheme != NULL) xmlFree(uri->scheme);
191 	uri->scheme = STRNDUP(*str, cur - *str);
192     }
193     *str = cur;
194     return(0);
195 }
196 
197 /**
198  * xmlParse3986Fragment:
199  * @uri:  pointer to an URI structure
200  * @str:  pointer to the string to analyze
201  *
202  * Parse the query part of an URI
203  *
204  * fragment      = *( pchar / "/" / "?" )
205  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206  *       in the fragment identifier but this is used very broadly for
207  *       xpointer scheme selection, so we are allowing it here to not break
208  *       for example all the DocBook processing chains.
209  *
210  * Returns 0 or the error code
211  */
212 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)213 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214 {
215     const char *cur;
216 
217     if (str == NULL)
218         return (-1);
219 
220     cur = *str;
221 
222     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223            (*cur == '[') || (*cur == ']') ||
224            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225         NEXT(cur);
226     if (uri != NULL) {
227         if (uri->fragment != NULL)
228             xmlFree(uri->fragment);
229 	if (uri->cleanup & 2)
230 	    uri->fragment = STRNDUP(*str, cur - *str);
231 	else
232 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233     }
234     *str = cur;
235     return (0);
236 }
237 
238 /**
239  * xmlParse3986Query:
240  * @uri:  pointer to an URI structure
241  * @str:  pointer to the string to analyze
242  *
243  * Parse the query part of an URI
244  *
245  * query = *uric
246  *
247  * Returns 0 or the error code
248  */
249 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)250 xmlParse3986Query(xmlURIPtr uri, const char **str)
251 {
252     const char *cur;
253 
254     if (str == NULL)
255         return (-1);
256 
257     cur = *str;
258 
259     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261         NEXT(cur);
262     if (uri != NULL) {
263         if (uri->query != NULL)
264             xmlFree(uri->query);
265 	if (uri->cleanup & 2)
266 	    uri->query = STRNDUP(*str, cur - *str);
267 	else
268 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269 
270 	/* Save the raw bytes of the query as well.
271 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272 	 */
273 	if (uri->query_raw != NULL)
274 	    xmlFree (uri->query_raw);
275 	uri->query_raw = STRNDUP (*str, cur - *str);
276     }
277     *str = cur;
278     return (0);
279 }
280 
281 /**
282  * xmlParse3986Port:
283  * @uri:  pointer to an URI structure
284  * @str:  the string to analyze
285  *
286  * Parse a port  part and fills in the appropriate fields
287  * of the @uri structure
288  *
289  * port          = *DIGIT
290  *
291  * Returns 0 or the error code
292  */
293 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)294 xmlParse3986Port(xmlURIPtr uri, const char **str)
295 {
296     const char *cur = *str;
297 
298     if (ISA_DIGIT(cur)) {
299 	if (uri != NULL)
300 	    uri->port = 0;
301 	while (ISA_DIGIT(cur)) {
302 	    if (uri != NULL)
303 		uri->port = uri->port * 10 + (*cur - '0');
304 	    cur++;
305 	}
306 	*str = cur;
307 	return(0);
308     }
309     return(1);
310 }
311 
312 /**
313  * xmlParse3986Userinfo:
314  * @uri:  pointer to an URI structure
315  * @str:  the string to analyze
316  *
317  * Parse an user informations part and fills in the appropriate fields
318  * of the @uri structure
319  *
320  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
321  *
322  * Returns 0 or the error code
323  */
324 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)325 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326 {
327     const char *cur;
328 
329     cur = *str;
330     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331            ISA_SUB_DELIM(cur) || (*cur == ':'))
332 	NEXT(cur);
333     if (*cur == '@') {
334 	if (uri != NULL) {
335 	    if (uri->user != NULL) xmlFree(uri->user);
336 	    if (uri->cleanup & 2)
337 		uri->user = STRNDUP(*str, cur - *str);
338 	    else
339 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340 	}
341 	*str = cur;
342 	return(0);
343     }
344     return(1);
345 }
346 
/**
 * xmlParse3986DecOctet:
 * @str:  address of the pointer to the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  Up to three characters are examined; the look-ahead
 * relies on the string being zero terminated.  @str is only modified on
 * success.  Values 256 to 259 are rejected: the last digit of the
 * "25" %x30-35 form is checked against '5'.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    char c0, c1, c2;

    c0 = cur[0];
    if ((c0 < '0') || (c0 > '9'))
        return(1);

    /* a single digit: 0-9 */
    c1 = cur[1];
    if ((c1 < '0') || (c1 > '9')) {
        *str = cur + 1;
        return(0);
    }

    /* cur[1] is a digit, hence not the terminator: cur[2] can be read */
    c2 = cur[2];
    if ((c2 < '0') || (c2 > '9')) {
        /* two digits: 10-99, a leading zero is not allowed */
        if (c0 == '0')
            return(1);
        *str = cur + 2;
        return(0);
    }

    /* three digits: 100-199, 200-249 or 250-255 */
    if ((c0 == '1') ||
        ((c0 == '2') && ((c1 <= '4') || ((c1 == '5') && (c2 <= '5'))))) {
        *str = cur + 3;
        return(0);
    }
    return(1);
}
384 /**
385  * xmlParse3986Host:
386  * @uri:  pointer to an URI structure
387  * @str:  the string to analyze
388  *
389  * Parse an host part and fills in the appropriate fields
390  * of the @uri structure
391  *
392  * host          = IP-literal / IPv4address / reg-name
393  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
394  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
395  * reg-name      = *( unreserved / pct-encoded / sub-delims )
396  *
397  * Returns 0 or the error code
398  */
399 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)400 xmlParse3986Host(xmlURIPtr uri, const char **str)
401 {
402     const char *cur = *str;
403     const char *host;
404 
405     host = cur;
406     /*
407      * IPv6 and future adressing scheme are enclosed between brackets
408      */
409     if (*cur == '[') {
410         cur++;
411 	while ((*cur != ']') && (*cur != 0))
412 	    cur++;
413 	if (*cur != ']')
414 	    return(1);
415 	cur++;
416 	goto found;
417     }
418     /*
419      * try to parse an IPv4
420      */
421     if (ISA_DIGIT(cur)) {
422         if (xmlParse3986DecOctet(&cur) != 0)
423 	    goto not_ipv4;
424 	if (*cur != '.')
425 	    goto not_ipv4;
426 	cur++;
427         if (xmlParse3986DecOctet(&cur) != 0)
428 	    goto not_ipv4;
429 	if (*cur != '.')
430 	    goto not_ipv4;
431         if (xmlParse3986DecOctet(&cur) != 0)
432 	    goto not_ipv4;
433 	if (*cur != '.')
434 	    goto not_ipv4;
435         if (xmlParse3986DecOctet(&cur) != 0)
436 	    goto not_ipv4;
437 	goto found;
438 not_ipv4:
439         cur = *str;
440     }
441     /*
442      * then this should be a hostname which can be empty
443      */
444     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445         NEXT(cur);
446 found:
447     if (uri != NULL) {
448 	if (uri->authority != NULL) xmlFree(uri->authority);
449 	uri->authority = NULL;
450 	if (uri->server != NULL) xmlFree(uri->server);
451 	if (cur != host) {
452 	    if (uri->cleanup & 2)
453 		uri->server = STRNDUP(host, cur - host);
454 	    else
455 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456 	} else
457 	    uri->server = NULL;
458     }
459     *str = cur;
460     return(0);
461 }
462 
463 /**
464  * xmlParse3986Authority:
465  * @uri:  pointer to an URI structure
466  * @str:  the string to analyze
467  *
468  * Parse an authority part and fills in the appropriate fields
469  * of the @uri structure
470  *
471  * authority     = [ userinfo "@" ] host [ ":" port ]
472  *
473  * Returns 0 or the error code
474  */
475 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)476 xmlParse3986Authority(xmlURIPtr uri, const char **str)
477 {
478     const char *cur;
479     int ret;
480 
481     cur = *str;
482     /*
483      * try to parse an userinfo and check for the trailing @
484      */
485     ret = xmlParse3986Userinfo(uri, &cur);
486     if ((ret != 0) || (*cur != '@'))
487         cur = *str;
488     else
489         cur++;
490     ret = xmlParse3986Host(uri, &cur);
491     if (ret != 0) return(ret);
492     if (*cur == ':') {
493         cur++;
494         ret = xmlParse3986Port(uri, &cur);
495 	if (ret != 0) return(ret);
496     }
497     *str = cur;
498     return(0);
499 }
500 
/**
 * xmlParse3986Segment:
 * @str:  address of the pointer to the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which is
 * equal to @forbid.  @forbid is only tested while scanning, not by the
 * initial emptiness check.  @str is only modified when at least the first
 * character is a pchar.
 *
 * Returns 0 on success, 1 if the segment does not start with a pchar and
 * @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    if (!ISA_PCHAR(end))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(end) || (*end == forbid))
            break;
        NEXT(end);
    }
    *str = end;
    return (0);
}
533 
534 /**
535  * xmlParse3986PathAbEmpty:
536  * @uri:  pointer to an URI structure
537  * @str:  the string to analyze
538  *
539  * Parse an path absolute or empty and fills in the appropriate fields
540  * of the @uri structure
541  *
542  * path-abempty  = *( "/" segment )
543  *
544  * Returns 0 or the error code
545  */
546 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)547 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548 {
549     const char *cur;
550     int ret;
551 
552     cur = *str;
553 
554     while (*cur == '/') {
555         cur++;
556 	ret = xmlParse3986Segment(&cur, 0, 1);
557 	if (ret != 0) return(ret);
558     }
559     if (uri != NULL) {
560 	if (uri->path != NULL) xmlFree(uri->path);
561 	if (uri->cleanup & 2)
562 	    uri->path = STRNDUP(*str, cur - *str);
563 	else
564 	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
565     }
566     *str = cur;
567     return (0);
568 }
569 
570 /**
571  * xmlParse3986PathAbsolute:
572  * @uri:  pointer to an URI structure
573  * @str:  the string to analyze
574  *
575  * Parse an path absolute and fills in the appropriate fields
576  * of the @uri structure
577  *
578  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
579  *
580  * Returns 0 or the error code
581  */
582 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)583 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
584 {
585     const char *cur;
586     int ret;
587 
588     cur = *str;
589 
590     if (*cur != '/')
591         return(1);
592     cur++;
593     ret = xmlParse3986Segment(&cur, 0, 0);
594     if (ret == 0) {
595 	while (*cur == '/') {
596 	    cur++;
597 	    ret = xmlParse3986Segment(&cur, 0, 1);
598 	    if (ret != 0) return(ret);
599 	}
600     }
601     if (uri != NULL) {
602 	if (uri->path != NULL) xmlFree(uri->path);
603 	if (uri->cleanup & 2)
604 	    uri->path = STRNDUP(*str, cur - *str);
605 	else
606 	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
607     }
608     *str = cur;
609     return (0);
610 }
611 
612 /**
613  * xmlParse3986PathRootless:
614  * @uri:  pointer to an URI structure
615  * @str:  the string to analyze
616  *
617  * Parse an path without root and fills in the appropriate fields
618  * of the @uri structure
619  *
620  * path-rootless = segment-nz *( "/" segment )
621  *
622  * Returns 0 or the error code
623  */
624 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)625 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
626 {
627     const char *cur;
628     int ret;
629 
630     cur = *str;
631 
632     ret = xmlParse3986Segment(&cur, 0, 0);
633     if (ret != 0) return(ret);
634     while (*cur == '/') {
635         cur++;
636 	ret = xmlParse3986Segment(&cur, 0, 1);
637 	if (ret != 0) return(ret);
638     }
639     if (uri != NULL) {
640 	if (uri->path != NULL) xmlFree(uri->path);
641 	if (uri->cleanup & 2)
642 	    uri->path = STRNDUP(*str, cur - *str);
643 	else
644 	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645     }
646     *str = cur;
647     return (0);
648 }
649 
650 /**
651  * xmlParse3986PathNoScheme:
652  * @uri:  pointer to an URI structure
653  * @str:  the string to analyze
654  *
655  * Parse an path which is not a scheme and fills in the appropriate fields
656  * of the @uri structure
657  *
658  * path-noscheme = segment-nz-nc *( "/" segment )
659  *
660  * Returns 0 or the error code
661  */
662 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)663 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
664 {
665     const char *cur;
666     int ret;
667 
668     cur = *str;
669 
670     ret = xmlParse3986Segment(&cur, ':', 0);
671     if (ret != 0) return(ret);
672     while (*cur == '/') {
673         cur++;
674 	ret = xmlParse3986Segment(&cur, 0, 1);
675 	if (ret != 0) return(ret);
676     }
677     if (uri != NULL) {
678 	if (uri->path != NULL) xmlFree(uri->path);
679 	if (uri->cleanup & 2)
680 	    uri->path = STRNDUP(*str, cur - *str);
681 	else
682 	    uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
683     }
684     *str = cur;
685     return (0);
686 }
687 
688 /**
689  * xmlParse3986HierPart:
690  * @uri:  pointer to an URI structure
691  * @str:  the string to analyze
692  *
693  * Parse an hierarchical part and fills in the appropriate fields
694  * of the @uri structure
695  *
696  * hier-part     = "//" authority path-abempty
697  *                / path-absolute
698  *                / path-rootless
699  *                / path-empty
700  *
701  * Returns 0 or the error code
702  */
703 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)704 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
705 {
706     const char *cur;
707     int ret;
708 
709     cur = *str;
710 
711     if ((*cur == '/') && (*(cur + 1) == '/')) {
712         cur += 2;
713 	ret = xmlParse3986Authority(uri, &cur);
714 	if (ret != 0) return(ret);
715 	ret = xmlParse3986PathAbEmpty(uri, &cur);
716 	if (ret != 0) return(ret);
717 	*str = cur;
718 	return(0);
719     } else if (*cur == '/') {
720         ret = xmlParse3986PathAbsolute(uri, &cur);
721 	if (ret != 0) return(ret);
722     } else if (ISA_PCHAR(cur)) {
723         ret = xmlParse3986PathRootless(uri, &cur);
724 	if (ret != 0) return(ret);
725     } else {
726 	/* path-empty is effectively empty */
727 	if (uri != NULL) {
728 	    if (uri->path != NULL) xmlFree(uri->path);
729 	    uri->path = NULL;
730 	}
731     }
732     *str = cur;
733     return (0);
734 }
735 
736 /**
737  * xmlParse3986RelativeRef:
738  * @uri:  pointer to an URI structure
739  * @str:  the string to analyze
740  *
741  * Parse an URI string and fills in the appropriate fields
742  * of the @uri structure
743  *
744  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
745  * relative-part = "//" authority path-abempty
746  *               / path-absolute
747  *               / path-noscheme
748  *               / path-empty
749  *
750  * Returns 0 or the error code
751  */
752 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)753 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
754     int ret;
755 
756     if ((*str == '/') && (*(str + 1) == '/')) {
757         str += 2;
758 	ret = xmlParse3986Authority(uri, &str);
759 	if (ret != 0) return(ret);
760 	ret = xmlParse3986PathAbEmpty(uri, &str);
761 	if (ret != 0) return(ret);
762     } else if (*str == '/') {
763 	ret = xmlParse3986PathAbsolute(uri, &str);
764 	if (ret != 0) return(ret);
765     } else if (ISA_PCHAR(str)) {
766         ret = xmlParse3986PathNoScheme(uri, &str);
767 	if (ret != 0) return(ret);
768     } else {
769 	/* path-empty is effectively empty */
770 	if (uri != NULL) {
771 	    if (uri->path != NULL) xmlFree(uri->path);
772 	    uri->path = NULL;
773 	}
774     }
775 
776     if (*str == '?') {
777 	str++;
778 	ret = xmlParse3986Query(uri, &str);
779 	if (ret != 0) return(ret);
780     }
781     if (*str == '#') {
782 	str++;
783 	ret = xmlParse3986Fragment(uri, &str);
784 	if (ret != 0) return(ret);
785     }
786     if (*str != 0) {
787 	xmlCleanURI(uri);
788 	return(1);
789     }
790     return(0);
791 }
792 
793 
794 /**
795  * xmlParse3986URI:
796  * @uri:  pointer to an URI structure
797  * @str:  the string to analyze
798  *
799  * Parse an URI string and fills in the appropriate fields
800  * of the @uri structure
801  *
802  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
803  *
804  * Returns 0 or the error code
805  */
806 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)807 xmlParse3986URI(xmlURIPtr uri, const char *str) {
808     int ret;
809 
810     ret = xmlParse3986Scheme(uri, &str);
811     if (ret != 0) return(ret);
812     if (*str != ':') {
813 	return(1);
814     }
815     str++;
816     ret = xmlParse3986HierPart(uri, &str);
817     if (ret != 0) return(ret);
818     if (*str == '?') {
819 	str++;
820 	ret = xmlParse3986Query(uri, &str);
821 	if (ret != 0) return(ret);
822     }
823     if (*str == '#') {
824 	str++;
825 	ret = xmlParse3986Fragment(uri, &str);
826 	if (ret != 0) return(ret);
827     }
828     if (*str != 0) {
829 	xmlCleanURI(uri);
830 	return(1);
831     }
832     return(0);
833 }
834 
835 /**
836  * xmlParse3986URIReference:
837  * @uri:  pointer to an URI structure
838  * @str:  the string to analyze
839  *
840  * Parse an URI reference string and fills in the appropriate fields
841  * of the @uri structure
842  *
843  * URI-reference = URI / relative-ref
844  *
845  * Returns 0 or the error code
846  */
847 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)848 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
849     int ret;
850 
851     if (str == NULL)
852 	return(-1);
853     xmlCleanURI(uri);
854 
855     /*
856      * Try first to parse absolute refs, then fallback to relative if
857      * it fails.
858      */
859     ret = xmlParse3986URI(uri, str);
860     if (ret != 0) {
861 	xmlCleanURI(uri);
862         ret = xmlParse3986RelativeRef(uri, str);
863 	if (ret != 0) {
864 	    xmlCleanURI(uri);
865 	    return(ret);
866 	}
867     }
868     return(0);
869 }
870 
871 /**
872  * xmlParseURI:
873  * @str:  the URI string to analyze
874  *
875  * Parse an URI based on RFC 3986
876  *
877  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
878  *
879  * Returns a newly built xmlURIPtr or NULL in case of error
880  */
881 xmlURIPtr
xmlParseURI(const char * str)882 xmlParseURI(const char *str) {
883     xmlURIPtr uri;
884     int ret;
885 
886     if (str == NULL)
887 	return(NULL);
888     uri = xmlCreateURI();
889     if (uri != NULL) {
890 	ret = xmlParse3986URIReference(uri, str);
891         if (ret) {
892 	    xmlFreeURI(uri);
893 	    return(NULL);
894 	}
895     }
896     return(uri);
897 }
898 
899 /**
900  * xmlParseURIReference:
901  * @uri:  pointer to an URI structure
902  * @str:  the string to analyze
903  *
904  * Parse an URI reference string based on RFC 3986 and fills in the
905  * appropriate fields of the @uri structure
906  *
907  * URI-reference = URI / relative-ref
908  *
909  * Returns 0 or the error code
910  */
911 int
xmlParseURIReference(xmlURIPtr uri,const char * str)912 xmlParseURIReference(xmlURIPtr uri, const char *str) {
913     return(xmlParse3986URIReference(uri, str));
914 }
915 
916 /**
917  * xmlParseURIRaw:
918  * @str:  the URI string to analyze
919  * @raw:  if 1 unescaping of URI pieces are disabled
920  *
921  * Parse an URI but allows to keep intact the original fragments.
922  *
923  * URI-reference = URI / relative-ref
924  *
925  * Returns a newly built xmlURIPtr or NULL in case of error
926  */
927 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)928 xmlParseURIRaw(const char *str, int raw) {
929     xmlURIPtr uri;
930     int ret;
931 
932     if (str == NULL)
933 	return(NULL);
934     uri = xmlCreateURI();
935     if (uri != NULL) {
936         if (raw) {
937 	    uri->cleanup |= 2;
938 	}
939 	ret = xmlParseURIReference(uri, str);
940         if (ret) {
941 	    xmlFreeURI(uri);
942 	    return(NULL);
943 	}
944     }
945     return(uri);
946 }
947 
948 /************************************************************************
949  *									*
950  *			Generic URI structure functions			*
951  *									*
952  ************************************************************************/
953 
954 /**
955  * xmlCreateURI:
956  *
957  * Simply creates an empty xmlURI
958  *
959  * Returns the new structure or NULL in case of error
960  */
961 xmlURIPtr
xmlCreateURI(void)962 xmlCreateURI(void) {
963     xmlURIPtr ret;
964 
965     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
966     if (ret == NULL) {
967 	xmlGenericError(xmlGenericErrorContext,
968 		"xmlCreateURI: out of memory\n");
969 	return(NULL);
970     }
971     memset(ret, 0, sizeof(xmlURI));
972     return(ret);
973 }
974 
975 /**
976  * xmlSaveUri:
977  * @uri:  pointer to an xmlURI
978  *
979  * Save the URI as an escaped string
980  *
981  * Returns a new string (to be deallocated by caller)
982  */
983 xmlChar *
xmlSaveUri(xmlURIPtr uri)984 xmlSaveUri(xmlURIPtr uri) {
985     xmlChar *ret = NULL;
986     xmlChar *temp;
987     const char *p;
988     int len;
989     int max;
990 
991     if (uri == NULL) return(NULL);
992 
993 
994     max = 80;
995     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
996     if (ret == NULL) {
997 	xmlGenericError(xmlGenericErrorContext,
998 		"xmlSaveUri: out of memory\n");
999 	return(NULL);
1000     }
1001     len = 0;
1002 
1003     if (uri->scheme != NULL) {
1004 	p = uri->scheme;
1005 	while (*p != 0) {
1006 	    if (len >= max) {
1007 		max *= 2;
1008 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1009 		if (temp == NULL) {
1010 		    xmlGenericError(xmlGenericErrorContext,
1011 			    "xmlSaveUri: out of memory\n");
1012 		    xmlFree(ret);
1013 		    return(NULL);
1014 		}
1015 		ret = temp;
1016 	    }
1017 	    ret[len++] = *p++;
1018 	}
1019 	if (len >= max) {
1020 	    max *= 2;
1021 	    temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1022 	    if (temp == NULL) {
1023 		xmlGenericError(xmlGenericErrorContext,
1024 			"xmlSaveUri: out of memory\n");
1025 		xmlFree(ret);
1026 		return(NULL);
1027 	    }
1028 	    ret = temp;
1029 	}
1030 	ret[len++] = ':';
1031     }
1032     if (uri->opaque != NULL) {
1033 	p = uri->opaque;
1034 	while (*p != 0) {
1035 	    if (len + 3 >= max) {
1036 		max *= 2;
1037 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038 		if (temp == NULL) {
1039 		    xmlGenericError(xmlGenericErrorContext,
1040 			    "xmlSaveUri: out of memory\n");
1041 		    xmlFree(ret);
1042 		    return(NULL);
1043 		}
1044 		ret = temp;
1045 	    }
1046 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1047 		ret[len++] = *p++;
1048 	    else {
1049 		int val = *(unsigned char *)p++;
1050 		int hi = val / 0x10, lo = val % 0x10;
1051 		ret[len++] = '%';
1052 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1053 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1054 	    }
1055 	}
1056     } else {
1057 	if (uri->server != NULL) {
1058 	    if (len + 3 >= max) {
1059 		max *= 2;
1060 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1061 		if (temp == NULL) {
1062 		    xmlGenericError(xmlGenericErrorContext,
1063 			    "xmlSaveUri: out of memory\n");
1064                   xmlFree(ret);
1065 		    return(NULL);
1066 		}
1067 		ret = temp;
1068 	    }
1069 	    ret[len++] = '/';
1070 	    ret[len++] = '/';
1071 	    if (uri->user != NULL) {
1072 		p = uri->user;
1073 		while (*p != 0) {
1074 		    if (len + 3 >= max) {
1075 			max *= 2;
1076 			temp = (xmlChar *) xmlRealloc(ret,
1077 				(max + 1) * sizeof(xmlChar));
1078 			if (temp == NULL) {
1079 			    xmlGenericError(xmlGenericErrorContext,
1080 				    "xmlSaveUri: out of memory\n");
1081 			    xmlFree(ret);
1082 			    return(NULL);
1083 			}
1084 			ret = temp;
1085 		    }
1086 		    if ((IS_UNRESERVED(*(p))) ||
1087 			((*(p) == ';')) || ((*(p) == ':')) ||
1088 			((*(p) == '&')) || ((*(p) == '=')) ||
1089 			((*(p) == '+')) || ((*(p) == '$')) ||
1090 			((*(p) == ',')))
1091 			ret[len++] = *p++;
1092 		    else {
1093 			int val = *(unsigned char *)p++;
1094 			int hi = val / 0x10, lo = val % 0x10;
1095 			ret[len++] = '%';
1096 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1097 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1098 		    }
1099 		}
1100 		if (len + 3 >= max) {
1101 		    max *= 2;
1102 		    temp = (xmlChar *) xmlRealloc(ret,
1103 			    (max + 1) * sizeof(xmlChar));
1104 		    if (temp == NULL) {
1105 			xmlGenericError(xmlGenericErrorContext,
1106 				"xmlSaveUri: out of memory\n");
1107 			xmlFree(ret);
1108 			return(NULL);
1109 		    }
1110 		    ret = temp;
1111 		}
1112 		ret[len++] = '@';
1113 	    }
1114 	    p = uri->server;
1115 	    while (*p != 0) {
1116 		if (len >= max) {
1117 		    max *= 2;
1118 		    temp = (xmlChar *) xmlRealloc(ret,
1119 			    (max + 1) * sizeof(xmlChar));
1120 		    if (temp == NULL) {
1121 			xmlGenericError(xmlGenericErrorContext,
1122 				"xmlSaveUri: out of memory\n");
1123 			xmlFree(ret);
1124 			return(NULL);
1125 		    }
1126 		    ret = temp;
1127 		}
1128 		ret[len++] = *p++;
1129 	    }
1130 	    if (uri->port > 0) {
1131 		if (len + 10 >= max) {
1132 		    max *= 2;
1133 		    temp = (xmlChar *) xmlRealloc(ret,
1134 			    (max + 1) * sizeof(xmlChar));
1135 		    if (temp == NULL) {
1136 			xmlGenericError(xmlGenericErrorContext,
1137 				"xmlSaveUri: out of memory\n");
1138                      xmlFree(ret);
1139 			return(NULL);
1140 		    }
1141 		    ret = temp;
1142 		}
1143 		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1144 	    }
1145 	} else if (uri->authority != NULL) {
1146 	    if (len + 3 >= max) {
1147 		max *= 2;
1148 		temp = (xmlChar *) xmlRealloc(ret,
1149 			(max + 1) * sizeof(xmlChar));
1150 		if (temp == NULL) {
1151 			xmlGenericError(xmlGenericErrorContext,
1152 				"xmlSaveUri: out of memory\n");
1153                      xmlFree(ret);
1154 			return(NULL);
1155 		    }
1156 		    ret = temp;
1157 	    }
1158 	    ret[len++] = '/';
1159 	    ret[len++] = '/';
1160 	    p = uri->authority;
1161 	    while (*p != 0) {
1162 		if (len + 3 >= max) {
1163 		    max *= 2;
1164 		    temp = (xmlChar *) xmlRealloc(ret,
1165 			    (max + 1) * sizeof(xmlChar));
1166 		    if (temp == NULL) {
1167 			xmlGenericError(xmlGenericErrorContext,
1168 				"xmlSaveUri: out of memory\n");
1169                      xmlFree(ret);
1170 			return(NULL);
1171 		    }
1172 		    ret = temp;
1173 		}
1174 		if ((IS_UNRESERVED(*(p))) ||
1175                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1176                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1177                     ((*(p) == '=')) || ((*(p) == '+')))
1178 		    ret[len++] = *p++;
1179 		else {
1180 		    int val = *(unsigned char *)p++;
1181 		    int hi = val / 0x10, lo = val % 0x10;
1182 		    ret[len++] = '%';
1183 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1184 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1185 		}
1186 	    }
1187 	} else if (uri->scheme != NULL) {
1188 	    if (len + 3 >= max) {
1189 		max *= 2;
1190 		temp = (xmlChar *) xmlRealloc(ret,
1191 			(max + 1) * sizeof(xmlChar));
1192 		if (temp == NULL) {
1193 			xmlGenericError(xmlGenericErrorContext,
1194 				"xmlSaveUri: out of memory\n");
1195                      xmlFree(ret);
1196 			return(NULL);
1197 		    }
1198 		    ret = temp;
1199 	    }
1200 	    ret[len++] = '/';
1201 	    ret[len++] = '/';
1202 	}
1203 	if (uri->path != NULL) {
1204 	    p = uri->path;
1205 	    /*
1206 	     * the colon in file:///d: should not be escaped or
1207 	     * Windows accesses fail later.
1208 	     */
1209 	    if ((uri->scheme != NULL) &&
1210 		(p[0] == '/') &&
1211 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1212 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213 		(p[2] == ':') &&
1214 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215 		if (len + 3 >= max) {
1216 		    max *= 2;
1217 		    ret = (xmlChar *) xmlRealloc(ret,
1218 			    (max + 1) * sizeof(xmlChar));
1219 		    if (ret == NULL) {
1220 			xmlGenericError(xmlGenericErrorContext,
1221 				"xmlSaveUri: out of memory\n");
1222 			return(NULL);
1223 		    }
1224 		}
1225 		ret[len++] = *p++;
1226 		ret[len++] = *p++;
1227 		ret[len++] = *p++;
1228 	    }
1229 	    while (*p != 0) {
1230 		if (len + 3 >= max) {
1231 		    max *= 2;
1232 		    temp = (xmlChar *) xmlRealloc(ret,
1233 			    (max + 1) * sizeof(xmlChar));
1234 		    if (temp == NULL) {
1235 			xmlGenericError(xmlGenericErrorContext,
1236 				"xmlSaveUri: out of memory\n");
1237                      xmlFree(ret);
1238 			return(NULL);
1239 		    }
1240 		    ret = temp;
1241 		}
1242 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1243                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1244 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1245 	            ((*(p) == ',')))
1246 		    ret[len++] = *p++;
1247 		else {
1248 		    int val = *(unsigned char *)p++;
1249 		    int hi = val / 0x10, lo = val % 0x10;
1250 		    ret[len++] = '%';
1251 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1252 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1253 		}
1254 	    }
1255 	}
1256 	if (uri->query_raw != NULL) {
1257 	    if (len + 1 >= max) {
1258 		max *= 2;
1259 		temp = (xmlChar *) xmlRealloc(ret,
1260 			(max + 1) * sizeof(xmlChar));
1261 		if (temp == NULL) {
1262 			xmlGenericError(xmlGenericErrorContext,
1263 				"xmlSaveUri: out of memory\n");
1264                      xmlFree(ret);
1265 			return(NULL);
1266 		    }
1267 		    ret = temp;
1268 	    }
1269 	    ret[len++] = '?';
1270 	    p = uri->query_raw;
1271 	    while (*p != 0) {
1272 		if (len + 1 >= max) {
1273 		    max *= 2;
1274 		    temp = (xmlChar *) xmlRealloc(ret,
1275 			    (max + 1) * sizeof(xmlChar));
1276 		    if (temp == NULL) {
1277 			xmlGenericError(xmlGenericErrorContext,
1278 				"xmlSaveUri: out of memory\n");
1279                      xmlFree(ret);
1280 			return(NULL);
1281 		    }
1282 		    ret = temp;
1283 		}
1284 		ret[len++] = *p++;
1285 	    }
1286 	} else if (uri->query != NULL) {
1287 	    if (len + 3 >= max) {
1288 		max *= 2;
1289 		temp = (xmlChar *) xmlRealloc(ret,
1290 			(max + 1) * sizeof(xmlChar));
1291 		if (temp == NULL) {
1292 			xmlGenericError(xmlGenericErrorContext,
1293 				"xmlSaveUri: out of memory\n");
1294                      xmlFree(ret);
1295 			return(NULL);
1296 		    }
1297 		    ret = temp;
1298 	    }
1299 	    ret[len++] = '?';
1300 	    p = uri->query;
1301 	    while (*p != 0) {
1302 		if (len + 3 >= max) {
1303 		    max *= 2;
1304 		    temp = (xmlChar *) xmlRealloc(ret,
1305 			    (max + 1) * sizeof(xmlChar));
1306 		    if (temp == NULL) {
1307 			xmlGenericError(xmlGenericErrorContext,
1308 				"xmlSaveUri: out of memory\n");
1309                      xmlFree(ret);
1310 			return(NULL);
1311 		    }
1312 		    ret = temp;
1313 		}
1314 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1315 		    ret[len++] = *p++;
1316 		else {
1317 		    int val = *(unsigned char *)p++;
1318 		    int hi = val / 0x10, lo = val % 0x10;
1319 		    ret[len++] = '%';
1320 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1321 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1322 		}
1323 	    }
1324 	}
1325     }
1326     if (uri->fragment != NULL) {
1327 	if (len + 3 >= max) {
1328 	    max *= 2;
1329 	    temp = (xmlChar *) xmlRealloc(ret,
1330 		    (max + 1) * sizeof(xmlChar));
1331 	    if (temp == NULL) {
1332 			xmlGenericError(xmlGenericErrorContext,
1333 				"xmlSaveUri: out of memory\n");
1334                      xmlFree(ret);
1335 			return(NULL);
1336 		    }
1337 		    ret = temp;
1338 	}
1339 	ret[len++] = '#';
1340 	p = uri->fragment;
1341 	while (*p != 0) {
1342 	    if (len + 3 >= max) {
1343 		max *= 2;
1344 		temp = (xmlChar *) xmlRealloc(ret,
1345 			(max + 1) * sizeof(xmlChar));
1346 		if (temp == NULL) {
1347 			xmlGenericError(xmlGenericErrorContext,
1348 				"xmlSaveUri: out of memory\n");
1349                      xmlFree(ret);
1350 			return(NULL);
1351 		    }
1352 		    ret = temp;
1353 	    }
1354 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1355 		ret[len++] = *p++;
1356 	    else {
1357 		int val = *(unsigned char *)p++;
1358 		int hi = val / 0x10, lo = val % 0x10;
1359 		ret[len++] = '%';
1360 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1361 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1362 	    }
1363 	}
1364     }
1365     if (len >= max) {
1366 	max *= 2;
1367 	temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1368 	if (temp == NULL) {
1369 			xmlGenericError(xmlGenericErrorContext,
1370 				"xmlSaveUri: out of memory\n");
1371                      xmlFree(ret);
1372 			return(NULL);
1373 		    }
1374 		    ret = temp;
1375     }
1376     ret[len++] = 0;
1377     return(ret);
1378 }
1379 
1380 /**
1381  * xmlPrintURI:
1382  * @stream:  a FILE* for the output
1383  * @uri:  pointer to an xmlURI
1384  *
1385  * Prints the URI in the stream @stream.
1386  */
1387 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1388 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1389     xmlChar *out;
1390 
1391     out = xmlSaveUri(uri);
1392     if (out != NULL) {
1393 	fprintf(stream, "%s", (char *) out);
1394 	xmlFree(out);
1395     }
1396 }
1397 
1398 /**
1399  * xmlCleanURI:
1400  * @uri:  pointer to an xmlURI
1401  *
1402  * Make sure the xmlURI struct is free of content
1403  */
1404 static void
xmlCleanURI(xmlURIPtr uri)1405 xmlCleanURI(xmlURIPtr uri) {
1406     if (uri == NULL) return;
1407 
1408     if (uri->scheme != NULL) xmlFree(uri->scheme);
1409     uri->scheme = NULL;
1410     if (uri->server != NULL) xmlFree(uri->server);
1411     uri->server = NULL;
1412     if (uri->user != NULL) xmlFree(uri->user);
1413     uri->user = NULL;
1414     if (uri->path != NULL) xmlFree(uri->path);
1415     uri->path = NULL;
1416     if (uri->fragment != NULL) xmlFree(uri->fragment);
1417     uri->fragment = NULL;
1418     if (uri->opaque != NULL) xmlFree(uri->opaque);
1419     uri->opaque = NULL;
1420     if (uri->authority != NULL) xmlFree(uri->authority);
1421     uri->authority = NULL;
1422     if (uri->query != NULL) xmlFree(uri->query);
1423     uri->query = NULL;
1424     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1425     uri->query_raw = NULL;
1426 }
1427 
1428 /**
1429  * xmlFreeURI:
1430  * @uri:  pointer to an xmlURI
1431  *
1432  * Free up the xmlURI struct
1433  */
1434 void
xmlFreeURI(xmlURIPtr uri)1435 xmlFreeURI(xmlURIPtr uri) {
1436     if (uri == NULL) return;
1437 
1438     if (uri->scheme != NULL) xmlFree(uri->scheme);
1439     if (uri->server != NULL) xmlFree(uri->server);
1440     if (uri->user != NULL) xmlFree(uri->user);
1441     if (uri->path != NULL) xmlFree(uri->path);
1442     if (uri->fragment != NULL) xmlFree(uri->fragment);
1443     if (uri->opaque != NULL) xmlFree(uri->opaque);
1444     if (uri->authority != NULL) xmlFree(uri->authority);
1445     if (uri->query != NULL) xmlFree(uri->query);
1446     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1447     xmlFree(uri);
1448 }
1449 
1450 /************************************************************************
1451  *									*
1452  *			Helper functions				*
1453  *									*
1454  ************************************************************************/
1455 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
	return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
      return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
	/*
	 * c) All occurrences of "./", where "." is a complete path segment,
	 *    are removed from the buffer string.
	 */
	if ((cur[0] == '.') && (cur[1] == '/')) {
	    cur += 2;
	    /* '//' normalization should be done at this point too */
	    while (cur[0] == '/')
		cur++;
	    continue;
	}

	/*
	 * d) If the buffer string ends with "." as a complete path segment,
	 *    that "." is removed.
	 */
	if ((cur[0] == '.') && (cur[1] == '\0'))
	    break;

	/* Otherwise keep the segment.  */
	while (cur[0] != '/') {
            if (cur[0] == '\0')
              goto done_cd;
	    (out++)[0] = (cur++)[0];
	}
	/* normalize // : collapse a run of slashes into a single one */
	while ((cur[0] == '/') && (cur[1] == '/'))
	    cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
	return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
          ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
          break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
          cur = segp;
          continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
          cur[0] = '\0';
          break;
        }
	/* The source and destination overlap, so copy by hand instead of
	 * using strcpy(cur, segp + 3).
	 */
	tmp = cur;
	segp += 3;
	while ((*tmp++ = *segp++) != 0);

        /* Step back over the slash(es) that precede "cur".  If that walk
         * reaches the start of the buffer and the buffer starts with a
         * slash, there is no previous segment: keep going from here.
         * Reaching the start of the buffer on a non-slash character means
         * the previous segment is a single character at path[0] (as in
         * "a/b/../.."), and it still has to be re-examined below.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
          ;
        if ((segp == path) && (segp[0] == '/'))
          continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
          --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only for paths that start
     * with a slash; a relative path keeps its leading ".." segments).
     */
    if (path[0] == '/') {
      cur = path;
      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
             && ((cur[3] == '/') || (cur[3] == '\0')))
	cur += 3;

      if (cur != path) {
	out = path;
	while (cur[0] != '\0')
          (out++)[0] = (cur++)[0];
	out[0] = 0;
      }
    }

    return(0);
}
1643 
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
	return(1);
    if ((c >= 'a') && (c <= 'f'))
	return(1);
    if ((c >= 'A') && (c <= 'F'))
	return(1);
    return(0);
}
1651 
1652 /**
1653  * xmlURIUnescapeString:
1654  * @str:  the string to unescape
1655  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
1656  * @target:  optional destination buffer
1657  *
1658  * Unescaping routine, but does not check that the string is an URI. The
1659  * output is a direct unsigned char translation of %XX values (no encoding)
1660  * Note that the length of the result can only be smaller or same size as
1661  * the input string.
1662  *
1663  * Returns a copy of the string, but unescaped, will return NULL only in case
1664  * of error
1665  */
1666 char *
xmlURIUnescapeString(const char * str,int len,char * target)1667 xmlURIUnescapeString(const char *str, int len, char *target) {
1668     char *ret, *out;
1669     const char *in;
1670 
1671     if (str == NULL)
1672 	return(NULL);
1673     if (len <= 0) len = strlen(str);
1674     if (len < 0) return(NULL);
1675 
1676     if (target == NULL) {
1677 	ret = (char *) xmlMallocAtomic(len + 1);
1678 	if (ret == NULL) {
1679 	    xmlGenericError(xmlGenericErrorContext,
1680 		    "xmlURIUnescapeString: out of memory\n");
1681 	    return(NULL);
1682 	}
1683     } else
1684 	ret = target;
1685     in = str;
1686     out = ret;
1687     while(len > 0) {
1688 	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1689 	    in++;
1690 	    if ((*in >= '0') && (*in <= '9'))
1691 	        *out = (*in - '0');
1692 	    else if ((*in >= 'a') && (*in <= 'f'))
1693 	        *out = (*in - 'a') + 10;
1694 	    else if ((*in >= 'A') && (*in <= 'F'))
1695 	        *out = (*in - 'A') + 10;
1696 	    in++;
1697 	    if ((*in >= '0') && (*in <= '9'))
1698 	        *out = *out * 16 + (*in - '0');
1699 	    else if ((*in >= 'a') && (*in <= 'f'))
1700 	        *out = *out * 16 + (*in - 'a') + 10;
1701 	    else if ((*in >= 'A') && (*in <= 'F'))
1702 	        *out = *out * 16 + (*in - 'A') + 10;
1703 	    in++;
1704 	    len -= 3;
1705 	    out++;
1706 	} else {
1707 	    *out++ = *in++;
1708 	    len--;
1709 	}
1710     }
1711     *out = 0;
1712     return(ret);
1713 }
1714 
1715 /**
1716  * xmlURIEscapeStr:
1717  * @str:  string to escape
1718  * @list: exception list string of chars not to escape
1719  *
1720  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1721  * and the characters in the exception list.
1722  *
1723  * Returns a new escaped string or NULL in case of error.
1724  */
1725 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1726 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1727     xmlChar *ret, ch;
1728     xmlChar *temp;
1729     const xmlChar *in;
1730 
1731     unsigned int len, out;
1732 
1733     if (str == NULL)
1734 	return(NULL);
1735     if (str[0] == 0)
1736 	return(xmlStrdup(str));
1737     len = xmlStrlen(str);
1738     if (!(len > 0)) return(NULL);
1739 
1740     len += 20;
1741     ret = (xmlChar *) xmlMallocAtomic(len);
1742     if (ret == NULL) {
1743 	xmlGenericError(xmlGenericErrorContext,
1744 		"xmlURIEscapeStr: out of memory\n");
1745 	return(NULL);
1746     }
1747     in = (const xmlChar *) str;
1748     out = 0;
1749     while(*in != 0) {
1750 	if (len - out <= 3) {
1751 	    len += 20;
1752 	    temp = (xmlChar *) xmlRealloc(ret, len);
1753 	    if (temp == NULL) {
1754 		xmlGenericError(xmlGenericErrorContext,
1755 			"xmlURIEscapeStr: out of memory\n");
1756 		xmlFree(ret);
1757 		return(NULL);
1758 	    }
1759 	    ret = temp;
1760 	}
1761 
1762 	ch = *in;
1763 
1764 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1765 	    unsigned char val;
1766 	    ret[out++] = '%';
1767 	    val = ch >> 4;
1768 	    if (val <= 9)
1769 		ret[out++] = '0' + val;
1770 	    else
1771 		ret[out++] = 'A' + val - 0xA;
1772 	    val = ch & 0xF;
1773 	    if (val <= 9)
1774 		ret[out++] = '0' + val;
1775 	    else
1776 		ret[out++] = 'A' + val - 0xA;
1777 	    in++;
1778 	} else {
1779 	    ret[out++] = *in++;
1780 	}
1781 
1782     }
1783     ret[out] = 0;
1784     return(ret);
1785 }
1786 
1787 /**
1788  * xmlURIEscape:
1789  * @str:  the string of the URI to escape
1790  *
1791  * Escaping routine, does not do validity checks !
1792  * It will try to escape the chars needing this, but this is heuristic
1793  * based it's impossible to be sure.
1794  *
1795  * Returns an copy of the string, but escaped
1796  *
1797  * 25 May 2001
1798  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1799  * according to RFC2396.
1800  *   - Carl Douglas
1801  */
1802 xmlChar *
xmlURIEscape(const xmlChar * str)1803 xmlURIEscape(const xmlChar * str)
1804 {
1805     xmlChar *ret, *segment = NULL;
1806     xmlURIPtr uri;
1807     int ret2;
1808 
1809 #define NULLCHK(p) if(!p) { \
1810                    xmlGenericError(xmlGenericErrorContext, \
1811                         "xmlURIEscape: out of memory\n"); \
1812                         xmlFreeURI(uri); \
1813                         return NULL; } \
1814 
1815     if (str == NULL)
1816         return (NULL);
1817 
1818     uri = xmlCreateURI();
1819     if (uri != NULL) {
1820 	/*
1821 	 * Allow escaping errors in the unescaped form
1822 	 */
1823         uri->cleanup = 1;
1824         ret2 = xmlParseURIReference(uri, (const char *)str);
1825         if (ret2) {
1826             xmlFreeURI(uri);
1827             return (NULL);
1828         }
1829     }
1830 
1831     if (!uri)
1832         return NULL;
1833 
1834     ret = NULL;
1835 
1836     if (uri->scheme) {
1837         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1838         NULLCHK(segment)
1839         ret = xmlStrcat(ret, segment);
1840         ret = xmlStrcat(ret, BAD_CAST ":");
1841         xmlFree(segment);
1842     }
1843 
1844     if (uri->authority) {
1845         segment =
1846             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1847         NULLCHK(segment)
1848         ret = xmlStrcat(ret, BAD_CAST "//");
1849         ret = xmlStrcat(ret, segment);
1850         xmlFree(segment);
1851     }
1852 
1853     if (uri->user) {
1854         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1855         NULLCHK(segment)
1856 		ret = xmlStrcat(ret,BAD_CAST "//");
1857         ret = xmlStrcat(ret, segment);
1858         ret = xmlStrcat(ret, BAD_CAST "@");
1859         xmlFree(segment);
1860     }
1861 
1862     if (uri->server) {
1863         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1864         NULLCHK(segment)
1865 		if (uri->user == NULL)
1866 		ret = xmlStrcat(ret, BAD_CAST "//");
1867         ret = xmlStrcat(ret, segment);
1868         xmlFree(segment);
1869     }
1870 
1871     if (uri->port) {
1872         xmlChar port[10];
1873 
1874         snprintf((char *) port, 10, "%d", uri->port);
1875         ret = xmlStrcat(ret, BAD_CAST ":");
1876         ret = xmlStrcat(ret, port);
1877     }
1878 
1879     if (uri->path) {
1880         segment =
1881             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1882         NULLCHK(segment)
1883         ret = xmlStrcat(ret, segment);
1884         xmlFree(segment);
1885     }
1886 
1887     if (uri->query_raw) {
1888         ret = xmlStrcat(ret, BAD_CAST "?");
1889         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1890     }
1891     else if (uri->query) {
1892         segment =
1893             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1894         NULLCHK(segment)
1895         ret = xmlStrcat(ret, BAD_CAST "?");
1896         ret = xmlStrcat(ret, segment);
1897         xmlFree(segment);
1898     }
1899 
1900     if (uri->opaque) {
1901         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1902         NULLCHK(segment)
1903         ret = xmlStrcat(ret, segment);
1904         xmlFree(segment);
1905     }
1906 
1907     if (uri->fragment) {
1908         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1909         NULLCHK(segment)
1910         ret = xmlStrcat(ret, BAD_CAST "#");
1911         ret = xmlStrcat(ret, segment);
1912         xmlFree(segment);
1913     }
1914 
1915     xmlFreeURI(uri);
1916 #undef NULLCHK
1917 
1918     return (ret);
1919 }
1920 
1921 /************************************************************************
1922  *									*
1923  *			Public functions				*
1924  *									*
1925  ************************************************************************/
1926 
1927 /**
1928  * xmlBuildURI:
1929  * @URI:  the URI instance found in the document
1930  * @base:  the base value
1931  *
1932  * Computes the final URI of the reference done by checking that
1933  * the given URI is valid, and building the final URI using the
1934  * base URI. This is processed according to section 5.2 of the
1935  * RFC 2396
1936  *
1937  * 5.2. Resolving Relative References to Absolute Form
1938  *
1939  * Returns a new URI string (to be freed by the caller) or NULL in case
1940  *         of error.
1941  */
1942 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1943 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1944     xmlChar *val = NULL;
1945     int ret, len, indx, cur, out;
1946     xmlURIPtr ref = NULL;
1947     xmlURIPtr bas = NULL;
1948     xmlURIPtr res = NULL;
1949 
1950     /*
1951      * 1) The URI reference is parsed into the potential four components and
1952      *    fragment identifier, as described in Section 4.3.
1953      *
1954      *    NOTE that a completely empty URI is treated by modern browsers
1955      *    as a reference to "." rather than as a synonym for the current
1956      *    URI.  Should we do that here?
1957      */
1958     if (URI == NULL)
1959 	ret = -1;
1960     else {
1961 	if (*URI) {
1962 	    ref = xmlCreateURI();
1963 	    if (ref == NULL)
1964 		goto done;
1965 	    ret = xmlParseURIReference(ref, (const char *) URI);
1966 	}
1967 	else
1968 	    ret = 0;
1969     }
1970     if (ret != 0)
1971 	goto done;
1972     if ((ref != NULL) && (ref->scheme != NULL)) {
1973 	/*
1974 	 * The URI is absolute don't modify.
1975 	 */
1976 	val = xmlStrdup(URI);
1977 	goto done;
1978     }
1979     if (base == NULL)
1980 	ret = -1;
1981     else {
1982 	bas = xmlCreateURI();
1983 	if (bas == NULL)
1984 	    goto done;
1985 	ret = xmlParseURIReference(bas, (const char *) base);
1986     }
1987     if (ret != 0) {
1988 	if (ref)
1989 	    val = xmlSaveUri(ref);
1990 	goto done;
1991     }
1992     if (ref == NULL) {
1993 	/*
1994 	 * the base fragment must be ignored
1995 	 */
1996 	if (bas->fragment != NULL) {
1997 	    xmlFree(bas->fragment);
1998 	    bas->fragment = NULL;
1999 	}
2000 	val = xmlSaveUri(bas);
2001 	goto done;
2002     }
2003 
2004     /*
2005      * 2) If the path component is empty and the scheme, authority, and
2006      *    query components are undefined, then it is a reference to the
2007      *    current document and we are done.  Otherwise, the reference URI's
2008      *    query and fragment components are defined as found (or not found)
2009      *    within the URI reference and not inherited from the base URI.
2010      *
2011      *    NOTE that in modern browsers, the parsing differs from the above
2012      *    in the following aspect:  the query component is allowed to be
2013      *    defined while still treating this as a reference to the current
2014      *    document.
2015      */
2016     res = xmlCreateURI();
2017     if (res == NULL)
2018 	goto done;
2019     if ((ref->scheme == NULL) && (ref->path == NULL) &&
2020 	((ref->authority == NULL) && (ref->server == NULL))) {
2021 	if (bas->scheme != NULL)
2022 	    res->scheme = xmlMemStrdup(bas->scheme);
2023 	if (bas->authority != NULL)
2024 	    res->authority = xmlMemStrdup(bas->authority);
2025 	else if (bas->server != NULL) {
2026 	    res->server = xmlMemStrdup(bas->server);
2027 	    if (bas->user != NULL)
2028 		res->user = xmlMemStrdup(bas->user);
2029 	    res->port = bas->port;
2030 	}
2031 	if (bas->path != NULL)
2032 	    res->path = xmlMemStrdup(bas->path);
2033 	if (ref->query_raw != NULL)
2034 	    res->query_raw = xmlMemStrdup (ref->query_raw);
2035 	else if (ref->query != NULL)
2036 	    res->query = xmlMemStrdup(ref->query);
2037 	else if (bas->query_raw != NULL)
2038 	    res->query_raw = xmlMemStrdup(bas->query_raw);
2039 	else if (bas->query != NULL)
2040 	    res->query = xmlMemStrdup(bas->query);
2041 	if (ref->fragment != NULL)
2042 	    res->fragment = xmlMemStrdup(ref->fragment);
2043 	goto step_7;
2044     }
2045 
2046     /*
2047      * 3) If the scheme component is defined, indicating that the reference
2048      *    starts with a scheme name, then the reference is interpreted as an
2049      *    absolute URI and we are done.  Otherwise, the reference URI's
2050      *    scheme is inherited from the base URI's scheme component.
2051      */
2052     if (ref->scheme != NULL) {
2053 	val = xmlSaveUri(ref);
2054 	goto done;
2055     }
2056     if (bas->scheme != NULL)
2057 	res->scheme = xmlMemStrdup(bas->scheme);
2058 
2059     if (ref->query_raw != NULL)
2060 	res->query_raw = xmlMemStrdup(ref->query_raw);
2061     else if (ref->query != NULL)
2062 	res->query = xmlMemStrdup(ref->query);
2063     if (ref->fragment != NULL)
2064 	res->fragment = xmlMemStrdup(ref->fragment);
2065 
2066     /*
2067      * 4) If the authority component is defined, then the reference is a
2068      *    network-path and we skip to step 7.  Otherwise, the reference
2069      *    URI's authority is inherited from the base URI's authority
2070      *    component, which will also be undefined if the URI scheme does not
2071      *    use an authority component.
2072      */
2073     if ((ref->authority != NULL) || (ref->server != NULL)) {
2074 	if (ref->authority != NULL)
2075 	    res->authority = xmlMemStrdup(ref->authority);
2076 	else {
2077 	    res->server = xmlMemStrdup(ref->server);
2078 	    if (ref->user != NULL)
2079 		res->user = xmlMemStrdup(ref->user);
2080             res->port = ref->port;
2081 	}
2082 	if (ref->path != NULL)
2083 	    res->path = xmlMemStrdup(ref->path);
2084 	goto step_7;
2085     }
2086     if (bas->authority != NULL)
2087 	res->authority = xmlMemStrdup(bas->authority);
2088     else if (bas->server != NULL) {
2089 	res->server = xmlMemStrdup(bas->server);
2090 	if (bas->user != NULL)
2091 	    res->user = xmlMemStrdup(bas->user);
2092 	res->port = bas->port;
2093     }
2094 
2095     /*
2096      * 5) If the path component begins with a slash character ("/"), then
2097      *    the reference is an absolute-path and we skip to step 7.
2098      */
2099     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2100 	res->path = xmlMemStrdup(ref->path);
2101 	goto step_7;
2102     }
2103 
2104 
2105     /*
2106      * 6) If this step is reached, then we are resolving a relative-path
2107      *    reference.  The relative path needs to be merged with the base
2108      *    URI's path.  Although there are many ways to do this, we will
2109      *    describe a simple method using a separate string buffer.
2110      *
2111      * Allocate a buffer large enough for the result string.
2112      */
2113     len = 2; /* extra / and 0 */
2114     if (ref->path != NULL)
2115 	len += strlen(ref->path);
2116     if (bas->path != NULL)
2117 	len += strlen(bas->path);
2118     res->path = (char *) xmlMallocAtomic(len);
2119     if (res->path == NULL) {
2120 	xmlGenericError(xmlGenericErrorContext,
2121 		"xmlBuildURI: out of memory\n");
2122 	goto done;
2123     }
2124     res->path[0] = 0;
2125 
2126     /*
2127      * a) All but the last segment of the base URI's path component is
2128      *    copied to the buffer.  In other words, any characters after the
2129      *    last (right-most) slash character, if any, are excluded.
2130      */
2131     cur = 0;
2132     out = 0;
2133     if (bas->path != NULL) {
2134 	while (bas->path[cur] != 0) {
2135 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2136 		cur++;
2137 	    if (bas->path[cur] == 0)
2138 		break;
2139 
2140 	    cur++;
2141 	    while (out < cur) {
2142 		res->path[out] = bas->path[out];
2143 		out++;
2144 	    }
2145 	}
2146     }
2147     res->path[out] = 0;
2148 
2149     /*
2150      * b) The reference's path component is appended to the buffer
2151      *    string.
2152      */
2153     if (ref->path != NULL && ref->path[0] != 0) {
2154 	indx = 0;
2155 	/*
2156 	 * Ensure the path includes a '/'
2157 	 */
2158 	if ((out == 0) && (bas->server != NULL))
2159 	    res->path[out++] = '/';
2160 	while (ref->path[indx] != 0) {
2161 	    res->path[out++] = ref->path[indx++];
2162 	}
2163     }
2164     res->path[out] = 0;
2165 
2166     /*
2167      * Steps c) to h) are really path normalization steps
2168      */
2169     xmlNormalizeURIPath(res->path);
2170 
2171 step_7:
2172 
2173     /*
2174      * 7) The resulting URI components, including any inherited from the
2175      *    base URI, are recombined to give the absolute form of the URI
2176      *    reference.
2177      */
2178     val = xmlSaveUri(res);
2179 
2180 done:
2181     if (ref != NULL)
2182 	xmlFreeURI(ref);
2183     if (bas != NULL)
2184 	xmlFreeURI(bas);
2185     if (res != NULL)
2186 	xmlFreeURI(res);
2187     return(val);
2188 }
2189 
2190 /**
2191  * xmlBuildRelativeURI:
2192  * @URI:  the URI reference under consideration
2193  * @base:  the base value
2194  *
2195  * Expresses the URI of the reference in terms relative to the
2196  * base.  Some examples of this operation include:
2197  *     base = "http://site1.com/docs/book1.html"
2198  *        URI input                        URI returned
2199  *     docs/pic1.gif                    pic1.gif
2200  *     docs/img/pic1.gif                img/pic1.gif
2201  *     img/pic1.gif                     ../img/pic1.gif
2202  *     http://site1.com/docs/pic1.gif   pic1.gif
2203  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2204  *
2205  *     base = "docs/book1.html"
2206  *        URI input                        URI returned
2207  *     docs/pic1.gif                    pic1.gif
2208  *     docs/img/pic1.gif                img/pic1.gif
2209  *     img/pic1.gif                     ../img/pic1.gif
2210  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2211  *
2212  *
2213  * Note: if the URI reference is really wierd or complicated, it may be
2214  *       worthwhile to first convert it into a "nice" one by calling
2215  *       xmlBuildURI (using 'base') before calling this routine,
2216  *       since this routine (for reasonable efficiency) assumes URI has
2217  *       already been through some validation.
2218  *
2219  * Returns a new URI string (to be freed by the caller) or NULL in case
2220  * error.
2221  */
2222 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2223 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2224 {
2225     xmlChar *val = NULL;
2226     int ret;
2227     int ix;
2228     int pos = 0;
2229     int nbslash = 0;
2230     int len;
2231     xmlURIPtr ref = NULL;
2232     xmlURIPtr bas = NULL;
2233     xmlChar *bptr, *uptr, *vptr;
2234     int remove_path = 0;
2235 
2236     if ((URI == NULL) || (*URI == 0))
2237 	return NULL;
2238 
2239     /*
2240      * First parse URI into a standard form
2241      */
2242     ref = xmlCreateURI ();
2243     if (ref == NULL)
2244 	return NULL;
2245     /* If URI not already in "relative" form */
2246     if (URI[0] != '.') {
2247 	ret = xmlParseURIReference (ref, (const char *) URI);
2248 	if (ret != 0)
2249 	    goto done;		/* Error in URI, return NULL */
2250     } else
2251 	ref->path = (char *)xmlStrdup(URI);
2252 
2253     /*
2254      * Next parse base into the same standard form
2255      */
2256     if ((base == NULL) || (*base == 0)) {
2257 	val = xmlStrdup (URI);
2258 	goto done;
2259     }
2260     bas = xmlCreateURI ();
2261     if (bas == NULL)
2262 	goto done;
2263     if (base[0] != '.') {
2264 	ret = xmlParseURIReference (bas, (const char *) base);
2265 	if (ret != 0)
2266 	    goto done;		/* Error in base, return NULL */
2267     } else
2268 	bas->path = (char *)xmlStrdup(base);
2269 
2270     /*
2271      * If the scheme / server on the URI differs from the base,
2272      * just return the URI
2273      */
2274     if ((ref->scheme != NULL) &&
2275 	((bas->scheme == NULL) ||
2276 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2277 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2278 	val = xmlStrdup (URI);
2279 	goto done;
2280     }
2281     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2282 	val = xmlStrdup(BAD_CAST "");
2283 	goto done;
2284     }
2285     if (bas->path == NULL) {
2286 	val = xmlStrdup((xmlChar *)ref->path);
2287 	goto done;
2288     }
2289     if (ref->path == NULL) {
2290         ref->path = (char *) "/";
2291 	remove_path = 1;
2292     }
2293 
2294     /*
2295      * At this point (at last!) we can compare the two paths
2296      *
2297      * First we take care of the special case where either of the
2298      * two path components may be missing (bug 316224)
2299      */
2300     if (bas->path == NULL) {
2301 	if (ref->path != NULL) {
2302 	    uptr = (xmlChar *) ref->path;
2303 	    if (*uptr == '/')
2304 		uptr++;
2305 	    /* exception characters from xmlSaveUri */
2306 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2307 	}
2308 	goto done;
2309     }
2310     bptr = (xmlChar *)bas->path;
2311     if (ref->path == NULL) {
2312 	for (ix = 0; bptr[ix] != 0; ix++) {
2313 	    if (bptr[ix] == '/')
2314 		nbslash++;
2315 	}
2316 	uptr = NULL;
2317 	len = 1;	/* this is for a string terminator only */
2318     } else {
2319     /*
2320      * Next we compare the two strings and find where they first differ
2321      */
2322 	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2323             pos += 2;
2324 	if ((*bptr == '.') && (bptr[1] == '/'))
2325             bptr += 2;
2326 	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2327 	    bptr++;
2328 	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2329 	    pos++;
2330 
2331 	if (bptr[pos] == ref->path[pos]) {
2332 	    val = xmlStrdup(BAD_CAST "");
2333 	    goto done;		/* (I can't imagine why anyone would do this) */
2334 	}
2335 
2336 	/*
2337 	 * In URI, "back up" to the last '/' encountered.  This will be the
2338 	 * beginning of the "unique" suffix of URI
2339 	 */
2340 	ix = pos;
2341 	if ((ref->path[ix] == '/') && (ix > 0))
2342 	    ix--;
2343 	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2344 	    ix -= 2;
2345 	for (; ix > 0; ix--) {
2346 	    if (ref->path[ix] == '/')
2347 		break;
2348 	}
2349 	if (ix == 0) {
2350 	    uptr = (xmlChar *)ref->path;
2351 	} else {
2352 	    ix++;
2353 	    uptr = (xmlChar *)&ref->path[ix];
2354 	}
2355 
2356 	/*
2357 	 * In base, count the number of '/' from the differing point
2358 	 */
2359 	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2360 	    for (; bptr[ix] != 0; ix++) {
2361 		if (bptr[ix] == '/')
2362 		    nbslash++;
2363 	    }
2364 	}
2365 	len = xmlStrlen (uptr) + 1;
2366     }
2367 
2368     if (nbslash == 0) {
2369 	if (uptr != NULL)
2370 	    /* exception characters from xmlSaveUri */
2371 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2372 	goto done;
2373     }
2374 
2375     /*
2376      * Allocate just enough space for the returned string -
2377      * length of the remainder of the URI, plus enough space
2378      * for the "../" groups, plus one for the terminator
2379      */
2380     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2381     if (val == NULL) {
2382 	xmlGenericError(xmlGenericErrorContext,
2383 		"xmlBuildRelativeURI: out of memory\n");
2384 	goto done;
2385     }
2386     vptr = val;
2387     /*
2388      * Put in as many "../" as needed
2389      */
2390     for (; nbslash>0; nbslash--) {
2391 	*vptr++ = '.';
2392 	*vptr++ = '.';
2393 	*vptr++ = '/';
2394     }
2395     /*
2396      * Finish up with the end of the URI
2397      */
2398     if (uptr != NULL) {
2399         if ((vptr > val) && (len > 0) &&
2400 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2401 	    memcpy (vptr, uptr + 1, len - 1);
2402 	    vptr[len - 2] = 0;
2403 	} else {
2404 	    memcpy (vptr, uptr, len);
2405 	    vptr[len - 1] = 0;
2406 	}
2407     } else {
2408 	vptr[len - 1] = 0;
2409     }
2410 
2411     /* escape the freshly-built path */
2412     vptr = val;
2413 	/* exception characters from xmlSaveUri */
2414     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2415     xmlFree(vptr);
2416 
2417 done:
2418     /*
2419      * Free the working variables
2420      */
2421     if (remove_path != 0)
2422         ref->path = NULL;
2423     if (ref != NULL)
2424 	xmlFreeURI (ref);
2425     if (bas != NULL)
2426 	xmlFreeURI (bas);
2427 
2428     return val;
2429 }
2430 
2431 /**
2432  * xmlCanonicPath:
2433  * @path:  the resource locator in a filesystem notation
2434  *
2435  * Constructs a canonic path from the specified path.
2436  *
2437  * Returns a new canonic path, or a duplicate of the path parameter if the
2438  * construction fails. The caller is responsible for freeing the memory occupied
2439  * by the returned string. If there is insufficient memory available, or the
2440  * argument is NULL, the function returns NULL.
2441  */
2442 #define IS_WINDOWS_PATH(p) 					\
2443 	((p != NULL) &&						\
2444 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2445 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2446 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2447 xmlChar *
xmlCanonicPath(const xmlChar * path)2448 xmlCanonicPath(const xmlChar *path)
2449 {
2450 /*
2451  * For Windows implementations, additional work needs to be done to
2452  * replace backslashes in pathnames with "forward slashes"
2453  */
2454 #if defined(_WIN32) && !defined(__CYGWIN__)
2455     int len = 0;
2456     int i = 0;
2457     xmlChar *p = NULL;
2458 #endif
2459     xmlURIPtr uri;
2460     xmlChar *ret;
2461     const xmlChar *absuri;
2462 
2463     if (path == NULL)
2464 	return(NULL);
2465 
2466     /* sanitize filename starting with // so it can be used as URI */
2467     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2468         path++;
2469 
2470     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2471 	xmlFreeURI(uri);
2472 	return xmlStrdup(path);
2473     }
2474 
2475     /* Check if this is an "absolute uri" */
2476     absuri = xmlStrstr(path, BAD_CAST "://");
2477     if (absuri != NULL) {
2478         int l, j;
2479 	unsigned char c;
2480 	xmlChar *escURI;
2481 
2482         /*
2483 	 * this looks like an URI where some parts have not been
2484 	 * escaped leading to a parsing problem.  Check that the first
2485 	 * part matches a protocol.
2486 	 */
2487 	l = absuri - path;
2488 	/* Bypass if first part (part before the '://') is > 20 chars */
2489 	if ((l <= 0) || (l > 20))
2490 	    goto path_processing;
2491 	/* Bypass if any non-alpha characters are present in first part */
2492 	for (j = 0;j < l;j++) {
2493 	    c = path[j];
2494 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2495 	        goto path_processing;
2496 	}
2497 
2498 	/* Escape all except the characters specified in the supplied path */
2499         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2500 	if (escURI != NULL) {
2501 	    /* Try parsing the escaped path */
2502 	    uri = xmlParseURI((const char *) escURI);
2503 	    /* If successful, return the escaped string */
2504 	    if (uri != NULL) {
2505 	        xmlFreeURI(uri);
2506 		return escURI;
2507 	    }
2508 	}
2509     }
2510 
2511 path_processing:
2512 /* For Windows implementations, replace backslashes with 'forward slashes' */
2513 #if defined(_WIN32) && !defined(__CYGWIN__)
2514     /*
2515      * Create a URI structure
2516      */
2517     uri = xmlCreateURI();
2518     if (uri == NULL) {		/* Guard against 'out of memory' */
2519         return(NULL);
2520     }
2521 
2522     len = xmlStrlen(path);
2523     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2524         /* make the scheme 'file' */
2525 	uri->scheme = xmlStrdup(BAD_CAST "file");
2526 	/* allocate space for leading '/' + path + string terminator */
2527 	uri->path = xmlMallocAtomic(len + 2);
2528 	if (uri->path == NULL) {
2529 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2530 	    return(NULL);
2531 	}
2532 	/* Put in leading '/' plus path */
2533 	uri->path[0] = '/';
2534 	p = uri->path + 1;
2535 	strncpy(p, path, len + 1);
2536     } else {
2537 	uri->path = xmlStrdup(path);
2538 	if (uri->path == NULL) {
2539 	    xmlFreeURI(uri);
2540 	    return(NULL);
2541 	}
2542 	p = uri->path;
2543     }
2544     /* Now change all occurences of '\' to '/' */
2545     while (*p != '\0') {
2546 	if (*p == '\\')
2547 	    *p = '/';
2548 	p++;
2549     }
2550 
2551     if (uri->scheme == NULL) {
2552 	ret = xmlStrdup((const xmlChar *) uri->path);
2553     } else {
2554 	ret = xmlSaveUri(uri);
2555     }
2556 
2557     xmlFreeURI(uri);
2558 #else
2559     ret = xmlStrdup((const xmlChar *) path);
2560 #endif
2561     return(ret);
2562 }
2563 
2564 /**
2565  * xmlPathToURI:
2566  * @path:  the resource locator in a filesystem notation
2567  *
2568  * Constructs an URI expressing the existing path
2569  *
2570  * Returns a new URI, or a duplicate of the path parameter if the
2571  * construction fails. The caller is responsible for freeing the memory
2572  * occupied by the returned string. If there is insufficient memory available,
2573  * or the argument is NULL, the function returns NULL.
2574  */
2575 xmlChar *
xmlPathToURI(const xmlChar * path)2576 xmlPathToURI(const xmlChar *path)
2577 {
2578     xmlURIPtr uri;
2579     xmlURI temp;
2580     xmlChar *ret, *cal;
2581 
2582     if (path == NULL)
2583         return(NULL);
2584 
2585     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2586 	xmlFreeURI(uri);
2587 	return xmlStrdup(path);
2588     }
2589     cal = xmlCanonicPath(path);
2590     if (cal == NULL)
2591         return(NULL);
2592 #if defined(_WIN32) && !defined(__CYGWIN__)
2593     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2594        If 'cal' is a valid URI allready then we are done here, as continuing would make
2595        it invalid. */
2596     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2597 	xmlFreeURI(uri);
2598 	return cal;
2599     }
2600     /* 'cal' can contain a relative path with backslashes. If that is processed
2601        by xmlSaveURI, they will be escaped and the external entity loader machinery
2602        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2603     ret = cal;
2604     while (*ret != '\0') {
2605 	if (*ret == '\\')
2606 	    *ret = '/';
2607 	ret++;
2608     }
2609 #endif
2610     memset(&temp, 0, sizeof(temp));
2611     temp.path = (char *) cal;
2612     ret = xmlSaveUri(&temp);
2613     xmlFree(cal);
2614     return(ret);
2615 }
2616 #define bottom_uri
2617 #include "elfgcchack.h"
2618