1 /**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11 #define IN_LIBXML
12 #include "libxml.h"
13
14 #include <string.h>
15
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
20
21 static void xmlCleanURI(xmlURIPtr uri);
22
/*
 * Character classes from the old RFC 2396 grammar, still used by the
 * legacy handling code.  Unless noted otherwise these macros take a
 * character VALUE (x), not a pointer.
 *
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy macros this one takes a POINTER to the
 * character to test.
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Advance the pointer lvalue p to the next character: three bytes when
 * it points at a '%' (an escape sequence), one byte otherwise.  No check
 * is made here that two more bytes actually follow the '%'.
 * The argument is fully parenthesized so that an lvalue expression such
 * as *pp advances the intended pointer.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s through xmlStrndup(), typed as char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
108
/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * All ISA_xxx macros take a POINTER to the character to classify and
 * evaluate their argument more than once: do not pass expressions with
 * side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The && chain stops at the first mismatch, so the bytes after the '%'
 * are only read once the preceding byte was accepted.  The argument is
 * parenthesized before the offset is added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
164
165 /**
166 * xmlParse3986Scheme:
167 * @uri: pointer to an URI structure
168 * @str: pointer to the string to analyze
169 *
170 * Parse an URI scheme
171 *
172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173 *
174 * Returns 0 or the error code
175 */
176 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)177 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178 const char *cur;
179
180 if (str == NULL)
181 return(-1);
182
183 cur = *str;
184 if (!ISA_ALPHA(cur))
185 return(2);
186 cur++;
187 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189 if (uri != NULL) {
190 if (uri->scheme != NULL) xmlFree(uri->scheme);
191 uri->scheme = STRNDUP(*str, cur - *str);
192 }
193 *str = cur;
194 return(0);
195 }
196
197 /**
198 * xmlParse3986Fragment:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
201 *
202 * Parse the query part of an URI
203 *
204 * fragment = *( pchar / "/" / "?" )
205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206 * in the fragment identifier but this is used very broadly for
207 * xpointer scheme selection, so we are allowing it here to not break
208 * for example all the DocBook processing chains.
209 *
210 * Returns 0 or the error code
211 */
212 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)213 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214 {
215 const char *cur;
216
217 if (str == NULL)
218 return (-1);
219
220 cur = *str;
221
222 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223 (*cur == '[') || (*cur == ']') ||
224 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225 NEXT(cur);
226 if (uri != NULL) {
227 if (uri->fragment != NULL)
228 xmlFree(uri->fragment);
229 if (uri->cleanup & 2)
230 uri->fragment = STRNDUP(*str, cur - *str);
231 else
232 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233 }
234 *str = cur;
235 return (0);
236 }
237
238 /**
239 * xmlParse3986Query:
240 * @uri: pointer to an URI structure
241 * @str: pointer to the string to analyze
242 *
243 * Parse the query part of an URI
244 *
245 * query = *uric
246 *
247 * Returns 0 or the error code
248 */
249 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)250 xmlParse3986Query(xmlURIPtr uri, const char **str)
251 {
252 const char *cur;
253
254 if (str == NULL)
255 return (-1);
256
257 cur = *str;
258
259 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261 NEXT(cur);
262 if (uri != NULL) {
263 if (uri->query != NULL)
264 xmlFree(uri->query);
265 if (uri->cleanup & 2)
266 uri->query = STRNDUP(*str, cur - *str);
267 else
268 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269
270 /* Save the raw bytes of the query as well.
271 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272 */
273 if (uri->query_raw != NULL)
274 xmlFree (uri->query_raw);
275 uri->query_raw = STRNDUP (*str, cur - *str);
276 }
277 *str = cur;
278 return (0);
279 }
280
281 /**
282 * xmlParse3986Port:
283 * @uri: pointer to an URI structure
284 * @str: the string to analyze
285 *
286 * Parse a port part and fills in the appropriate fields
287 * of the @uri structure
288 *
289 * port = *DIGIT
290 *
291 * Returns 0 or the error code
292 */
293 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)294 xmlParse3986Port(xmlURIPtr uri, const char **str)
295 {
296 const char *cur = *str;
297
298 if (ISA_DIGIT(cur)) {
299 if (uri != NULL)
300 uri->port = 0;
301 while (ISA_DIGIT(cur)) {
302 if (uri != NULL)
303 uri->port = uri->port * 10 + (*cur - '0');
304 cur++;
305 }
306 *str = cur;
307 return(0);
308 }
309 return(1);
310 }
311
312 /**
313 * xmlParse3986Userinfo:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
317 * Parse an user informations part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
321 *
322 * Returns 0 or the error code
323 */
324 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)325 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326 {
327 const char *cur;
328
329 cur = *str;
330 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331 ISA_SUB_DELIM(cur) || (*cur == ':'))
332 NEXT(cur);
333 if (*cur == '@') {
334 if (uri != NULL) {
335 if (uri->user != NULL) xmlFree(uri->user);
336 if (uri->cleanup & 2)
337 uri->user = STRNDUP(*str, cur - *str);
338 else
339 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340 }
341 *str = cur;
342 return(0);
343 }
344 return(1);
345 }
346
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  *@str is only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int two, three;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[2] is only inspected when cur[1] is a digit, so the string
     * terminator is never stepped over.
     */
    two = ((cur[1] >= '0') && (cur[1] <= '9'));
    three = (two && (cur[2] >= '0') && (cur[2] <= '9'));

    if (!two) {
        /* 0-9 */
        cur += 1;
    } else if (!three) {
        /* 10-99: a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the upper bound applies to the THIRD digit */
        cur += 3;
    } else {
        return(1);
    }
    *str = cur;
    return(0);
}
384 /**
385 * xmlParse3986Host:
386 * @uri: pointer to an URI structure
387 * @str: the string to analyze
388 *
389 * Parse an host part and fills in the appropriate fields
390 * of the @uri structure
391 *
392 * host = IP-literal / IPv4address / reg-name
393 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
394 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
395 * reg-name = *( unreserved / pct-encoded / sub-delims )
396 *
397 * Returns 0 or the error code
398 */
399 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)400 xmlParse3986Host(xmlURIPtr uri, const char **str)
401 {
402 const char *cur = *str;
403 const char *host;
404
405 host = cur;
406 /*
407 * IPv6 and future adressing scheme are enclosed between brackets
408 */
409 if (*cur == '[') {
410 cur++;
411 while ((*cur != ']') && (*cur != 0))
412 cur++;
413 if (*cur != ']')
414 return(1);
415 cur++;
416 goto found;
417 }
418 /*
419 * try to parse an IPv4
420 */
421 if (ISA_DIGIT(cur)) {
422 if (xmlParse3986DecOctet(&cur) != 0)
423 goto not_ipv4;
424 if (*cur != '.')
425 goto not_ipv4;
426 cur++;
427 if (xmlParse3986DecOctet(&cur) != 0)
428 goto not_ipv4;
429 if (*cur != '.')
430 goto not_ipv4;
431 if (xmlParse3986DecOctet(&cur) != 0)
432 goto not_ipv4;
433 if (*cur != '.')
434 goto not_ipv4;
435 if (xmlParse3986DecOctet(&cur) != 0)
436 goto not_ipv4;
437 goto found;
438 not_ipv4:
439 cur = *str;
440 }
441 /*
442 * then this should be a hostname which can be empty
443 */
444 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445 NEXT(cur);
446 found:
447 if (uri != NULL) {
448 if (uri->authority != NULL) xmlFree(uri->authority);
449 uri->authority = NULL;
450 if (uri->server != NULL) xmlFree(uri->server);
451 if (cur != host) {
452 if (uri->cleanup & 2)
453 uri->server = STRNDUP(host, cur - host);
454 else
455 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456 } else
457 uri->server = NULL;
458 }
459 *str = cur;
460 return(0);
461 }
462
463 /**
464 * xmlParse3986Authority:
465 * @uri: pointer to an URI structure
466 * @str: the string to analyze
467 *
468 * Parse an authority part and fills in the appropriate fields
469 * of the @uri structure
470 *
471 * authority = [ userinfo "@" ] host [ ":" port ]
472 *
473 * Returns 0 or the error code
474 */
475 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)476 xmlParse3986Authority(xmlURIPtr uri, const char **str)
477 {
478 const char *cur;
479 int ret;
480
481 cur = *str;
482 /*
483 * try to parse an userinfo and check for the trailing @
484 */
485 ret = xmlParse3986Userinfo(uri, &cur);
486 if ((ret != 0) || (*cur != '@'))
487 cur = *str;
488 else
489 cur++;
490 ret = xmlParse3986Host(uri, &cur);
491 if (ret != 0) return(ret);
492 if (*cur == ':') {
493 cur++;
494 ret = xmlParse3986Port(uri, &cur);
495 if (ret != 0) return(ret);
496 }
497 *str = cur;
498 return(0);
499 }
500
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip a path segment and advance *@str past it.  Nothing is stored;
 * the caller extracts the path from the pointers.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    /*
     * A segment starting with the forbidden character is empty too:
     * without this test a required segment of zero length was accepted.
     */
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
533
534 /**
535 * xmlParse3986PathAbEmpty:
536 * @uri: pointer to an URI structure
537 * @str: the string to analyze
538 *
539 * Parse an path absolute or empty and fills in the appropriate fields
540 * of the @uri structure
541 *
542 * path-abempty = *( "/" segment )
543 *
544 * Returns 0 or the error code
545 */
546 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)547 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548 {
549 const char *cur;
550 int ret;
551
552 cur = *str;
553
554 while (*cur == '/') {
555 cur++;
556 ret = xmlParse3986Segment(&cur, 0, 1);
557 if (ret != 0) return(ret);
558 }
559 if (uri != NULL) {
560 if (uri->path != NULL) xmlFree(uri->path);
561 if (*str != cur) {
562 if (uri->cleanup & 2)
563 uri->path = STRNDUP(*str, cur - *str);
564 else
565 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
566 } else {
567 uri->path = NULL;
568 }
569 }
570 *str = cur;
571 return (0);
572 }
573
574 /**
575 * xmlParse3986PathAbsolute:
576 * @uri: pointer to an URI structure
577 * @str: the string to analyze
578 *
579 * Parse an path absolute and fills in the appropriate fields
580 * of the @uri structure
581 *
582 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
583 *
584 * Returns 0 or the error code
585 */
586 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)587 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
588 {
589 const char *cur;
590 int ret;
591
592 cur = *str;
593
594 if (*cur != '/')
595 return(1);
596 cur++;
597 ret = xmlParse3986Segment(&cur, 0, 0);
598 if (ret == 0) {
599 while (*cur == '/') {
600 cur++;
601 ret = xmlParse3986Segment(&cur, 0, 1);
602 if (ret != 0) return(ret);
603 }
604 }
605 if (uri != NULL) {
606 if (uri->path != NULL) xmlFree(uri->path);
607 if (cur != *str) {
608 if (uri->cleanup & 2)
609 uri->path = STRNDUP(*str, cur - *str);
610 else
611 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
612 } else {
613 uri->path = NULL;
614 }
615 }
616 *str = cur;
617 return (0);
618 }
619
620 /**
621 * xmlParse3986PathRootless:
622 * @uri: pointer to an URI structure
623 * @str: the string to analyze
624 *
625 * Parse an path without root and fills in the appropriate fields
626 * of the @uri structure
627 *
628 * path-rootless = segment-nz *( "/" segment )
629 *
630 * Returns 0 or the error code
631 */
632 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)633 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
634 {
635 const char *cur;
636 int ret;
637
638 cur = *str;
639
640 ret = xmlParse3986Segment(&cur, 0, 0);
641 if (ret != 0) return(ret);
642 while (*cur == '/') {
643 cur++;
644 ret = xmlParse3986Segment(&cur, 0, 1);
645 if (ret != 0) return(ret);
646 }
647 if (uri != NULL) {
648 if (uri->path != NULL) xmlFree(uri->path);
649 if (cur != *str) {
650 if (uri->cleanup & 2)
651 uri->path = STRNDUP(*str, cur - *str);
652 else
653 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
654 } else {
655 uri->path = NULL;
656 }
657 }
658 *str = cur;
659 return (0);
660 }
661
662 /**
663 * xmlParse3986PathNoScheme:
664 * @uri: pointer to an URI structure
665 * @str: the string to analyze
666 *
667 * Parse an path which is not a scheme and fills in the appropriate fields
668 * of the @uri structure
669 *
670 * path-noscheme = segment-nz-nc *( "/" segment )
671 *
672 * Returns 0 or the error code
673 */
674 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)675 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
676 {
677 const char *cur;
678 int ret;
679
680 cur = *str;
681
682 ret = xmlParse3986Segment(&cur, ':', 0);
683 if (ret != 0) return(ret);
684 while (*cur == '/') {
685 cur++;
686 ret = xmlParse3986Segment(&cur, 0, 1);
687 if (ret != 0) return(ret);
688 }
689 if (uri != NULL) {
690 if (uri->path != NULL) xmlFree(uri->path);
691 if (cur != *str) {
692 if (uri->cleanup & 2)
693 uri->path = STRNDUP(*str, cur - *str);
694 else
695 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
696 } else {
697 uri->path = NULL;
698 }
699 }
700 *str = cur;
701 return (0);
702 }
703
704 /**
705 * xmlParse3986HierPart:
706 * @uri: pointer to an URI structure
707 * @str: the string to analyze
708 *
709 * Parse an hierarchical part and fills in the appropriate fields
710 * of the @uri structure
711 *
712 * hier-part = "//" authority path-abempty
713 * / path-absolute
714 * / path-rootless
715 * / path-empty
716 *
717 * Returns 0 or the error code
718 */
719 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)720 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
721 {
722 const char *cur;
723 int ret;
724
725 cur = *str;
726
727 if ((*cur == '/') && (*(cur + 1) == '/')) {
728 cur += 2;
729 ret = xmlParse3986Authority(uri, &cur);
730 if (ret != 0) return(ret);
731 ret = xmlParse3986PathAbEmpty(uri, &cur);
732 if (ret != 0) return(ret);
733 *str = cur;
734 return(0);
735 } else if (*cur == '/') {
736 ret = xmlParse3986PathAbsolute(uri, &cur);
737 if (ret != 0) return(ret);
738 } else if (ISA_PCHAR(cur)) {
739 ret = xmlParse3986PathRootless(uri, &cur);
740 if (ret != 0) return(ret);
741 } else {
742 /* path-empty is effectively empty */
743 if (uri != NULL) {
744 if (uri->path != NULL) xmlFree(uri->path);
745 uri->path = NULL;
746 }
747 }
748 *str = cur;
749 return (0);
750 }
751
752 /**
753 * xmlParse3986RelativeRef:
754 * @uri: pointer to an URI structure
755 * @str: the string to analyze
756 *
757 * Parse an URI string and fills in the appropriate fields
758 * of the @uri structure
759 *
760 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
761 * relative-part = "//" authority path-abempty
762 * / path-absolute
763 * / path-noscheme
764 * / path-empty
765 *
766 * Returns 0 or the error code
767 */
768 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)769 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
770 int ret;
771
772 if ((*str == '/') && (*(str + 1) == '/')) {
773 str += 2;
774 ret = xmlParse3986Authority(uri, &str);
775 if (ret != 0) return(ret);
776 ret = xmlParse3986PathAbEmpty(uri, &str);
777 if (ret != 0) return(ret);
778 } else if (*str == '/') {
779 ret = xmlParse3986PathAbsolute(uri, &str);
780 if (ret != 0) return(ret);
781 } else if (ISA_PCHAR(str)) {
782 ret = xmlParse3986PathNoScheme(uri, &str);
783 if (ret != 0) return(ret);
784 } else {
785 /* path-empty is effectively empty */
786 if (uri != NULL) {
787 if (uri->path != NULL) xmlFree(uri->path);
788 uri->path = NULL;
789 }
790 }
791
792 if (*str == '?') {
793 str++;
794 ret = xmlParse3986Query(uri, &str);
795 if (ret != 0) return(ret);
796 }
797 if (*str == '#') {
798 str++;
799 ret = xmlParse3986Fragment(uri, &str);
800 if (ret != 0) return(ret);
801 }
802 if (*str != 0) {
803 xmlCleanURI(uri);
804 return(1);
805 }
806 return(0);
807 }
808
809
810 /**
811 * xmlParse3986URI:
812 * @uri: pointer to an URI structure
813 * @str: the string to analyze
814 *
815 * Parse an URI string and fills in the appropriate fields
816 * of the @uri structure
817 *
818 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
819 *
820 * Returns 0 or the error code
821 */
822 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)823 xmlParse3986URI(xmlURIPtr uri, const char *str) {
824 int ret;
825
826 ret = xmlParse3986Scheme(uri, &str);
827 if (ret != 0) return(ret);
828 if (*str != ':') {
829 return(1);
830 }
831 str++;
832 ret = xmlParse3986HierPart(uri, &str);
833 if (ret != 0) return(ret);
834 if (*str == '?') {
835 str++;
836 ret = xmlParse3986Query(uri, &str);
837 if (ret != 0) return(ret);
838 }
839 if (*str == '#') {
840 str++;
841 ret = xmlParse3986Fragment(uri, &str);
842 if (ret != 0) return(ret);
843 }
844 if (*str != 0) {
845 xmlCleanURI(uri);
846 return(1);
847 }
848 return(0);
849 }
850
851 /**
852 * xmlParse3986URIReference:
853 * @uri: pointer to an URI structure
854 * @str: the string to analyze
855 *
856 * Parse an URI reference string and fills in the appropriate fields
857 * of the @uri structure
858 *
859 * URI-reference = URI / relative-ref
860 *
861 * Returns 0 or the error code
862 */
863 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)864 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
865 int ret;
866
867 if (str == NULL)
868 return(-1);
869 xmlCleanURI(uri);
870
871 /*
872 * Try first to parse absolute refs, then fallback to relative if
873 * it fails.
874 */
875 ret = xmlParse3986URI(uri, str);
876 if (ret != 0) {
877 xmlCleanURI(uri);
878 ret = xmlParse3986RelativeRef(uri, str);
879 if (ret != 0) {
880 xmlCleanURI(uri);
881 return(ret);
882 }
883 }
884 return(0);
885 }
886
887 /**
888 * xmlParseURI:
889 * @str: the URI string to analyze
890 *
891 * Parse an URI based on RFC 3986
892 *
893 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
894 *
895 * Returns a newly built xmlURIPtr or NULL in case of error
896 */
897 xmlURIPtr
xmlParseURI(const char * str)898 xmlParseURI(const char *str) {
899 xmlURIPtr uri;
900 int ret;
901
902 if (str == NULL)
903 return(NULL);
904 uri = xmlCreateURI();
905 if (uri != NULL) {
906 ret = xmlParse3986URIReference(uri, str);
907 if (ret) {
908 xmlFreeURI(uri);
909 return(NULL);
910 }
911 }
912 return(uri);
913 }
914
915 /**
916 * xmlParseURIReference:
917 * @uri: pointer to an URI structure
918 * @str: the string to analyze
919 *
920 * Parse an URI reference string based on RFC 3986 and fills in the
921 * appropriate fields of the @uri structure
922 *
923 * URI-reference = URI / relative-ref
924 *
925 * Returns 0 or the error code
926 */
927 int
xmlParseURIReference(xmlURIPtr uri,const char * str)928 xmlParseURIReference(xmlURIPtr uri, const char *str) {
929 return(xmlParse3986URIReference(uri, str));
930 }
931
932 /**
933 * xmlParseURIRaw:
934 * @str: the URI string to analyze
935 * @raw: if 1 unescaping of URI pieces are disabled
936 *
937 * Parse an URI but allows to keep intact the original fragments.
938 *
939 * URI-reference = URI / relative-ref
940 *
941 * Returns a newly built xmlURIPtr or NULL in case of error
942 */
943 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)944 xmlParseURIRaw(const char *str, int raw) {
945 xmlURIPtr uri;
946 int ret;
947
948 if (str == NULL)
949 return(NULL);
950 uri = xmlCreateURI();
951 if (uri != NULL) {
952 if (raw) {
953 uri->cleanup |= 2;
954 }
955 ret = xmlParseURIReference(uri, str);
956 if (ret) {
957 xmlFreeURI(uri);
958 return(NULL);
959 }
960 }
961 return(uri);
962 }
963
964 /************************************************************************
965 * *
966 * Generic URI structure functions *
967 * *
968 ************************************************************************/
969
970 /**
971 * xmlCreateURI:
972 *
973 * Simply creates an empty xmlURI
974 *
975 * Returns the new structure or NULL in case of error
976 */
977 xmlURIPtr
xmlCreateURI(void)978 xmlCreateURI(void) {
979 xmlURIPtr ret;
980
981 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
982 if (ret == NULL) {
983 xmlGenericError(xmlGenericErrorContext,
984 "xmlCreateURI: out of memory\n");
985 return(NULL);
986 }
987 memset(ret, 0, sizeof(xmlURI));
988 return(ret);
989 }
990
991 /**
992 * xmlSaveUri:
993 * @uri: pointer to an xmlURI
994 *
995 * Save the URI as an escaped string
996 *
997 * Returns a new string (to be deallocated by caller)
998 */
999 xmlChar *
xmlSaveUri(xmlURIPtr uri)1000 xmlSaveUri(xmlURIPtr uri) {
1001 xmlChar *ret = NULL;
1002 xmlChar *temp;
1003 const char *p;
1004 int len;
1005 int max;
1006
1007 if (uri == NULL) return(NULL);
1008
1009
1010 max = 80;
1011 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1012 if (ret == NULL) {
1013 xmlGenericError(xmlGenericErrorContext,
1014 "xmlSaveUri: out of memory\n");
1015 return(NULL);
1016 }
1017 len = 0;
1018
1019 if (uri->scheme != NULL) {
1020 p = uri->scheme;
1021 while (*p != 0) {
1022 if (len >= max) {
1023 max *= 2;
1024 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1025 if (temp == NULL) {
1026 xmlGenericError(xmlGenericErrorContext,
1027 "xmlSaveUri: out of memory\n");
1028 xmlFree(ret);
1029 return(NULL);
1030 }
1031 ret = temp;
1032 }
1033 ret[len++] = *p++;
1034 }
1035 if (len >= max) {
1036 max *= 2;
1037 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038 if (temp == NULL) {
1039 xmlGenericError(xmlGenericErrorContext,
1040 "xmlSaveUri: out of memory\n");
1041 xmlFree(ret);
1042 return(NULL);
1043 }
1044 ret = temp;
1045 }
1046 ret[len++] = ':';
1047 }
1048 if (uri->opaque != NULL) {
1049 p = uri->opaque;
1050 while (*p != 0) {
1051 if (len + 3 >= max) {
1052 max *= 2;
1053 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1054 if (temp == NULL) {
1055 xmlGenericError(xmlGenericErrorContext,
1056 "xmlSaveUri: out of memory\n");
1057 xmlFree(ret);
1058 return(NULL);
1059 }
1060 ret = temp;
1061 }
1062 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1063 ret[len++] = *p++;
1064 else {
1065 int val = *(unsigned char *)p++;
1066 int hi = val / 0x10, lo = val % 0x10;
1067 ret[len++] = '%';
1068 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1069 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1070 }
1071 }
1072 } else {
1073 if (uri->server != NULL) {
1074 if (len + 3 >= max) {
1075 max *= 2;
1076 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1077 if (temp == NULL) {
1078 xmlGenericError(xmlGenericErrorContext,
1079 "xmlSaveUri: out of memory\n");
1080 xmlFree(ret);
1081 return(NULL);
1082 }
1083 ret = temp;
1084 }
1085 ret[len++] = '/';
1086 ret[len++] = '/';
1087 if (uri->user != NULL) {
1088 p = uri->user;
1089 while (*p != 0) {
1090 if (len + 3 >= max) {
1091 max *= 2;
1092 temp = (xmlChar *) xmlRealloc(ret,
1093 (max + 1) * sizeof(xmlChar));
1094 if (temp == NULL) {
1095 xmlGenericError(xmlGenericErrorContext,
1096 "xmlSaveUri: out of memory\n");
1097 xmlFree(ret);
1098 return(NULL);
1099 }
1100 ret = temp;
1101 }
1102 if ((IS_UNRESERVED(*(p))) ||
1103 ((*(p) == ';')) || ((*(p) == ':')) ||
1104 ((*(p) == '&')) || ((*(p) == '=')) ||
1105 ((*(p) == '+')) || ((*(p) == '$')) ||
1106 ((*(p) == ',')))
1107 ret[len++] = *p++;
1108 else {
1109 int val = *(unsigned char *)p++;
1110 int hi = val / 0x10, lo = val % 0x10;
1111 ret[len++] = '%';
1112 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1113 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1114 }
1115 }
1116 if (len + 3 >= max) {
1117 max *= 2;
1118 temp = (xmlChar *) xmlRealloc(ret,
1119 (max + 1) * sizeof(xmlChar));
1120 if (temp == NULL) {
1121 xmlGenericError(xmlGenericErrorContext,
1122 "xmlSaveUri: out of memory\n");
1123 xmlFree(ret);
1124 return(NULL);
1125 }
1126 ret = temp;
1127 }
1128 ret[len++] = '@';
1129 }
1130 p = uri->server;
1131 while (*p != 0) {
1132 if (len >= max) {
1133 max *= 2;
1134 temp = (xmlChar *) xmlRealloc(ret,
1135 (max + 1) * sizeof(xmlChar));
1136 if (temp == NULL) {
1137 xmlGenericError(xmlGenericErrorContext,
1138 "xmlSaveUri: out of memory\n");
1139 xmlFree(ret);
1140 return(NULL);
1141 }
1142 ret = temp;
1143 }
1144 ret[len++] = *p++;
1145 }
1146 if (uri->port > 0) {
1147 if (len + 10 >= max) {
1148 max *= 2;
1149 temp = (xmlChar *) xmlRealloc(ret,
1150 (max + 1) * sizeof(xmlChar));
1151 if (temp == NULL) {
1152 xmlGenericError(xmlGenericErrorContext,
1153 "xmlSaveUri: out of memory\n");
1154 xmlFree(ret);
1155 return(NULL);
1156 }
1157 ret = temp;
1158 }
1159 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1160 }
1161 } else if (uri->authority != NULL) {
1162 if (len + 3 >= max) {
1163 max *= 2;
1164 temp = (xmlChar *) xmlRealloc(ret,
1165 (max + 1) * sizeof(xmlChar));
1166 if (temp == NULL) {
1167 xmlGenericError(xmlGenericErrorContext,
1168 "xmlSaveUri: out of memory\n");
1169 xmlFree(ret);
1170 return(NULL);
1171 }
1172 ret = temp;
1173 }
1174 ret[len++] = '/';
1175 ret[len++] = '/';
1176 p = uri->authority;
1177 while (*p != 0) {
1178 if (len + 3 >= max) {
1179 max *= 2;
1180 temp = (xmlChar *) xmlRealloc(ret,
1181 (max + 1) * sizeof(xmlChar));
1182 if (temp == NULL) {
1183 xmlGenericError(xmlGenericErrorContext,
1184 "xmlSaveUri: out of memory\n");
1185 xmlFree(ret);
1186 return(NULL);
1187 }
1188 ret = temp;
1189 }
1190 if ((IS_UNRESERVED(*(p))) ||
1191 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1192 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1193 ((*(p) == '=')) || ((*(p) == '+')))
1194 ret[len++] = *p++;
1195 else {
1196 int val = *(unsigned char *)p++;
1197 int hi = val / 0x10, lo = val % 0x10;
1198 ret[len++] = '%';
1199 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1200 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1201 }
1202 }
1203 } else if (uri->scheme != NULL) {
1204 if (len + 3 >= max) {
1205 max *= 2;
1206 temp = (xmlChar *) xmlRealloc(ret,
1207 (max + 1) * sizeof(xmlChar));
1208 if (temp == NULL) {
1209 xmlGenericError(xmlGenericErrorContext,
1210 "xmlSaveUri: out of memory\n");
1211 xmlFree(ret);
1212 return(NULL);
1213 }
1214 ret = temp;
1215 }
1216 ret[len++] = '/';
1217 ret[len++] = '/';
1218 }
1219 if (uri->path != NULL) {
1220 p = uri->path;
1221 /*
1222 * the colon in file:///d: should not be escaped or
1223 * Windows accesses fail later.
1224 */
1225 if ((uri->scheme != NULL) &&
1226 (p[0] == '/') &&
1227 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1228 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1229 (p[2] == ':') &&
1230 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1231 if (len + 3 >= max) {
1232 max *= 2;
1233 ret = (xmlChar *) xmlRealloc(ret,
1234 (max + 1) * sizeof(xmlChar));
1235 if (ret == NULL) {
1236 xmlGenericError(xmlGenericErrorContext,
1237 "xmlSaveUri: out of memory\n");
1238 return(NULL);
1239 }
1240 }
1241 ret[len++] = *p++;
1242 ret[len++] = *p++;
1243 ret[len++] = *p++;
1244 }
1245 while (*p != 0) {
1246 if (len + 3 >= max) {
1247 max *= 2;
1248 temp = (xmlChar *) xmlRealloc(ret,
1249 (max + 1) * sizeof(xmlChar));
1250 if (temp == NULL) {
1251 xmlGenericError(xmlGenericErrorContext,
1252 "xmlSaveUri: out of memory\n");
1253 xmlFree(ret);
1254 return(NULL);
1255 }
1256 ret = temp;
1257 }
1258 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1259 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1260 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1261 ((*(p) == ',')))
1262 ret[len++] = *p++;
1263 else {
1264 int val = *(unsigned char *)p++;
1265 int hi = val / 0x10, lo = val % 0x10;
1266 ret[len++] = '%';
1267 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1268 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1269 }
1270 }
1271 }
1272 if (uri->query_raw != NULL) {
1273 if (len + 1 >= max) {
1274 max *= 2;
1275 temp = (xmlChar *) xmlRealloc(ret,
1276 (max + 1) * sizeof(xmlChar));
1277 if (temp == NULL) {
1278 xmlGenericError(xmlGenericErrorContext,
1279 "xmlSaveUri: out of memory\n");
1280 xmlFree(ret);
1281 return(NULL);
1282 }
1283 ret = temp;
1284 }
1285 ret[len++] = '?';
1286 p = uri->query_raw;
1287 while (*p != 0) {
1288 if (len + 1 >= max) {
1289 max *= 2;
1290 temp = (xmlChar *) xmlRealloc(ret,
1291 (max + 1) * sizeof(xmlChar));
1292 if (temp == NULL) {
1293 xmlGenericError(xmlGenericErrorContext,
1294 "xmlSaveUri: out of memory\n");
1295 xmlFree(ret);
1296 return(NULL);
1297 }
1298 ret = temp;
1299 }
1300 ret[len++] = *p++;
1301 }
1302 } else if (uri->query != NULL) {
1303 if (len + 3 >= max) {
1304 max *= 2;
1305 temp = (xmlChar *) xmlRealloc(ret,
1306 (max + 1) * sizeof(xmlChar));
1307 if (temp == NULL) {
1308 xmlGenericError(xmlGenericErrorContext,
1309 "xmlSaveUri: out of memory\n");
1310 xmlFree(ret);
1311 return(NULL);
1312 }
1313 ret = temp;
1314 }
1315 ret[len++] = '?';
1316 p = uri->query;
1317 while (*p != 0) {
1318 if (len + 3 >= max) {
1319 max *= 2;
1320 temp = (xmlChar *) xmlRealloc(ret,
1321 (max + 1) * sizeof(xmlChar));
1322 if (temp == NULL) {
1323 xmlGenericError(xmlGenericErrorContext,
1324 "xmlSaveUri: out of memory\n");
1325 xmlFree(ret);
1326 return(NULL);
1327 }
1328 ret = temp;
1329 }
1330 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1331 ret[len++] = *p++;
1332 else {
1333 int val = *(unsigned char *)p++;
1334 int hi = val / 0x10, lo = val % 0x10;
1335 ret[len++] = '%';
1336 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1337 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1338 }
1339 }
1340 }
1341 }
1342 if (uri->fragment != NULL) {
1343 if (len + 3 >= max) {
1344 max *= 2;
1345 temp = (xmlChar *) xmlRealloc(ret,
1346 (max + 1) * sizeof(xmlChar));
1347 if (temp == NULL) {
1348 xmlGenericError(xmlGenericErrorContext,
1349 "xmlSaveUri: out of memory\n");
1350 xmlFree(ret);
1351 return(NULL);
1352 }
1353 ret = temp;
1354 }
1355 ret[len++] = '#';
1356 p = uri->fragment;
1357 while (*p != 0) {
1358 if (len + 3 >= max) {
1359 max *= 2;
1360 temp = (xmlChar *) xmlRealloc(ret,
1361 (max + 1) * sizeof(xmlChar));
1362 if (temp == NULL) {
1363 xmlGenericError(xmlGenericErrorContext,
1364 "xmlSaveUri: out of memory\n");
1365 xmlFree(ret);
1366 return(NULL);
1367 }
1368 ret = temp;
1369 }
1370 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1371 ret[len++] = *p++;
1372 else {
1373 int val = *(unsigned char *)p++;
1374 int hi = val / 0x10, lo = val % 0x10;
1375 ret[len++] = '%';
1376 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1377 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1378 }
1379 }
1380 }
1381 if (len >= max) {
1382 max *= 2;
1383 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1384 if (temp == NULL) {
1385 xmlGenericError(xmlGenericErrorContext,
1386 "xmlSaveUri: out of memory\n");
1387 xmlFree(ret);
1388 return(NULL);
1389 }
1390 ret = temp;
1391 }
1392 ret[len] = 0;
1393 return(ret);
1394 }
1395
1396 /**
1397 * xmlPrintURI:
1398 * @stream: a FILE* for the output
1399 * @uri: pointer to an xmlURI
1400 *
1401 * Prints the URI in the stream @stream.
1402 */
1403 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1404 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1405 xmlChar *out;
1406
1407 out = xmlSaveUri(uri);
1408 if (out != NULL) {
1409 fprintf(stream, "%s", (char *) out);
1410 xmlFree(out);
1411 }
1412 }
1413
1414 /**
1415 * xmlCleanURI:
1416 * @uri: pointer to an xmlURI
1417 *
1418 * Make sure the xmlURI struct is free of content
1419 */
1420 static void
xmlCleanURI(xmlURIPtr uri)1421 xmlCleanURI(xmlURIPtr uri) {
1422 if (uri == NULL) return;
1423
1424 if (uri->scheme != NULL) xmlFree(uri->scheme);
1425 uri->scheme = NULL;
1426 if (uri->server != NULL) xmlFree(uri->server);
1427 uri->server = NULL;
1428 if (uri->user != NULL) xmlFree(uri->user);
1429 uri->user = NULL;
1430 if (uri->path != NULL) xmlFree(uri->path);
1431 uri->path = NULL;
1432 if (uri->fragment != NULL) xmlFree(uri->fragment);
1433 uri->fragment = NULL;
1434 if (uri->opaque != NULL) xmlFree(uri->opaque);
1435 uri->opaque = NULL;
1436 if (uri->authority != NULL) xmlFree(uri->authority);
1437 uri->authority = NULL;
1438 if (uri->query != NULL) xmlFree(uri->query);
1439 uri->query = NULL;
1440 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1441 uri->query_raw = NULL;
1442 }
1443
1444 /**
1445 * xmlFreeURI:
1446 * @uri: pointer to an xmlURI
1447 *
1448 * Free up the xmlURI struct
1449 */
1450 void
xmlFreeURI(xmlURIPtr uri)1451 xmlFreeURI(xmlURIPtr uri) {
1452 if (uri == NULL) return;
1453
1454 if (uri->scheme != NULL) xmlFree(uri->scheme);
1455 if (uri->server != NULL) xmlFree(uri->server);
1456 if (uri->user != NULL) xmlFree(uri->user);
1457 if (uri->path != NULL) xmlFree(uri->path);
1458 if (uri->fragment != NULL) xmlFree(uri->fragment);
1459 if (uri->opaque != NULL) xmlFree(uri->opaque);
1460 if (uri->authority != NULL) xmlFree(uri->authority);
1461 if (uri->query != NULL) xmlFree(uri->query);
1462 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1463 xmlFree(uri);
1464 }
1465
1466 /************************************************************************
1467 * *
1468 * Helper functions *
1469 * *
1470 ************************************************************************/
1471
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": skip all but the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the slash(es) separating "cur" from the previous
         * segment.  "segp" ends up just past the last character of that
         * segment, so it equals "path" only when nothing but slashes (or
         * nothing at all) precedes "cur"; in that case keep going from here.
         * Looking at segp[-1] rather than segp[0] is what lets a
         * one-character segment at the very start of the buffer (as in
         * "a/b/../../c") be recognised as a previous segment.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp" is pointing just past the end of a previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1659
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f or A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    int decimal = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(decimal || lower || upper);
}
1667
1668 /**
1669 * xmlURIUnescapeString:
1670 * @str: the string to unescape
1671 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1672 * @target: optional destination buffer
1673 *
1674 * Unescaping routine, but does not check that the string is an URI. The
1675 * output is a direct unsigned char translation of %XX values (no encoding)
1676 * Note that the length of the result can only be smaller or same size as
1677 * the input string.
1678 *
1679 * Returns a copy of the string, but unescaped, will return NULL only in case
1680 * of error
1681 */
1682 char *
xmlURIUnescapeString(const char * str,int len,char * target)1683 xmlURIUnescapeString(const char *str, int len, char *target) {
1684 char *ret, *out;
1685 const char *in;
1686
1687 if (str == NULL)
1688 return(NULL);
1689 if (len <= 0) len = strlen(str);
1690 if (len < 0) return(NULL);
1691
1692 if (target == NULL) {
1693 ret = (char *) xmlMallocAtomic(len + 1);
1694 if (ret == NULL) {
1695 xmlGenericError(xmlGenericErrorContext,
1696 "xmlURIUnescapeString: out of memory\n");
1697 return(NULL);
1698 }
1699 } else
1700 ret = target;
1701 in = str;
1702 out = ret;
1703 while(len > 0) {
1704 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1705 in++;
1706 if ((*in >= '0') && (*in <= '9'))
1707 *out = (*in - '0');
1708 else if ((*in >= 'a') && (*in <= 'f'))
1709 *out = (*in - 'a') + 10;
1710 else if ((*in >= 'A') && (*in <= 'F'))
1711 *out = (*in - 'A') + 10;
1712 in++;
1713 if ((*in >= '0') && (*in <= '9'))
1714 *out = *out * 16 + (*in - '0');
1715 else if ((*in >= 'a') && (*in <= 'f'))
1716 *out = *out * 16 + (*in - 'a') + 10;
1717 else if ((*in >= 'A') && (*in <= 'F'))
1718 *out = *out * 16 + (*in - 'A') + 10;
1719 in++;
1720 len -= 3;
1721 out++;
1722 } else {
1723 *out++ = *in++;
1724 len--;
1725 }
1726 }
1727 *out = 0;
1728 return(ret);
1729 }
1730
1731 /**
1732 * xmlURIEscapeStr:
1733 * @str: string to escape
1734 * @list: exception list string of chars not to escape
1735 *
1736 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1737 * and the characters in the exception list.
1738 *
1739 * Returns a new escaped string or NULL in case of error.
1740 */
1741 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1742 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1743 xmlChar *ret, ch;
1744 xmlChar *temp;
1745 const xmlChar *in;
1746
1747 unsigned int len, out;
1748
1749 if (str == NULL)
1750 return(NULL);
1751 if (str[0] == 0)
1752 return(xmlStrdup(str));
1753 len = xmlStrlen(str);
1754 if (!(len > 0)) return(NULL);
1755
1756 len += 20;
1757 ret = (xmlChar *) xmlMallocAtomic(len);
1758 if (ret == NULL) {
1759 xmlGenericError(xmlGenericErrorContext,
1760 "xmlURIEscapeStr: out of memory\n");
1761 return(NULL);
1762 }
1763 in = (const xmlChar *) str;
1764 out = 0;
1765 while(*in != 0) {
1766 if (len - out <= 3) {
1767 len += 20;
1768 temp = (xmlChar *) xmlRealloc(ret, len);
1769 if (temp == NULL) {
1770 xmlGenericError(xmlGenericErrorContext,
1771 "xmlURIEscapeStr: out of memory\n");
1772 xmlFree(ret);
1773 return(NULL);
1774 }
1775 ret = temp;
1776 }
1777
1778 ch = *in;
1779
1780 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1781 unsigned char val;
1782 ret[out++] = '%';
1783 val = ch >> 4;
1784 if (val <= 9)
1785 ret[out++] = '0' + val;
1786 else
1787 ret[out++] = 'A' + val - 0xA;
1788 val = ch & 0xF;
1789 if (val <= 9)
1790 ret[out++] = '0' + val;
1791 else
1792 ret[out++] = 'A' + val - 0xA;
1793 in++;
1794 } else {
1795 ret[out++] = *in++;
1796 }
1797
1798 }
1799 ret[out] = 0;
1800 return(ret);
1801 }
1802
1803 /**
1804 * xmlURIEscape:
1805 * @str: the string of the URI to escape
1806 *
1807 * Escaping routine, does not do validity checks !
1808 * It will try to escape the chars needing this, but this is heuristic
1809 * based it's impossible to be sure.
1810 *
1811 * Returns an copy of the string, but escaped
1812 *
1813 * 25 May 2001
1814 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1815 * according to RFC2396.
1816 * - Carl Douglas
1817 */
1818 xmlChar *
xmlURIEscape(const xmlChar * str)1819 xmlURIEscape(const xmlChar * str)
1820 {
1821 xmlChar *ret, *segment = NULL;
1822 xmlURIPtr uri;
1823 int ret2;
1824
1825 #define NULLCHK(p) if(!p) { \
1826 xmlGenericError(xmlGenericErrorContext, \
1827 "xmlURIEscape: out of memory\n"); \
1828 xmlFreeURI(uri); \
1829 return NULL; } \
1830
1831 if (str == NULL)
1832 return (NULL);
1833
1834 uri = xmlCreateURI();
1835 if (uri != NULL) {
1836 /*
1837 * Allow escaping errors in the unescaped form
1838 */
1839 uri->cleanup = 1;
1840 ret2 = xmlParseURIReference(uri, (const char *)str);
1841 if (ret2) {
1842 xmlFreeURI(uri);
1843 return (NULL);
1844 }
1845 }
1846
1847 if (!uri)
1848 return NULL;
1849
1850 ret = NULL;
1851
1852 if (uri->scheme) {
1853 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1854 NULLCHK(segment)
1855 ret = xmlStrcat(ret, segment);
1856 ret = xmlStrcat(ret, BAD_CAST ":");
1857 xmlFree(segment);
1858 }
1859
1860 if (uri->authority) {
1861 segment =
1862 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1863 NULLCHK(segment)
1864 ret = xmlStrcat(ret, BAD_CAST "//");
1865 ret = xmlStrcat(ret, segment);
1866 xmlFree(segment);
1867 }
1868
1869 if (uri->user) {
1870 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1871 NULLCHK(segment)
1872 ret = xmlStrcat(ret,BAD_CAST "//");
1873 ret = xmlStrcat(ret, segment);
1874 ret = xmlStrcat(ret, BAD_CAST "@");
1875 xmlFree(segment);
1876 }
1877
1878 if (uri->server) {
1879 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1880 NULLCHK(segment)
1881 if (uri->user == NULL)
1882 ret = xmlStrcat(ret, BAD_CAST "//");
1883 ret = xmlStrcat(ret, segment);
1884 xmlFree(segment);
1885 }
1886
1887 if (uri->port) {
1888 xmlChar port[10];
1889
1890 snprintf((char *) port, 10, "%d", uri->port);
1891 ret = xmlStrcat(ret, BAD_CAST ":");
1892 ret = xmlStrcat(ret, port);
1893 }
1894
1895 if (uri->path) {
1896 segment =
1897 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1898 NULLCHK(segment)
1899 ret = xmlStrcat(ret, segment);
1900 xmlFree(segment);
1901 }
1902
1903 if (uri->query_raw) {
1904 ret = xmlStrcat(ret, BAD_CAST "?");
1905 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1906 }
1907 else if (uri->query) {
1908 segment =
1909 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1910 NULLCHK(segment)
1911 ret = xmlStrcat(ret, BAD_CAST "?");
1912 ret = xmlStrcat(ret, segment);
1913 xmlFree(segment);
1914 }
1915
1916 if (uri->opaque) {
1917 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1918 NULLCHK(segment)
1919 ret = xmlStrcat(ret, segment);
1920 xmlFree(segment);
1921 }
1922
1923 if (uri->fragment) {
1924 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1925 NULLCHK(segment)
1926 ret = xmlStrcat(ret, BAD_CAST "#");
1927 ret = xmlStrcat(ret, segment);
1928 xmlFree(segment);
1929 }
1930
1931 xmlFreeURI(uri);
1932 #undef NULLCHK
1933
1934 return (ret);
1935 }
1936
1937 /************************************************************************
1938 * *
1939 * Public functions *
1940 * *
1941 ************************************************************************/
1942
1943 /**
1944 * xmlBuildURI:
1945 * @URI: the URI instance found in the document
1946 * @base: the base value
1947 *
1948 * Computes he final URI of the reference done by checking that
1949 * the given URI is valid, and building the final URI using the
1950 * base URI. This is processed according to section 5.2 of the
1951 * RFC 2396
1952 *
1953 * 5.2. Resolving Relative References to Absolute Form
1954 *
1955 * Returns a new URI string (to be freed by the caller) or NULL in case
1956 * of error.
1957 */
1958 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1959 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1960 xmlChar *val = NULL;
1961 int ret, len, indx, cur, out;
1962 xmlURIPtr ref = NULL;
1963 xmlURIPtr bas = NULL;
1964 xmlURIPtr res = NULL;
1965
1966 /*
1967 * 1) The URI reference is parsed into the potential four components and
1968 * fragment identifier, as described in Section 4.3.
1969 *
1970 * NOTE that a completely empty URI is treated by modern browsers
1971 * as a reference to "." rather than as a synonym for the current
1972 * URI. Should we do that here?
1973 */
1974 if (URI == NULL)
1975 ret = -1;
1976 else {
1977 if (*URI) {
1978 ref = xmlCreateURI();
1979 if (ref == NULL)
1980 goto done;
1981 ret = xmlParseURIReference(ref, (const char *) URI);
1982 }
1983 else
1984 ret = 0;
1985 }
1986 if (ret != 0)
1987 goto done;
1988 if ((ref != NULL) && (ref->scheme != NULL)) {
1989 /*
1990 * The URI is absolute don't modify.
1991 */
1992 val = xmlStrdup(URI);
1993 goto done;
1994 }
1995 if (base == NULL)
1996 ret = -1;
1997 else {
1998 bas = xmlCreateURI();
1999 if (bas == NULL)
2000 goto done;
2001 ret = xmlParseURIReference(bas, (const char *) base);
2002 }
2003 if (ret != 0) {
2004 if (ref)
2005 val = xmlSaveUri(ref);
2006 goto done;
2007 }
2008 if (ref == NULL) {
2009 /*
2010 * the base fragment must be ignored
2011 */
2012 if (bas->fragment != NULL) {
2013 xmlFree(bas->fragment);
2014 bas->fragment = NULL;
2015 }
2016 val = xmlSaveUri(bas);
2017 goto done;
2018 }
2019
2020 /*
2021 * 2) If the path component is empty and the scheme, authority, and
2022 * query components are undefined, then it is a reference to the
2023 * current document and we are done. Otherwise, the reference URI's
2024 * query and fragment components are defined as found (or not found)
2025 * within the URI reference and not inherited from the base URI.
2026 *
2027 * NOTE that in modern browsers, the parsing differs from the above
2028 * in the following aspect: the query component is allowed to be
2029 * defined while still treating this as a reference to the current
2030 * document.
2031 */
2032 res = xmlCreateURI();
2033 if (res == NULL)
2034 goto done;
2035 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2036 ((ref->authority == NULL) && (ref->server == NULL))) {
2037 if (bas->scheme != NULL)
2038 res->scheme = xmlMemStrdup(bas->scheme);
2039 if (bas->authority != NULL)
2040 res->authority = xmlMemStrdup(bas->authority);
2041 else if (bas->server != NULL) {
2042 res->server = xmlMemStrdup(bas->server);
2043 if (bas->user != NULL)
2044 res->user = xmlMemStrdup(bas->user);
2045 res->port = bas->port;
2046 }
2047 if (bas->path != NULL)
2048 res->path = xmlMemStrdup(bas->path);
2049 if (ref->query_raw != NULL)
2050 res->query_raw = xmlMemStrdup (ref->query_raw);
2051 else if (ref->query != NULL)
2052 res->query = xmlMemStrdup(ref->query);
2053 else if (bas->query_raw != NULL)
2054 res->query_raw = xmlMemStrdup(bas->query_raw);
2055 else if (bas->query != NULL)
2056 res->query = xmlMemStrdup(bas->query);
2057 if (ref->fragment != NULL)
2058 res->fragment = xmlMemStrdup(ref->fragment);
2059 goto step_7;
2060 }
2061
2062 /*
2063 * 3) If the scheme component is defined, indicating that the reference
2064 * starts with a scheme name, then the reference is interpreted as an
2065 * absolute URI and we are done. Otherwise, the reference URI's
2066 * scheme is inherited from the base URI's scheme component.
2067 */
2068 if (ref->scheme != NULL) {
2069 val = xmlSaveUri(ref);
2070 goto done;
2071 }
2072 if (bas->scheme != NULL)
2073 res->scheme = xmlMemStrdup(bas->scheme);
2074
2075 if (ref->query_raw != NULL)
2076 res->query_raw = xmlMemStrdup(ref->query_raw);
2077 else if (ref->query != NULL)
2078 res->query = xmlMemStrdup(ref->query);
2079 if (ref->fragment != NULL)
2080 res->fragment = xmlMemStrdup(ref->fragment);
2081
2082 /*
2083 * 4) If the authority component is defined, then the reference is a
2084 * network-path and we skip to step 7. Otherwise, the reference
2085 * URI's authority is inherited from the base URI's authority
2086 * component, which will also be undefined if the URI scheme does not
2087 * use an authority component.
2088 */
2089 if ((ref->authority != NULL) || (ref->server != NULL)) {
2090 if (ref->authority != NULL)
2091 res->authority = xmlMemStrdup(ref->authority);
2092 else {
2093 res->server = xmlMemStrdup(ref->server);
2094 if (ref->user != NULL)
2095 res->user = xmlMemStrdup(ref->user);
2096 res->port = ref->port;
2097 }
2098 if (ref->path != NULL)
2099 res->path = xmlMemStrdup(ref->path);
2100 goto step_7;
2101 }
2102 if (bas->authority != NULL)
2103 res->authority = xmlMemStrdup(bas->authority);
2104 else if (bas->server != NULL) {
2105 res->server = xmlMemStrdup(bas->server);
2106 if (bas->user != NULL)
2107 res->user = xmlMemStrdup(bas->user);
2108 res->port = bas->port;
2109 }
2110
2111 /*
2112 * 5) If the path component begins with a slash character ("/"), then
2113 * the reference is an absolute-path and we skip to step 7.
2114 */
2115 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2116 res->path = xmlMemStrdup(ref->path);
2117 goto step_7;
2118 }
2119
2120
2121 /*
2122 * 6) If this step is reached, then we are resolving a relative-path
2123 * reference. The relative path needs to be merged with the base
2124 * URI's path. Although there are many ways to do this, we will
2125 * describe a simple method using a separate string buffer.
2126 *
2127 * Allocate a buffer large enough for the result string.
2128 */
2129 len = 2; /* extra / and 0 */
2130 if (ref->path != NULL)
2131 len += strlen(ref->path);
2132 if (bas->path != NULL)
2133 len += strlen(bas->path);
2134 res->path = (char *) xmlMallocAtomic(len);
2135 if (res->path == NULL) {
2136 xmlGenericError(xmlGenericErrorContext,
2137 "xmlBuildURI: out of memory\n");
2138 goto done;
2139 }
2140 res->path[0] = 0;
2141
2142 /*
2143 * a) All but the last segment of the base URI's path component is
2144 * copied to the buffer. In other words, any characters after the
2145 * last (right-most) slash character, if any, are excluded.
2146 */
2147 cur = 0;
2148 out = 0;
2149 if (bas->path != NULL) {
2150 while (bas->path[cur] != 0) {
2151 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2152 cur++;
2153 if (bas->path[cur] == 0)
2154 break;
2155
2156 cur++;
2157 while (out < cur) {
2158 res->path[out] = bas->path[out];
2159 out++;
2160 }
2161 }
2162 }
2163 res->path[out] = 0;
2164
2165 /*
2166 * b) The reference's path component is appended to the buffer
2167 * string.
2168 */
2169 if (ref->path != NULL && ref->path[0] != 0) {
2170 indx = 0;
2171 /*
2172 * Ensure the path includes a '/'
2173 */
2174 if ((out == 0) && (bas->server != NULL))
2175 res->path[out++] = '/';
2176 while (ref->path[indx] != 0) {
2177 res->path[out++] = ref->path[indx++];
2178 }
2179 }
2180 res->path[out] = 0;
2181
2182 /*
2183 * Steps c) to h) are really path normalization steps
2184 */
2185 xmlNormalizeURIPath(res->path);
2186
2187 step_7:
2188
2189 /*
2190 * 7) The resulting URI components, including any inherited from the
2191 * base URI, are recombined to give the absolute form of the URI
2192 * reference.
2193 */
2194 val = xmlSaveUri(res);
2195
2196 done:
2197 if (ref != NULL)
2198 xmlFreeURI(ref);
2199 if (bas != NULL)
2200 xmlFreeURI(bas);
2201 if (res != NULL)
2202 xmlFreeURI(res);
2203 return(val);
2204 }
2205
2206 /**
2207 * xmlBuildRelativeURI:
2208 * @URI: the URI reference under consideration
2209 * @base: the base value
2210 *
2211 * Expresses the URI of the reference in terms relative to the
2212 * base. Some examples of this operation include:
2213 * base = "http://site1.com/docs/book1.html"
2214 * URI input URI returned
2215 * docs/pic1.gif pic1.gif
2216 * docs/img/pic1.gif img/pic1.gif
2217 * img/pic1.gif ../img/pic1.gif
2218 * http://site1.com/docs/pic1.gif pic1.gif
2219 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2220 *
2221 * base = "docs/book1.html"
2222 * URI input URI returned
2223 * docs/pic1.gif pic1.gif
2224 * docs/img/pic1.gif img/pic1.gif
2225 * img/pic1.gif ../img/pic1.gif
2226 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2227 *
2228 *
2229 * Note: if the URI reference is really wierd or complicated, it may be
2230 * worthwhile to first convert it into a "nice" one by calling
2231 * xmlBuildURI (using 'base') before calling this routine,
2232 * since this routine (for reasonable efficiency) assumes URI has
2233 * already been through some validation.
2234 *
2235 * Returns a new URI string (to be freed by the caller) or NULL in case
2236 * error.
2237 */
2238 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2239 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2240 {
2241 xmlChar *val = NULL;
2242 int ret;
2243 int ix;
2244 int pos = 0;
2245 int nbslash = 0;
2246 int len;
2247 xmlURIPtr ref = NULL;
2248 xmlURIPtr bas = NULL;
2249 xmlChar *bptr, *uptr, *vptr;
2250 int remove_path = 0;
2251
2252 if ((URI == NULL) || (*URI == 0))
2253 return NULL;
2254
2255 /*
2256 * First parse URI into a standard form
2257 */
2258 ref = xmlCreateURI ();
2259 if (ref == NULL)
2260 return NULL;
2261 /* If URI not already in "relative" form */
2262 if (URI[0] != '.') {
2263 ret = xmlParseURIReference (ref, (const char *) URI);
2264 if (ret != 0)
2265 goto done; /* Error in URI, return NULL */
2266 } else
2267 ref->path = (char *)xmlStrdup(URI);
2268
2269 /*
2270 * Next parse base into the same standard form
2271 */
2272 if ((base == NULL) || (*base == 0)) {
2273 val = xmlStrdup (URI);
2274 goto done;
2275 }
2276 bas = xmlCreateURI ();
2277 if (bas == NULL)
2278 goto done;
2279 if (base[0] != '.') {
2280 ret = xmlParseURIReference (bas, (const char *) base);
2281 if (ret != 0)
2282 goto done; /* Error in base, return NULL */
2283 } else
2284 bas->path = (char *)xmlStrdup(base);
2285
2286 /*
2287 * If the scheme / server on the URI differs from the base,
2288 * just return the URI
2289 */
2290 if ((ref->scheme != NULL) &&
2291 ((bas->scheme == NULL) ||
2292 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2293 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2294 val = xmlStrdup (URI);
2295 goto done;
2296 }
2297 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2298 val = xmlStrdup(BAD_CAST "");
2299 goto done;
2300 }
2301 if (bas->path == NULL) {
2302 val = xmlStrdup((xmlChar *)ref->path);
2303 goto done;
2304 }
2305 if (ref->path == NULL) {
2306 ref->path = (char *) "/";
2307 remove_path = 1;
2308 }
2309
2310 /*
2311 * At this point (at last!) we can compare the two paths
2312 *
2313 * First we take care of the special case where either of the
2314 * two path components may be missing (bug 316224)
2315 */
2316 if (bas->path == NULL) {
2317 if (ref->path != NULL) {
2318 uptr = (xmlChar *) ref->path;
2319 if (*uptr == '/')
2320 uptr++;
2321 /* exception characters from xmlSaveUri */
2322 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2323 }
2324 goto done;
2325 }
2326 bptr = (xmlChar *)bas->path;
2327 if (ref->path == NULL) {
2328 for (ix = 0; bptr[ix] != 0; ix++) {
2329 if (bptr[ix] == '/')
2330 nbslash++;
2331 }
2332 uptr = NULL;
2333 len = 1; /* this is for a string terminator only */
2334 } else {
2335 /*
2336 * Next we compare the two strings and find where they first differ
2337 */
2338 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2339 pos += 2;
2340 if ((*bptr == '.') && (bptr[1] == '/'))
2341 bptr += 2;
2342 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2343 bptr++;
2344 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2345 pos++;
2346
2347 if (bptr[pos] == ref->path[pos]) {
2348 val = xmlStrdup(BAD_CAST "");
2349 goto done; /* (I can't imagine why anyone would do this) */
2350 }
2351
2352 /*
2353 * In URI, "back up" to the last '/' encountered. This will be the
2354 * beginning of the "unique" suffix of URI
2355 */
2356 ix = pos;
2357 if ((ref->path[ix] == '/') && (ix > 0))
2358 ix--;
2359 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2360 ix -= 2;
2361 for (; ix > 0; ix--) {
2362 if (ref->path[ix] == '/')
2363 break;
2364 }
2365 if (ix == 0) {
2366 uptr = (xmlChar *)ref->path;
2367 } else {
2368 ix++;
2369 uptr = (xmlChar *)&ref->path[ix];
2370 }
2371
2372 /*
2373 * In base, count the number of '/' from the differing point
2374 */
2375 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2376 for (; bptr[ix] != 0; ix++) {
2377 if (bptr[ix] == '/')
2378 nbslash++;
2379 }
2380 }
2381 len = xmlStrlen (uptr) + 1;
2382 }
2383
2384 if (nbslash == 0) {
2385 if (uptr != NULL)
2386 /* exception characters from xmlSaveUri */
2387 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2388 goto done;
2389 }
2390
2391 /*
2392 * Allocate just enough space for the returned string -
2393 * length of the remainder of the URI, plus enough space
2394 * for the "../" groups, plus one for the terminator
2395 */
2396 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2397 if (val == NULL) {
2398 xmlGenericError(xmlGenericErrorContext,
2399 "xmlBuildRelativeURI: out of memory\n");
2400 goto done;
2401 }
2402 vptr = val;
2403 /*
2404 * Put in as many "../" as needed
2405 */
2406 for (; nbslash>0; nbslash--) {
2407 *vptr++ = '.';
2408 *vptr++ = '.';
2409 *vptr++ = '/';
2410 }
2411 /*
2412 * Finish up with the end of the URI
2413 */
2414 if (uptr != NULL) {
2415 if ((vptr > val) && (len > 0) &&
2416 (uptr[0] == '/') && (vptr[-1] == '/')) {
2417 memcpy (vptr, uptr + 1, len - 1);
2418 vptr[len - 2] = 0;
2419 } else {
2420 memcpy (vptr, uptr, len);
2421 vptr[len - 1] = 0;
2422 }
2423 } else {
2424 vptr[len - 1] = 0;
2425 }
2426
2427 /* escape the freshly-built path */
2428 vptr = val;
2429 /* exception characters from xmlSaveUri */
2430 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2431 xmlFree(vptr);
2432
2433 done:
2434 /*
2435 * Free the working variables
2436 */
2437 if (remove_path != 0)
2438 ref->path = NULL;
2439 if (ref != NULL)
2440 xmlFreeURI (ref);
2441 if (bas != NULL)
2442 xmlFreeURI (bas);
2443
2444 return val;
2445 }
2446
2447 /**
2448 * xmlCanonicPath:
2449 * @path: the resource locator in a filesystem notation
2450 *
2451 * Constructs a canonic path from the specified path.
2452 *
2453 * Returns a new canonic path, or a duplicate of the path parameter if the
2454 * construction fails. The caller is responsible for freeing the memory occupied
2455 * by the returned string. If there is insufficient memory available, or the
2456 * argument is NULL, the function returns NULL.
2457 */
/*
 * IS_WINDOWS_PATH(p):
 *
 * Non-zero when p is non-NULL and begins with an ASCII drive letter,
 * a colon and then either a slash or a backslash ("C:/..." or "c:\...").
 *
 * The tests are chained with && so each character is only read after the
 * previous one matched; for a NUL-terminated string this never reads past
 * the terminator, whatever the string length.
 *
 * The argument is fully parenthesized so that expressions such as
 * IS_WINDOWS_PATH(str + 1) expand correctly. It is still evaluated several
 * times, so it must not have side effects.
 */
#define IS_WINDOWS_PATH(p)                                      \
    (((p) != NULL) &&                                           \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||                   \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&                  \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2463 xmlChar *
xmlCanonicPath(const xmlChar * path)2464 xmlCanonicPath(const xmlChar *path)
2465 {
2466 /*
2467 * For Windows implementations, additional work needs to be done to
2468 * replace backslashes in pathnames with "forward slashes"
2469 */
2470 #if defined(_WIN32) && !defined(__CYGWIN__)
2471 int len = 0;
2472 int i = 0;
2473 xmlChar *p = NULL;
2474 #endif
2475 xmlURIPtr uri;
2476 xmlChar *ret;
2477 const xmlChar *absuri;
2478
2479 if (path == NULL)
2480 return(NULL);
2481
2482 /* sanitize filename starting with // so it can be used as URI */
2483 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2484 path++;
2485
2486 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2487 xmlFreeURI(uri);
2488 return xmlStrdup(path);
2489 }
2490
2491 /* Check if this is an "absolute uri" */
2492 absuri = xmlStrstr(path, BAD_CAST "://");
2493 if (absuri != NULL) {
2494 int l, j;
2495 unsigned char c;
2496 xmlChar *escURI;
2497
2498 /*
2499 * this looks like an URI where some parts have not been
2500 * escaped leading to a parsing problem. Check that the first
2501 * part matches a protocol.
2502 */
2503 l = absuri - path;
2504 /* Bypass if first part (part before the '://') is > 20 chars */
2505 if ((l <= 0) || (l > 20))
2506 goto path_processing;
2507 /* Bypass if any non-alpha characters are present in first part */
2508 for (j = 0;j < l;j++) {
2509 c = path[j];
2510 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2511 goto path_processing;
2512 }
2513
2514 /* Escape all except the characters specified in the supplied path */
2515 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2516 if (escURI != NULL) {
2517 /* Try parsing the escaped path */
2518 uri = xmlParseURI((const char *) escURI);
2519 /* If successful, return the escaped string */
2520 if (uri != NULL) {
2521 xmlFreeURI(uri);
2522 return escURI;
2523 }
2524 }
2525 }
2526
2527 path_processing:
2528 /* For Windows implementations, replace backslashes with 'forward slashes' */
2529 #if defined(_WIN32) && !defined(__CYGWIN__)
2530 /*
2531 * Create a URI structure
2532 */
2533 uri = xmlCreateURI();
2534 if (uri == NULL) { /* Guard against 'out of memory' */
2535 return(NULL);
2536 }
2537
2538 len = xmlStrlen(path);
2539 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2540 /* make the scheme 'file' */
2541 uri->scheme = xmlStrdup(BAD_CAST "file");
2542 /* allocate space for leading '/' + path + string terminator */
2543 uri->path = xmlMallocAtomic(len + 2);
2544 if (uri->path == NULL) {
2545 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2546 return(NULL);
2547 }
2548 /* Put in leading '/' plus path */
2549 uri->path[0] = '/';
2550 p = uri->path + 1;
2551 strncpy(p, path, len + 1);
2552 } else {
2553 uri->path = xmlStrdup(path);
2554 if (uri->path == NULL) {
2555 xmlFreeURI(uri);
2556 return(NULL);
2557 }
2558 p = uri->path;
2559 }
2560 /* Now change all occurences of '\' to '/' */
2561 while (*p != '\0') {
2562 if (*p == '\\')
2563 *p = '/';
2564 p++;
2565 }
2566
2567 if (uri->scheme == NULL) {
2568 ret = xmlStrdup((const xmlChar *) uri->path);
2569 } else {
2570 ret = xmlSaveUri(uri);
2571 }
2572
2573 xmlFreeURI(uri);
2574 #else
2575 ret = xmlStrdup((const xmlChar *) path);
2576 #endif
2577 return(ret);
2578 }
2579
2580 /**
2581 * xmlPathToURI:
2582 * @path: the resource locator in a filesystem notation
2583 *
2584 * Constructs an URI expressing the existing path
2585 *
2586 * Returns a new URI, or a duplicate of the path parameter if the
2587 * construction fails. The caller is responsible for freeing the memory
2588 * occupied by the returned string. If there is insufficient memory available,
2589 * or the argument is NULL, the function returns NULL.
2590 */
2591 xmlChar *
xmlPathToURI(const xmlChar * path)2592 xmlPathToURI(const xmlChar *path)
2593 {
2594 xmlURIPtr uri;
2595 xmlURI temp;
2596 xmlChar *ret, *cal;
2597
2598 if (path == NULL)
2599 return(NULL);
2600
2601 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2602 xmlFreeURI(uri);
2603 return xmlStrdup(path);
2604 }
2605 cal = xmlCanonicPath(path);
2606 if (cal == NULL)
2607 return(NULL);
2608 #if defined(_WIN32) && !defined(__CYGWIN__)
2609 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2610 If 'cal' is a valid URI allready then we are done here, as continuing would make
2611 it invalid. */
2612 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2613 xmlFreeURI(uri);
2614 return cal;
2615 }
2616 /* 'cal' can contain a relative path with backslashes. If that is processed
2617 by xmlSaveURI, they will be escaped and the external entity loader machinery
2618 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2619 ret = cal;
2620 while (*ret != '\0') {
2621 if (*ret == '\\')
2622 *ret = '/';
2623 ret++;
2624 }
2625 #endif
2626 memset(&temp, 0, sizeof(temp));
2627 temp.path = (char *) cal;
2628 ret = xmlSaveUri(&temp);
2629 xmlFree(cal);
2630 return(ret);
2631 }
2632 #define bottom_uri
2633 #include "elfgcchack.h"
2634