1 /**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11 #define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20
21 static void xmlCleanURI(xmlURIPtr uri);
22
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * Every macro takes a character value, except IS_UNWISE() which takes a
 * pointer to the character to test. Arguments may be evaluated more than
 * once, so they must be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(x) (('a' <= (x)) && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(x) (('A' <= (x)) && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | ... | "9"
 *
 * Any IS_DIGIT definition already in effect is replaced by this one.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (('0' <= (x)) && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike its siblings this macro takes a pointer, not a character.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
90
/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances
 * the pointer by three characters, anything else by one. The caller is
 * responsible for having checked that two characters follow the '%'.
 *
 * The argument is parenthesized so that an lvalue expression such as
 * *pp advances the pointed-to pointer and not pp itself.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n bytes of s through xmlStrndup(), cast to char *.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
108
109 /************************************************************************
110 * *
111 * RFC 3986 parser *
112 * *
113 ************************************************************************/
114
/*
 * Character class tests for the RFC 3986 grammar. Every macro takes a
 * pointer to the character to test and may evaluate it several times.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p)                                                    \
    (((*(p) >= 'a') && (*(p) <= 'z')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                  / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||           \
     ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||          \
     ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||           \
     ((*(p) == ';')) || ((*(p) == '=')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||           \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||           \
     ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||             \
     ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only looked at once a '%' was seen.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||     \
     ((*(p) == ':')) || ((*(p) == '@')))
164
165 /**
166 * xmlParse3986Scheme:
167 * @uri: pointer to an URI structure
168 * @str: pointer to the string to analyze
169 *
170 * Parse an URI scheme
171 *
172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173 *
174 * Returns 0 or the error code
175 */
176 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)177 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178 const char *cur;
179
180 if (str == NULL)
181 return(-1);
182
183 cur = *str;
184 if (!ISA_ALPHA(cur))
185 return(2);
186 cur++;
187 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189 if (uri != NULL) {
190 if (uri->scheme != NULL) xmlFree(uri->scheme);
191 uri->scheme = STRNDUP(*str, cur - *str);
192 }
193 *str = cur;
194 return(0);
195 }
196
197 /**
198 * xmlParse3986Fragment:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
201 *
202 * Parse the query part of an URI
203 *
204 * fragment = *( pchar / "/" / "?" )
205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206 * in the fragment identifier but this is used very broadly for
207 * xpointer scheme selection, so we are allowing it here to not break
208 * for example all the DocBook processing chains.
209 *
210 * Returns 0 or the error code
211 */
212 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)213 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214 {
215 const char *cur;
216
217 if (str == NULL)
218 return (-1);
219
220 cur = *str;
221
222 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223 (*cur == '[') || (*cur == ']') ||
224 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225 NEXT(cur);
226 if (uri != NULL) {
227 if (uri->fragment != NULL)
228 xmlFree(uri->fragment);
229 if (uri->cleanup & 2)
230 uri->fragment = STRNDUP(*str, cur - *str);
231 else
232 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233 }
234 *str = cur;
235 return (0);
236 }
237
238 /**
239 * xmlParse3986Query:
240 * @uri: pointer to an URI structure
241 * @str: pointer to the string to analyze
242 *
243 * Parse the query part of an URI
244 *
245 * query = *uric
246 *
247 * Returns 0 or the error code
248 */
249 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)250 xmlParse3986Query(xmlURIPtr uri, const char **str)
251 {
252 const char *cur;
253
254 if (str == NULL)
255 return (-1);
256
257 cur = *str;
258
259 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261 NEXT(cur);
262 if (uri != NULL) {
263 if (uri->query != NULL)
264 xmlFree(uri->query);
265 if (uri->cleanup & 2)
266 uri->query = STRNDUP(*str, cur - *str);
267 else
268 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269
270 /* Save the raw bytes of the query as well.
271 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272 */
273 if (uri->query_raw != NULL)
274 xmlFree (uri->query_raw);
275 uri->query_raw = STRNDUP (*str, cur - *str);
276 }
277 *str = cur;
278 return (0);
279 }
280
281 /**
282 * xmlParse3986Port:
283 * @uri: pointer to an URI structure
284 * @str: the string to analyze
285 *
286 * Parse a port part and fills in the appropriate fields
287 * of the @uri structure
288 *
289 * port = *DIGIT
290 *
291 * Returns 0 or the error code
292 */
293 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)294 xmlParse3986Port(xmlURIPtr uri, const char **str)
295 {
296 const char *cur = *str;
297
298 if (ISA_DIGIT(cur)) {
299 if (uri != NULL)
300 uri->port = 0;
301 while (ISA_DIGIT(cur)) {
302 if (uri != NULL)
303 uri->port = uri->port * 10 + (*cur - '0');
304 cur++;
305 }
306 *str = cur;
307 return(0);
308 }
309 return(1);
310 }
311
312 /**
313 * xmlParse3986Userinfo:
314 * @uri: pointer to an URI structure
315 * @str: the string to analyze
316 *
317 * Parse an user informations part and fills in the appropriate fields
318 * of the @uri structure
319 *
320 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
321 *
322 * Returns 0 or the error code
323 */
324 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)325 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326 {
327 const char *cur;
328
329 cur = *str;
330 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331 ISA_SUB_DELIM(cur) || (*cur == ':'))
332 NEXT(cur);
333 if (*cur == '@') {
334 if (uri != NULL) {
335 if (uri->user != NULL) xmlFree(uri->user);
336 if (uri->cleanup & 2)
337 uri->user = STRNDUP(*str, cur - *str);
338 else
339 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340 }
341 *str = cur;
342 return(0);
343 }
344 return(1);
345 }
346
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.
 *
 * At most three leading digits are considered. They form a dec-octet
 * when there is no leading zero on a multi digit number and the value
 * is not greater than 255. *@str is left untouched on failure.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* cur[ndigits] is only read once all previous chars were digits */
    while ((ndigits < 3) && (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    /* "0" alone is fine, "00" or "012" are not */
    if ((ndigits > 1) && (cur[0] == '0'))
        return(1);
    if (value > 255)
        return(1);

    *str = cur + ndigits;
    return(0);
}
384 /**
385 * xmlParse3986Host:
386 * @uri: pointer to an URI structure
387 * @str: the string to analyze
388 *
389 * Parse an host part and fills in the appropriate fields
390 * of the @uri structure
391 *
392 * host = IP-literal / IPv4address / reg-name
393 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
394 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
395 * reg-name = *( unreserved / pct-encoded / sub-delims )
396 *
397 * Returns 0 or the error code
398 */
399 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)400 xmlParse3986Host(xmlURIPtr uri, const char **str)
401 {
402 const char *cur = *str;
403 const char *host;
404
405 host = cur;
406 /*
407 * IPv6 and future adressing scheme are enclosed between brackets
408 */
409 if (*cur == '[') {
410 cur++;
411 while ((*cur != ']') && (*cur != 0))
412 cur++;
413 if (*cur != ']')
414 return(1);
415 cur++;
416 goto found;
417 }
418 /*
419 * try to parse an IPv4
420 */
421 if (ISA_DIGIT(cur)) {
422 if (xmlParse3986DecOctet(&cur) != 0)
423 goto not_ipv4;
424 if (*cur != '.')
425 goto not_ipv4;
426 cur++;
427 if (xmlParse3986DecOctet(&cur) != 0)
428 goto not_ipv4;
429 if (*cur != '.')
430 goto not_ipv4;
431 if (xmlParse3986DecOctet(&cur) != 0)
432 goto not_ipv4;
433 if (*cur != '.')
434 goto not_ipv4;
435 if (xmlParse3986DecOctet(&cur) != 0)
436 goto not_ipv4;
437 goto found;
438 not_ipv4:
439 cur = *str;
440 }
441 /*
442 * then this should be a hostname which can be empty
443 */
444 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445 NEXT(cur);
446 found:
447 if (uri != NULL) {
448 if (uri->authority != NULL) xmlFree(uri->authority);
449 uri->authority = NULL;
450 if (uri->server != NULL) xmlFree(uri->server);
451 if (cur != host) {
452 if (uri->cleanup & 2)
453 uri->server = STRNDUP(host, cur - host);
454 else
455 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456 } else
457 uri->server = NULL;
458 }
459 *str = cur;
460 return(0);
461 }
462
463 /**
464 * xmlParse3986Authority:
465 * @uri: pointer to an URI structure
466 * @str: the string to analyze
467 *
468 * Parse an authority part and fills in the appropriate fields
469 * of the @uri structure
470 *
471 * authority = [ userinfo "@" ] host [ ":" port ]
472 *
473 * Returns 0 or the error code
474 */
475 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)476 xmlParse3986Authority(xmlURIPtr uri, const char **str)
477 {
478 const char *cur;
479 int ret;
480
481 cur = *str;
482 /*
483 * try to parse an userinfo and check for the trailing @
484 */
485 ret = xmlParse3986Userinfo(uri, &cur);
486 if ((ret != 0) || (*cur != '@'))
487 cur = *str;
488 else
489 cur++;
490 ret = xmlParse3986Host(uri, &cur);
491 if (ret != 0) return(ret);
492 if (*cur == ':') {
493 cur++;
494 ret = xmlParse3986Port(uri, &cur);
495 if (ret != 0) return(ret);
496 }
497 *str = cur;
498 return(0);
499 }
500
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 if there is none
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping at the first character which is not a
 * pchar or which is the @forbid character.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * A segment starting on the @forbid character is empty, so it is only
 * accepted when @empty is set. *@str is advanced only when at least one
 * character was consumed.
 *
 * Returns 0 in case of success, 1 if the segment is empty while @empty
 *         is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    /* nothing can be consumed: the segment is empty */
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
533
534 /**
535 * xmlParse3986PathAbEmpty:
536 * @uri: pointer to an URI structure
537 * @str: the string to analyze
538 *
539 * Parse an path absolute or empty and fills in the appropriate fields
540 * of the @uri structure
541 *
542 * path-abempty = *( "/" segment )
543 *
544 * Returns 0 or the error code
545 */
546 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)547 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548 {
549 const char *cur;
550 int ret;
551
552 cur = *str;
553
554 while (*cur == '/') {
555 cur++;
556 ret = xmlParse3986Segment(&cur, 0, 1);
557 if (ret != 0) return(ret);
558 }
559 if (uri != NULL) {
560 if (uri->path != NULL) xmlFree(uri->path);
561 if (uri->cleanup & 2)
562 uri->path = STRNDUP(*str, cur - *str);
563 else
564 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
565 }
566 *str = cur;
567 return (0);
568 }
569
570 /**
571 * xmlParse3986PathAbsolute:
572 * @uri: pointer to an URI structure
573 * @str: the string to analyze
574 *
575 * Parse an path absolute and fills in the appropriate fields
576 * of the @uri structure
577 *
578 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
579 *
580 * Returns 0 or the error code
581 */
582 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)583 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
584 {
585 const char *cur;
586 int ret;
587
588 cur = *str;
589
590 if (*cur != '/')
591 return(1);
592 cur++;
593 ret = xmlParse3986Segment(&cur, 0, 0);
594 if (ret == 0) {
595 while (*cur == '/') {
596 cur++;
597 ret = xmlParse3986Segment(&cur, 0, 1);
598 if (ret != 0) return(ret);
599 }
600 }
601 if (uri != NULL) {
602 if (uri->path != NULL) xmlFree(uri->path);
603 if (uri->cleanup & 2)
604 uri->path = STRNDUP(*str, cur - *str);
605 else
606 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
607 }
608 *str = cur;
609 return (0);
610 }
611
612 /**
613 * xmlParse3986PathRootless:
614 * @uri: pointer to an URI structure
615 * @str: the string to analyze
616 *
617 * Parse an path without root and fills in the appropriate fields
618 * of the @uri structure
619 *
620 * path-rootless = segment-nz *( "/" segment )
621 *
622 * Returns 0 or the error code
623 */
624 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)625 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
626 {
627 const char *cur;
628 int ret;
629
630 cur = *str;
631
632 ret = xmlParse3986Segment(&cur, 0, 0);
633 if (ret != 0) return(ret);
634 while (*cur == '/') {
635 cur++;
636 ret = xmlParse3986Segment(&cur, 0, 1);
637 if (ret != 0) return(ret);
638 }
639 if (uri != NULL) {
640 if (uri->path != NULL) xmlFree(uri->path);
641 if (uri->cleanup & 2)
642 uri->path = STRNDUP(*str, cur - *str);
643 else
644 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
645 }
646 *str = cur;
647 return (0);
648 }
649
650 /**
651 * xmlParse3986PathNoScheme:
652 * @uri: pointer to an URI structure
653 * @str: the string to analyze
654 *
655 * Parse an path which is not a scheme and fills in the appropriate fields
656 * of the @uri structure
657 *
658 * path-noscheme = segment-nz-nc *( "/" segment )
659 *
660 * Returns 0 or the error code
661 */
662 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)663 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
664 {
665 const char *cur;
666 int ret;
667
668 cur = *str;
669
670 ret = xmlParse3986Segment(&cur, ':', 0);
671 if (ret != 0) return(ret);
672 while (*cur == '/') {
673 cur++;
674 ret = xmlParse3986Segment(&cur, 0, 1);
675 if (ret != 0) return(ret);
676 }
677 if (uri != NULL) {
678 if (uri->path != NULL) xmlFree(uri->path);
679 if (uri->cleanup & 2)
680 uri->path = STRNDUP(*str, cur - *str);
681 else
682 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
683 }
684 *str = cur;
685 return (0);
686 }
687
688 /**
689 * xmlParse3986HierPart:
690 * @uri: pointer to an URI structure
691 * @str: the string to analyze
692 *
693 * Parse an hierarchical part and fills in the appropriate fields
694 * of the @uri structure
695 *
696 * hier-part = "//" authority path-abempty
697 * / path-absolute
698 * / path-rootless
699 * / path-empty
700 *
701 * Returns 0 or the error code
702 */
703 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)704 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
705 {
706 const char *cur;
707 int ret;
708
709 cur = *str;
710
711 if ((*cur == '/') && (*(cur + 1) == '/')) {
712 cur += 2;
713 ret = xmlParse3986Authority(uri, &cur);
714 if (ret != 0) return(ret);
715 ret = xmlParse3986PathAbEmpty(uri, &cur);
716 if (ret != 0) return(ret);
717 *str = cur;
718 return(0);
719 } else if (*cur == '/') {
720 ret = xmlParse3986PathAbsolute(uri, &cur);
721 if (ret != 0) return(ret);
722 } else if (ISA_PCHAR(cur)) {
723 ret = xmlParse3986PathRootless(uri, &cur);
724 if (ret != 0) return(ret);
725 } else {
726 /* path-empty is effectively empty */
727 if (uri != NULL) {
728 if (uri->path != NULL) xmlFree(uri->path);
729 uri->path = NULL;
730 }
731 }
732 *str = cur;
733 return (0);
734 }
735
736 /**
737 * xmlParse3986RelativeRef:
738 * @uri: pointer to an URI structure
739 * @str: the string to analyze
740 *
741 * Parse an URI string and fills in the appropriate fields
742 * of the @uri structure
743 *
744 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
745 * relative-part = "//" authority path-abempty
746 * / path-absolute
747 * / path-noscheme
748 * / path-empty
749 *
750 * Returns 0 or the error code
751 */
752 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)753 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
754 int ret;
755
756 if ((*str == '/') && (*(str + 1) == '/')) {
757 str += 2;
758 ret = xmlParse3986Authority(uri, &str);
759 if (ret != 0) return(ret);
760 ret = xmlParse3986PathAbEmpty(uri, &str);
761 if (ret != 0) return(ret);
762 } else if (*str == '/') {
763 ret = xmlParse3986PathAbsolute(uri, &str);
764 if (ret != 0) return(ret);
765 } else if (ISA_PCHAR(str)) {
766 ret = xmlParse3986PathNoScheme(uri, &str);
767 if (ret != 0) return(ret);
768 } else {
769 /* path-empty is effectively empty */
770 if (uri != NULL) {
771 if (uri->path != NULL) xmlFree(uri->path);
772 uri->path = NULL;
773 }
774 }
775
776 if (*str == '?') {
777 str++;
778 ret = xmlParse3986Query(uri, &str);
779 if (ret != 0) return(ret);
780 }
781 if (*str == '#') {
782 str++;
783 ret = xmlParse3986Fragment(uri, &str);
784 if (ret != 0) return(ret);
785 }
786 if (*str != 0) {
787 xmlCleanURI(uri);
788 return(1);
789 }
790 return(0);
791 }
792
793
794 /**
795 * xmlParse3986URI:
796 * @uri: pointer to an URI structure
797 * @str: the string to analyze
798 *
799 * Parse an URI string and fills in the appropriate fields
800 * of the @uri structure
801 *
802 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
803 *
804 * Returns 0 or the error code
805 */
806 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)807 xmlParse3986URI(xmlURIPtr uri, const char *str) {
808 int ret;
809
810 ret = xmlParse3986Scheme(uri, &str);
811 if (ret != 0) return(ret);
812 if (*str != ':') {
813 return(1);
814 }
815 str++;
816 ret = xmlParse3986HierPart(uri, &str);
817 if (ret != 0) return(ret);
818 if (*str == '?') {
819 str++;
820 ret = xmlParse3986Query(uri, &str);
821 if (ret != 0) return(ret);
822 }
823 if (*str == '#') {
824 str++;
825 ret = xmlParse3986Fragment(uri, &str);
826 if (ret != 0) return(ret);
827 }
828 if (*str != 0) {
829 xmlCleanURI(uri);
830 return(1);
831 }
832 return(0);
833 }
834
835 /**
836 * xmlParse3986URIReference:
837 * @uri: pointer to an URI structure
838 * @str: the string to analyze
839 *
840 * Parse an URI reference string and fills in the appropriate fields
841 * of the @uri structure
842 *
843 * URI-reference = URI / relative-ref
844 *
845 * Returns 0 or the error code
846 */
847 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)848 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
849 int ret;
850
851 if (str == NULL)
852 return(-1);
853 xmlCleanURI(uri);
854
855 /*
856 * Try first to parse absolute refs, then fallback to relative if
857 * it fails.
858 */
859 ret = xmlParse3986URI(uri, str);
860 if (ret != 0) {
861 xmlCleanURI(uri);
862 ret = xmlParse3986RelativeRef(uri, str);
863 if (ret != 0) {
864 xmlCleanURI(uri);
865 return(ret);
866 }
867 }
868 return(0);
869 }
870
871 /**
872 * xmlParseURI:
873 * @str: the URI string to analyze
874 *
875 * Parse an URI based on RFC 3986
876 *
877 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
878 *
879 * Returns a newly built xmlURIPtr or NULL in case of error
880 */
881 xmlURIPtr
xmlParseURI(const char * str)882 xmlParseURI(const char *str) {
883 xmlURIPtr uri;
884 int ret;
885
886 if (str == NULL)
887 return(NULL);
888 uri = xmlCreateURI();
889 if (uri != NULL) {
890 ret = xmlParse3986URIReference(uri, str);
891 if (ret) {
892 xmlFreeURI(uri);
893 return(NULL);
894 }
895 }
896 return(uri);
897 }
898
899 /**
900 * xmlParseURIReference:
901 * @uri: pointer to an URI structure
902 * @str: the string to analyze
903 *
904 * Parse an URI reference string based on RFC 3986 and fills in the
905 * appropriate fields of the @uri structure
906 *
907 * URI-reference = URI / relative-ref
908 *
909 * Returns 0 or the error code
910 */
911 int
xmlParseURIReference(xmlURIPtr uri,const char * str)912 xmlParseURIReference(xmlURIPtr uri, const char *str) {
913 return(xmlParse3986URIReference(uri, str));
914 }
915
916 /**
917 * xmlParseURIRaw:
918 * @str: the URI string to analyze
919 * @raw: if 1 unescaping of URI pieces are disabled
920 *
921 * Parse an URI but allows to keep intact the original fragments.
922 *
923 * URI-reference = URI / relative-ref
924 *
925 * Returns a newly built xmlURIPtr or NULL in case of error
926 */
927 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)928 xmlParseURIRaw(const char *str, int raw) {
929 xmlURIPtr uri;
930 int ret;
931
932 if (str == NULL)
933 return(NULL);
934 uri = xmlCreateURI();
935 if (uri != NULL) {
936 if (raw) {
937 uri->cleanup |= 2;
938 }
939 ret = xmlParseURIReference(uri, str);
940 if (ret) {
941 xmlFreeURI(uri);
942 return(NULL);
943 }
944 }
945 return(uri);
946 }
947
948 /************************************************************************
949 * *
950 * Generic URI structure functions *
951 * *
952 ************************************************************************/
953
954 /**
955 * xmlCreateURI:
956 *
957 * Simply creates an empty xmlURI
958 *
959 * Returns the new structure or NULL in case of error
960 */
961 xmlURIPtr
xmlCreateURI(void)962 xmlCreateURI(void) {
963 xmlURIPtr ret;
964
965 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
966 if (ret == NULL) {
967 xmlGenericError(xmlGenericErrorContext,
968 "xmlCreateURI: out of memory\n");
969 return(NULL);
970 }
971 memset(ret, 0, sizeof(xmlURI));
972 return(ret);
973 }
974
975 /**
976 * xmlSaveUri:
977 * @uri: pointer to an xmlURI
978 *
979 * Save the URI as an escaped string
980 *
981 * Returns a new string (to be deallocated by caller)
982 */
983 xmlChar *
xmlSaveUri(xmlURIPtr uri)984 xmlSaveUri(xmlURIPtr uri) {
985 xmlChar *ret = NULL;
986 xmlChar *temp;
987 const char *p;
988 int len;
989 int max;
990
991 if (uri == NULL) return(NULL);
992
993
994 max = 80;
995 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
996 if (ret == NULL) {
997 xmlGenericError(xmlGenericErrorContext,
998 "xmlSaveUri: out of memory\n");
999 return(NULL);
1000 }
1001 len = 0;
1002
1003 if (uri->scheme != NULL) {
1004 p = uri->scheme;
1005 while (*p != 0) {
1006 if (len >= max) {
1007 max *= 2;
1008 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1009 if (temp == NULL) {
1010 xmlGenericError(xmlGenericErrorContext,
1011 "xmlSaveUri: out of memory\n");
1012 xmlFree(ret);
1013 return(NULL);
1014 }
1015 ret = temp;
1016 }
1017 ret[len++] = *p++;
1018 }
1019 if (len >= max) {
1020 max *= 2;
1021 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1022 if (temp == NULL) {
1023 xmlGenericError(xmlGenericErrorContext,
1024 "xmlSaveUri: out of memory\n");
1025 xmlFree(ret);
1026 return(NULL);
1027 }
1028 ret = temp;
1029 }
1030 ret[len++] = ':';
1031 }
1032 if (uri->opaque != NULL) {
1033 p = uri->opaque;
1034 while (*p != 0) {
1035 if (len + 3 >= max) {
1036 max *= 2;
1037 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038 if (temp == NULL) {
1039 xmlGenericError(xmlGenericErrorContext,
1040 "xmlSaveUri: out of memory\n");
1041 xmlFree(ret);
1042 return(NULL);
1043 }
1044 ret = temp;
1045 }
1046 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1047 ret[len++] = *p++;
1048 else {
1049 int val = *(unsigned char *)p++;
1050 int hi = val / 0x10, lo = val % 0x10;
1051 ret[len++] = '%';
1052 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1053 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1054 }
1055 }
1056 } else {
1057 if (uri->server != NULL) {
1058 if (len + 3 >= max) {
1059 max *= 2;
1060 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1061 if (temp == NULL) {
1062 xmlGenericError(xmlGenericErrorContext,
1063 "xmlSaveUri: out of memory\n");
1064 xmlFree(ret);
1065 return(NULL);
1066 }
1067 ret = temp;
1068 }
1069 ret[len++] = '/';
1070 ret[len++] = '/';
1071 if (uri->user != NULL) {
1072 p = uri->user;
1073 while (*p != 0) {
1074 if (len + 3 >= max) {
1075 max *= 2;
1076 temp = (xmlChar *) xmlRealloc(ret,
1077 (max + 1) * sizeof(xmlChar));
1078 if (temp == NULL) {
1079 xmlGenericError(xmlGenericErrorContext,
1080 "xmlSaveUri: out of memory\n");
1081 xmlFree(ret);
1082 return(NULL);
1083 }
1084 ret = temp;
1085 }
1086 if ((IS_UNRESERVED(*(p))) ||
1087 ((*(p) == ';')) || ((*(p) == ':')) ||
1088 ((*(p) == '&')) || ((*(p) == '=')) ||
1089 ((*(p) == '+')) || ((*(p) == '$')) ||
1090 ((*(p) == ',')))
1091 ret[len++] = *p++;
1092 else {
1093 int val = *(unsigned char *)p++;
1094 int hi = val / 0x10, lo = val % 0x10;
1095 ret[len++] = '%';
1096 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1097 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1098 }
1099 }
1100 if (len + 3 >= max) {
1101 max *= 2;
1102 temp = (xmlChar *) xmlRealloc(ret,
1103 (max + 1) * sizeof(xmlChar));
1104 if (temp == NULL) {
1105 xmlGenericError(xmlGenericErrorContext,
1106 "xmlSaveUri: out of memory\n");
1107 xmlFree(ret);
1108 return(NULL);
1109 }
1110 ret = temp;
1111 }
1112 ret[len++] = '@';
1113 }
1114 p = uri->server;
1115 while (*p != 0) {
1116 if (len >= max) {
1117 max *= 2;
1118 temp = (xmlChar *) xmlRealloc(ret,
1119 (max + 1) * sizeof(xmlChar));
1120 if (temp == NULL) {
1121 xmlGenericError(xmlGenericErrorContext,
1122 "xmlSaveUri: out of memory\n");
1123 xmlFree(ret);
1124 return(NULL);
1125 }
1126 ret = temp;
1127 }
1128 ret[len++] = *p++;
1129 }
1130 if (uri->port > 0) {
1131 if (len + 10 >= max) {
1132 max *= 2;
1133 temp = (xmlChar *) xmlRealloc(ret,
1134 (max + 1) * sizeof(xmlChar));
1135 if (temp == NULL) {
1136 xmlGenericError(xmlGenericErrorContext,
1137 "xmlSaveUri: out of memory\n");
1138 xmlFree(ret);
1139 return(NULL);
1140 }
1141 ret = temp;
1142 }
1143 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1144 }
1145 } else if (uri->authority != NULL) {
1146 if (len + 3 >= max) {
1147 max *= 2;
1148 temp = (xmlChar *) xmlRealloc(ret,
1149 (max + 1) * sizeof(xmlChar));
1150 if (temp == NULL) {
1151 xmlGenericError(xmlGenericErrorContext,
1152 "xmlSaveUri: out of memory\n");
1153 xmlFree(ret);
1154 return(NULL);
1155 }
1156 ret = temp;
1157 }
1158 ret[len++] = '/';
1159 ret[len++] = '/';
1160 p = uri->authority;
1161 while (*p != 0) {
1162 if (len + 3 >= max) {
1163 max *= 2;
1164 temp = (xmlChar *) xmlRealloc(ret,
1165 (max + 1) * sizeof(xmlChar));
1166 if (temp == NULL) {
1167 xmlGenericError(xmlGenericErrorContext,
1168 "xmlSaveUri: out of memory\n");
1169 xmlFree(ret);
1170 return(NULL);
1171 }
1172 ret = temp;
1173 }
1174 if ((IS_UNRESERVED(*(p))) ||
1175 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1176 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1177 ((*(p) == '=')) || ((*(p) == '+')))
1178 ret[len++] = *p++;
1179 else {
1180 int val = *(unsigned char *)p++;
1181 int hi = val / 0x10, lo = val % 0x10;
1182 ret[len++] = '%';
1183 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1184 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1185 }
1186 }
1187 } else if (uri->scheme != NULL) {
1188 if (len + 3 >= max) {
1189 max *= 2;
1190 temp = (xmlChar *) xmlRealloc(ret,
1191 (max + 1) * sizeof(xmlChar));
1192 if (temp == NULL) {
1193 xmlGenericError(xmlGenericErrorContext,
1194 "xmlSaveUri: out of memory\n");
1195 xmlFree(ret);
1196 return(NULL);
1197 }
1198 ret = temp;
1199 }
1200 ret[len++] = '/';
1201 ret[len++] = '/';
1202 }
1203 if (uri->path != NULL) {
1204 p = uri->path;
1205 /*
1206 * the colon in file:///d: should not be escaped or
1207 * Windows accesses fail later.
1208 */
1209 if ((uri->scheme != NULL) &&
1210 (p[0] == '/') &&
1211 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1212 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213 (p[2] == ':') &&
1214 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215 if (len + 3 >= max) {
1216 max *= 2;
1217 ret = (xmlChar *) xmlRealloc(ret,
1218 (max + 1) * sizeof(xmlChar));
1219 if (ret == NULL) {
1220 xmlGenericError(xmlGenericErrorContext,
1221 "xmlSaveUri: out of memory\n");
1222 return(NULL);
1223 }
1224 }
1225 ret[len++] = *p++;
1226 ret[len++] = *p++;
1227 ret[len++] = *p++;
1228 }
1229 while (*p != 0) {
1230 if (len + 3 >= max) {
1231 max *= 2;
1232 temp = (xmlChar *) xmlRealloc(ret,
1233 (max + 1) * sizeof(xmlChar));
1234 if (temp == NULL) {
1235 xmlGenericError(xmlGenericErrorContext,
1236 "xmlSaveUri: out of memory\n");
1237 xmlFree(ret);
1238 return(NULL);
1239 }
1240 ret = temp;
1241 }
1242 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1243 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1244 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1245 ((*(p) == ',')))
1246 ret[len++] = *p++;
1247 else {
1248 int val = *(unsigned char *)p++;
1249 int hi = val / 0x10, lo = val % 0x10;
1250 ret[len++] = '%';
1251 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1252 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1253 }
1254 }
1255 }
1256 if (uri->query_raw != NULL) {
1257 if (len + 1 >= max) {
1258 max *= 2;
1259 temp = (xmlChar *) xmlRealloc(ret,
1260 (max + 1) * sizeof(xmlChar));
1261 if (temp == NULL) {
1262 xmlGenericError(xmlGenericErrorContext,
1263 "xmlSaveUri: out of memory\n");
1264 xmlFree(ret);
1265 return(NULL);
1266 }
1267 ret = temp;
1268 }
1269 ret[len++] = '?';
1270 p = uri->query_raw;
1271 while (*p != 0) {
1272 if (len + 1 >= max) {
1273 max *= 2;
1274 temp = (xmlChar *) xmlRealloc(ret,
1275 (max + 1) * sizeof(xmlChar));
1276 if (temp == NULL) {
1277 xmlGenericError(xmlGenericErrorContext,
1278 "xmlSaveUri: out of memory\n");
1279 xmlFree(ret);
1280 return(NULL);
1281 }
1282 ret = temp;
1283 }
1284 ret[len++] = *p++;
1285 }
1286 } else if (uri->query != NULL) {
1287 if (len + 3 >= max) {
1288 max *= 2;
1289 temp = (xmlChar *) xmlRealloc(ret,
1290 (max + 1) * sizeof(xmlChar));
1291 if (temp == NULL) {
1292 xmlGenericError(xmlGenericErrorContext,
1293 "xmlSaveUri: out of memory\n");
1294 xmlFree(ret);
1295 return(NULL);
1296 }
1297 ret = temp;
1298 }
1299 ret[len++] = '?';
1300 p = uri->query;
1301 while (*p != 0) {
1302 if (len + 3 >= max) {
1303 max *= 2;
1304 temp = (xmlChar *) xmlRealloc(ret,
1305 (max + 1) * sizeof(xmlChar));
1306 if (temp == NULL) {
1307 xmlGenericError(xmlGenericErrorContext,
1308 "xmlSaveUri: out of memory\n");
1309 xmlFree(ret);
1310 return(NULL);
1311 }
1312 ret = temp;
1313 }
1314 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1315 ret[len++] = *p++;
1316 else {
1317 int val = *(unsigned char *)p++;
1318 int hi = val / 0x10, lo = val % 0x10;
1319 ret[len++] = '%';
1320 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1321 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1322 }
1323 }
1324 }
1325 }
1326 if (uri->fragment != NULL) {
1327 if (len + 3 >= max) {
1328 max *= 2;
1329 temp = (xmlChar *) xmlRealloc(ret,
1330 (max + 1) * sizeof(xmlChar));
1331 if (temp == NULL) {
1332 xmlGenericError(xmlGenericErrorContext,
1333 "xmlSaveUri: out of memory\n");
1334 xmlFree(ret);
1335 return(NULL);
1336 }
1337 ret = temp;
1338 }
1339 ret[len++] = '#';
1340 p = uri->fragment;
1341 while (*p != 0) {
1342 if (len + 3 >= max) {
1343 max *= 2;
1344 temp = (xmlChar *) xmlRealloc(ret,
1345 (max + 1) * sizeof(xmlChar));
1346 if (temp == NULL) {
1347 xmlGenericError(xmlGenericErrorContext,
1348 "xmlSaveUri: out of memory\n");
1349 xmlFree(ret);
1350 return(NULL);
1351 }
1352 ret = temp;
1353 }
1354 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1355 ret[len++] = *p++;
1356 else {
1357 int val = *(unsigned char *)p++;
1358 int hi = val / 0x10, lo = val % 0x10;
1359 ret[len++] = '%';
1360 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1361 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1362 }
1363 }
1364 }
1365 if (len >= max) {
1366 max *= 2;
1367 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1368 if (temp == NULL) {
1369 xmlGenericError(xmlGenericErrorContext,
1370 "xmlSaveUri: out of memory\n");
1371 xmlFree(ret);
1372 return(NULL);
1373 }
1374 ret = temp;
1375 }
1376 ret[len++] = 0;
1377 return(ret);
1378 }
1379
1380 /**
1381 * xmlPrintURI:
1382 * @stream: a FILE* for the output
1383 * @uri: pointer to an xmlURI
1384 *
1385 * Prints the URI in the stream @stream.
1386 */
1387 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1388 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1389 xmlChar *out;
1390
1391 out = xmlSaveUri(uri);
1392 if (out != NULL) {
1393 fprintf(stream, "%s", (char *) out);
1394 xmlFree(out);
1395 }
1396 }
1397
1398 /**
1399 * xmlCleanURI:
1400 * @uri: pointer to an xmlURI
1401 *
1402 * Make sure the xmlURI struct is free of content
1403 */
1404 static void
xmlCleanURI(xmlURIPtr uri)1405 xmlCleanURI(xmlURIPtr uri) {
1406 if (uri == NULL) return;
1407
1408 if (uri->scheme != NULL) xmlFree(uri->scheme);
1409 uri->scheme = NULL;
1410 if (uri->server != NULL) xmlFree(uri->server);
1411 uri->server = NULL;
1412 if (uri->user != NULL) xmlFree(uri->user);
1413 uri->user = NULL;
1414 if (uri->path != NULL) xmlFree(uri->path);
1415 uri->path = NULL;
1416 if (uri->fragment != NULL) xmlFree(uri->fragment);
1417 uri->fragment = NULL;
1418 if (uri->opaque != NULL) xmlFree(uri->opaque);
1419 uri->opaque = NULL;
1420 if (uri->authority != NULL) xmlFree(uri->authority);
1421 uri->authority = NULL;
1422 if (uri->query != NULL) xmlFree(uri->query);
1423 uri->query = NULL;
1424 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1425 uri->query_raw = NULL;
1426 }
1427
1428 /**
1429 * xmlFreeURI:
1430 * @uri: pointer to an xmlURI
1431 *
1432 * Free up the xmlURI struct
1433 */
1434 void
xmlFreeURI(xmlURIPtr uri)1435 xmlFreeURI(xmlURIPtr uri) {
1436 if (uri == NULL) return;
1437
1438 if (uri->scheme != NULL) xmlFree(uri->scheme);
1439 if (uri->server != NULL) xmlFree(uri->server);
1440 if (uri->user != NULL) xmlFree(uri->user);
1441 if (uri->path != NULL) xmlFree(uri->path);
1442 if (uri->fragment != NULL) xmlFree(uri->fragment);
1443 if (uri->opaque != NULL) xmlFree(uri->opaque);
1444 if (uri->authority != NULL) xmlFree(uri->authority);
1445 if (uri->query != NULL) xmlFree(uri->query);
1446 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1447 xmlFree(uri);
1448 }
1449
1450 /************************************************************************
1451 * *
1452 * Helper functions *
1453 * *
1454 ************************************************************************/
1455
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input. Runs of '/' between two
 * segments are collapsed into a single '/' along the way.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * At the top of the loop "cur" is always at the start of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp, *prev;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the '/' separator(s) preceding "cur".  If we reach
         * the start of the buffer there is no previous segment, so keep
         * going from here.  Testing the character *before* "prev" (rather
         * than "prev" itself) makes sure a one-character first segment of a
         * relative path, as in "a/b/../..", is still seen as a previous
         * segment.
         */
        prev = cur;
        while ((prev > path) && (prev[-1] == '/'))
            --prev;
        if (prev == path)
            continue;

        /* "prev - 1" is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = prev - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1643
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, and 0 otherwise.
 */
static int is_hex(char c) {
    return((((c >= '0') && (c <= '9')) ||
            ((c >= 'a') && (c <= 'f')) ||
            ((c >= 'A') && (c <= 'F'))) ? 1 : 0);
}
1651
1652 /**
1653 * xmlURIUnescapeString:
1654 * @str: the string to unescape
1655 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1656 * @target: optional destination buffer
1657 *
1658 * Unescaping routine, but does not check that the string is an URI. The
1659 * output is a direct unsigned char translation of %XX values (no encoding)
1660 * Note that the length of the result can only be smaller or same size as
1661 * the input string.
1662 *
1663 * Returns a copy of the string, but unescaped, will return NULL only in case
1664 * of error
1665 */
1666 char *
xmlURIUnescapeString(const char * str,int len,char * target)1667 xmlURIUnescapeString(const char *str, int len, char *target) {
1668 char *ret, *out;
1669 const char *in;
1670
1671 if (str == NULL)
1672 return(NULL);
1673 if (len <= 0) len = strlen(str);
1674 if (len < 0) return(NULL);
1675
1676 if (target == NULL) {
1677 ret = (char *) xmlMallocAtomic(len + 1);
1678 if (ret == NULL) {
1679 xmlGenericError(xmlGenericErrorContext,
1680 "xmlURIUnescapeString: out of memory\n");
1681 return(NULL);
1682 }
1683 } else
1684 ret = target;
1685 in = str;
1686 out = ret;
1687 while(len > 0) {
1688 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1689 in++;
1690 if ((*in >= '0') && (*in <= '9'))
1691 *out = (*in - '0');
1692 else if ((*in >= 'a') && (*in <= 'f'))
1693 *out = (*in - 'a') + 10;
1694 else if ((*in >= 'A') && (*in <= 'F'))
1695 *out = (*in - 'A') + 10;
1696 in++;
1697 if ((*in >= '0') && (*in <= '9'))
1698 *out = *out * 16 + (*in - '0');
1699 else if ((*in >= 'a') && (*in <= 'f'))
1700 *out = *out * 16 + (*in - 'a') + 10;
1701 else if ((*in >= 'A') && (*in <= 'F'))
1702 *out = *out * 16 + (*in - 'A') + 10;
1703 in++;
1704 len -= 3;
1705 out++;
1706 } else {
1707 *out++ = *in++;
1708 len--;
1709 }
1710 }
1711 *out = 0;
1712 return(ret);
1713 }
1714
1715 /**
1716 * xmlURIEscapeStr:
1717 * @str: string to escape
1718 * @list: exception list string of chars not to escape
1719 *
1720 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1721 * and the characters in the exception list.
1722 *
1723 * Returns a new escaped string or NULL in case of error.
1724 */
1725 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1726 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1727 xmlChar *ret, ch;
1728 xmlChar *temp;
1729 const xmlChar *in;
1730
1731 unsigned int len, out;
1732
1733 if (str == NULL)
1734 return(NULL);
1735 if (str[0] == 0)
1736 return(xmlStrdup(str));
1737 len = xmlStrlen(str);
1738 if (!(len > 0)) return(NULL);
1739
1740 len += 20;
1741 ret = (xmlChar *) xmlMallocAtomic(len);
1742 if (ret == NULL) {
1743 xmlGenericError(xmlGenericErrorContext,
1744 "xmlURIEscapeStr: out of memory\n");
1745 return(NULL);
1746 }
1747 in = (const xmlChar *) str;
1748 out = 0;
1749 while(*in != 0) {
1750 if (len - out <= 3) {
1751 len += 20;
1752 temp = (xmlChar *) xmlRealloc(ret, len);
1753 if (temp == NULL) {
1754 xmlGenericError(xmlGenericErrorContext,
1755 "xmlURIEscapeStr: out of memory\n");
1756 xmlFree(ret);
1757 return(NULL);
1758 }
1759 ret = temp;
1760 }
1761
1762 ch = *in;
1763
1764 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1765 unsigned char val;
1766 ret[out++] = '%';
1767 val = ch >> 4;
1768 if (val <= 9)
1769 ret[out++] = '0' + val;
1770 else
1771 ret[out++] = 'A' + val - 0xA;
1772 val = ch & 0xF;
1773 if (val <= 9)
1774 ret[out++] = '0' + val;
1775 else
1776 ret[out++] = 'A' + val - 0xA;
1777 in++;
1778 } else {
1779 ret[out++] = *in++;
1780 }
1781
1782 }
1783 ret[out] = 0;
1784 return(ret);
1785 }
1786
1787 /**
1788 * xmlURIEscape:
1789 * @str: the string of the URI to escape
1790 *
1791 * Escaping routine, does not do validity checks !
1792 * It will try to escape the chars needing this, but this is heuristic
1793 * based it's impossible to be sure.
1794 *
1795 * Returns an copy of the string, but escaped
1796 *
1797 * 25 May 2001
1798 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1799 * according to RFC2396.
1800 * - Carl Douglas
1801 */
1802 xmlChar *
xmlURIEscape(const xmlChar * str)1803 xmlURIEscape(const xmlChar * str)
1804 {
1805 xmlChar *ret, *segment = NULL;
1806 xmlURIPtr uri;
1807 int ret2;
1808
1809 #define NULLCHK(p) if(!p) { \
1810 xmlGenericError(xmlGenericErrorContext, \
1811 "xmlURIEscape: out of memory\n"); \
1812 xmlFreeURI(uri); \
1813 return NULL; } \
1814
1815 if (str == NULL)
1816 return (NULL);
1817
1818 uri = xmlCreateURI();
1819 if (uri != NULL) {
1820 /*
1821 * Allow escaping errors in the unescaped form
1822 */
1823 uri->cleanup = 1;
1824 ret2 = xmlParseURIReference(uri, (const char *)str);
1825 if (ret2) {
1826 xmlFreeURI(uri);
1827 return (NULL);
1828 }
1829 }
1830
1831 if (!uri)
1832 return NULL;
1833
1834 ret = NULL;
1835
1836 if (uri->scheme) {
1837 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1838 NULLCHK(segment)
1839 ret = xmlStrcat(ret, segment);
1840 ret = xmlStrcat(ret, BAD_CAST ":");
1841 xmlFree(segment);
1842 }
1843
1844 if (uri->authority) {
1845 segment =
1846 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1847 NULLCHK(segment)
1848 ret = xmlStrcat(ret, BAD_CAST "//");
1849 ret = xmlStrcat(ret, segment);
1850 xmlFree(segment);
1851 }
1852
1853 if (uri->user) {
1854 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1855 NULLCHK(segment)
1856 ret = xmlStrcat(ret,BAD_CAST "//");
1857 ret = xmlStrcat(ret, segment);
1858 ret = xmlStrcat(ret, BAD_CAST "@");
1859 xmlFree(segment);
1860 }
1861
1862 if (uri->server) {
1863 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1864 NULLCHK(segment)
1865 if (uri->user == NULL)
1866 ret = xmlStrcat(ret, BAD_CAST "//");
1867 ret = xmlStrcat(ret, segment);
1868 xmlFree(segment);
1869 }
1870
1871 if (uri->port) {
1872 xmlChar port[10];
1873
1874 snprintf((char *) port, 10, "%d", uri->port);
1875 ret = xmlStrcat(ret, BAD_CAST ":");
1876 ret = xmlStrcat(ret, port);
1877 }
1878
1879 if (uri->path) {
1880 segment =
1881 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1882 NULLCHK(segment)
1883 ret = xmlStrcat(ret, segment);
1884 xmlFree(segment);
1885 }
1886
1887 if (uri->query_raw) {
1888 ret = xmlStrcat(ret, BAD_CAST "?");
1889 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1890 }
1891 else if (uri->query) {
1892 segment =
1893 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1894 NULLCHK(segment)
1895 ret = xmlStrcat(ret, BAD_CAST "?");
1896 ret = xmlStrcat(ret, segment);
1897 xmlFree(segment);
1898 }
1899
1900 if (uri->opaque) {
1901 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1902 NULLCHK(segment)
1903 ret = xmlStrcat(ret, segment);
1904 xmlFree(segment);
1905 }
1906
1907 if (uri->fragment) {
1908 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1909 NULLCHK(segment)
1910 ret = xmlStrcat(ret, BAD_CAST "#");
1911 ret = xmlStrcat(ret, segment);
1912 xmlFree(segment);
1913 }
1914
1915 xmlFreeURI(uri);
1916 #undef NULLCHK
1917
1918 return (ret);
1919 }
1920
1921 /************************************************************************
1922 * *
1923 * Public functions *
1924 * *
1925 ************************************************************************/
1926
1927 /**
1928 * xmlBuildURI:
1929 * @URI: the URI instance found in the document
1930 * @base: the base value
1931 *
1932 * Computes he final URI of the reference done by checking that
1933 * the given URI is valid, and building the final URI using the
1934 * base URI. This is processed according to section 5.2 of the
1935 * RFC 2396
1936 *
1937 * 5.2. Resolving Relative References to Absolute Form
1938 *
1939 * Returns a new URI string (to be freed by the caller) or NULL in case
1940 * of error.
1941 */
1942 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1943 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1944 xmlChar *val = NULL;
1945 int ret, len, indx, cur, out;
1946 xmlURIPtr ref = NULL;
1947 xmlURIPtr bas = NULL;
1948 xmlURIPtr res = NULL;
1949
1950 /*
1951 * 1) The URI reference is parsed into the potential four components and
1952 * fragment identifier, as described in Section 4.3.
1953 *
1954 * NOTE that a completely empty URI is treated by modern browsers
1955 * as a reference to "." rather than as a synonym for the current
1956 * URI. Should we do that here?
1957 */
1958 if (URI == NULL)
1959 ret = -1;
1960 else {
1961 if (*URI) {
1962 ref = xmlCreateURI();
1963 if (ref == NULL)
1964 goto done;
1965 ret = xmlParseURIReference(ref, (const char *) URI);
1966 }
1967 else
1968 ret = 0;
1969 }
1970 if (ret != 0)
1971 goto done;
1972 if ((ref != NULL) && (ref->scheme != NULL)) {
1973 /*
1974 * The URI is absolute don't modify.
1975 */
1976 val = xmlStrdup(URI);
1977 goto done;
1978 }
1979 if (base == NULL)
1980 ret = -1;
1981 else {
1982 bas = xmlCreateURI();
1983 if (bas == NULL)
1984 goto done;
1985 ret = xmlParseURIReference(bas, (const char *) base);
1986 }
1987 if (ret != 0) {
1988 if (ref)
1989 val = xmlSaveUri(ref);
1990 goto done;
1991 }
1992 if (ref == NULL) {
1993 /*
1994 * the base fragment must be ignored
1995 */
1996 if (bas->fragment != NULL) {
1997 xmlFree(bas->fragment);
1998 bas->fragment = NULL;
1999 }
2000 val = xmlSaveUri(bas);
2001 goto done;
2002 }
2003
2004 /*
2005 * 2) If the path component is empty and the scheme, authority, and
2006 * query components are undefined, then it is a reference to the
2007 * current document and we are done. Otherwise, the reference URI's
2008 * query and fragment components are defined as found (or not found)
2009 * within the URI reference and not inherited from the base URI.
2010 *
2011 * NOTE that in modern browsers, the parsing differs from the above
2012 * in the following aspect: the query component is allowed to be
2013 * defined while still treating this as a reference to the current
2014 * document.
2015 */
2016 res = xmlCreateURI();
2017 if (res == NULL)
2018 goto done;
2019 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2020 ((ref->authority == NULL) && (ref->server == NULL))) {
2021 if (bas->scheme != NULL)
2022 res->scheme = xmlMemStrdup(bas->scheme);
2023 if (bas->authority != NULL)
2024 res->authority = xmlMemStrdup(bas->authority);
2025 else if (bas->server != NULL) {
2026 res->server = xmlMemStrdup(bas->server);
2027 if (bas->user != NULL)
2028 res->user = xmlMemStrdup(bas->user);
2029 res->port = bas->port;
2030 }
2031 if (bas->path != NULL)
2032 res->path = xmlMemStrdup(bas->path);
2033 if (ref->query_raw != NULL)
2034 res->query_raw = xmlMemStrdup (ref->query_raw);
2035 else if (ref->query != NULL)
2036 res->query = xmlMemStrdup(ref->query);
2037 else if (bas->query_raw != NULL)
2038 res->query_raw = xmlMemStrdup(bas->query_raw);
2039 else if (bas->query != NULL)
2040 res->query = xmlMemStrdup(bas->query);
2041 if (ref->fragment != NULL)
2042 res->fragment = xmlMemStrdup(ref->fragment);
2043 goto step_7;
2044 }
2045
2046 /*
2047 * 3) If the scheme component is defined, indicating that the reference
2048 * starts with a scheme name, then the reference is interpreted as an
2049 * absolute URI and we are done. Otherwise, the reference URI's
2050 * scheme is inherited from the base URI's scheme component.
2051 */
2052 if (ref->scheme != NULL) {
2053 val = xmlSaveUri(ref);
2054 goto done;
2055 }
2056 if (bas->scheme != NULL)
2057 res->scheme = xmlMemStrdup(bas->scheme);
2058
2059 if (ref->query_raw != NULL)
2060 res->query_raw = xmlMemStrdup(ref->query_raw);
2061 else if (ref->query != NULL)
2062 res->query = xmlMemStrdup(ref->query);
2063 if (ref->fragment != NULL)
2064 res->fragment = xmlMemStrdup(ref->fragment);
2065
2066 /*
2067 * 4) If the authority component is defined, then the reference is a
2068 * network-path and we skip to step 7. Otherwise, the reference
2069 * URI's authority is inherited from the base URI's authority
2070 * component, which will also be undefined if the URI scheme does not
2071 * use an authority component.
2072 */
2073 if ((ref->authority != NULL) || (ref->server != NULL)) {
2074 if (ref->authority != NULL)
2075 res->authority = xmlMemStrdup(ref->authority);
2076 else {
2077 res->server = xmlMemStrdup(ref->server);
2078 if (ref->user != NULL)
2079 res->user = xmlMemStrdup(ref->user);
2080 res->port = ref->port;
2081 }
2082 if (ref->path != NULL)
2083 res->path = xmlMemStrdup(ref->path);
2084 goto step_7;
2085 }
2086 if (bas->authority != NULL)
2087 res->authority = xmlMemStrdup(bas->authority);
2088 else if (bas->server != NULL) {
2089 res->server = xmlMemStrdup(bas->server);
2090 if (bas->user != NULL)
2091 res->user = xmlMemStrdup(bas->user);
2092 res->port = bas->port;
2093 }
2094
2095 /*
2096 * 5) If the path component begins with a slash character ("/"), then
2097 * the reference is an absolute-path and we skip to step 7.
2098 */
2099 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2100 res->path = xmlMemStrdup(ref->path);
2101 goto step_7;
2102 }
2103
2104
2105 /*
2106 * 6) If this step is reached, then we are resolving a relative-path
2107 * reference. The relative path needs to be merged with the base
2108 * URI's path. Although there are many ways to do this, we will
2109 * describe a simple method using a separate string buffer.
2110 *
2111 * Allocate a buffer large enough for the result string.
2112 */
2113 len = 2; /* extra / and 0 */
2114 if (ref->path != NULL)
2115 len += strlen(ref->path);
2116 if (bas->path != NULL)
2117 len += strlen(bas->path);
2118 res->path = (char *) xmlMallocAtomic(len);
2119 if (res->path == NULL) {
2120 xmlGenericError(xmlGenericErrorContext,
2121 "xmlBuildURI: out of memory\n");
2122 goto done;
2123 }
2124 res->path[0] = 0;
2125
2126 /*
2127 * a) All but the last segment of the base URI's path component is
2128 * copied to the buffer. In other words, any characters after the
2129 * last (right-most) slash character, if any, are excluded.
2130 */
2131 cur = 0;
2132 out = 0;
2133 if (bas->path != NULL) {
2134 while (bas->path[cur] != 0) {
2135 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2136 cur++;
2137 if (bas->path[cur] == 0)
2138 break;
2139
2140 cur++;
2141 while (out < cur) {
2142 res->path[out] = bas->path[out];
2143 out++;
2144 }
2145 }
2146 }
2147 res->path[out] = 0;
2148
2149 /*
2150 * b) The reference's path component is appended to the buffer
2151 * string.
2152 */
2153 if (ref->path != NULL && ref->path[0] != 0) {
2154 indx = 0;
2155 /*
2156 * Ensure the path includes a '/'
2157 */
2158 if ((out == 0) && (bas->server != NULL))
2159 res->path[out++] = '/';
2160 while (ref->path[indx] != 0) {
2161 res->path[out++] = ref->path[indx++];
2162 }
2163 }
2164 res->path[out] = 0;
2165
2166 /*
2167 * Steps c) to h) are really path normalization steps
2168 */
2169 xmlNormalizeURIPath(res->path);
2170
2171 step_7:
2172
2173 /*
2174 * 7) The resulting URI components, including any inherited from the
2175 * base URI, are recombined to give the absolute form of the URI
2176 * reference.
2177 */
2178 val = xmlSaveUri(res);
2179
2180 done:
2181 if (ref != NULL)
2182 xmlFreeURI(ref);
2183 if (bas != NULL)
2184 xmlFreeURI(bas);
2185 if (res != NULL)
2186 xmlFreeURI(res);
2187 return(val);
2188 }
2189
2190 /**
2191 * xmlBuildRelativeURI:
2192 * @URI: the URI reference under consideration
2193 * @base: the base value
2194 *
2195 * Expresses the URI of the reference in terms relative to the
2196 * base. Some examples of this operation include:
2197 * base = "http://site1.com/docs/book1.html"
2198 * URI input URI returned
2199 * docs/pic1.gif pic1.gif
2200 * docs/img/pic1.gif img/pic1.gif
2201 * img/pic1.gif ../img/pic1.gif
2202 * http://site1.com/docs/pic1.gif pic1.gif
2203 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2204 *
2205 * base = "docs/book1.html"
2206 * URI input URI returned
2207 * docs/pic1.gif pic1.gif
2208 * docs/img/pic1.gif img/pic1.gif
2209 * img/pic1.gif ../img/pic1.gif
2210 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2211 *
2212 *
2213 * Note: if the URI reference is really wierd or complicated, it may be
2214 * worthwhile to first convert it into a "nice" one by calling
2215 * xmlBuildURI (using 'base') before calling this routine,
2216 * since this routine (for reasonable efficiency) assumes URI has
2217 * already been through some validation.
2218 *
2219 * Returns a new URI string (to be freed by the caller) or NULL in case
2220 * error.
2221 */
2222 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2223 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2224 {
2225 xmlChar *val = NULL;
2226 int ret;
2227 int ix;
2228 int pos = 0;
2229 int nbslash = 0;
2230 int len;
2231 xmlURIPtr ref = NULL;
2232 xmlURIPtr bas = NULL;
2233 xmlChar *bptr, *uptr, *vptr;
2234 int remove_path = 0;
2235
2236 if ((URI == NULL) || (*URI == 0))
2237 return NULL;
2238
2239 /*
2240 * First parse URI into a standard form
2241 */
2242 ref = xmlCreateURI ();
2243 if (ref == NULL)
2244 return NULL;
2245 /* If URI not already in "relative" form */
2246 if (URI[0] != '.') {
2247 ret = xmlParseURIReference (ref, (const char *) URI);
2248 if (ret != 0)
2249 goto done; /* Error in URI, return NULL */
2250 } else
2251 ref->path = (char *)xmlStrdup(URI);
2252
2253 /*
2254 * Next parse base into the same standard form
2255 */
2256 if ((base == NULL) || (*base == 0)) {
2257 val = xmlStrdup (URI);
2258 goto done;
2259 }
2260 bas = xmlCreateURI ();
2261 if (bas == NULL)
2262 goto done;
2263 if (base[0] != '.') {
2264 ret = xmlParseURIReference (bas, (const char *) base);
2265 if (ret != 0)
2266 goto done; /* Error in base, return NULL */
2267 } else
2268 bas->path = (char *)xmlStrdup(base);
2269
2270 /*
2271 * If the scheme / server on the URI differs from the base,
2272 * just return the URI
2273 */
2274 if ((ref->scheme != NULL) &&
2275 ((bas->scheme == NULL) ||
2276 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2277 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2278 val = xmlStrdup (URI);
2279 goto done;
2280 }
2281 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2282 val = xmlStrdup(BAD_CAST "");
2283 goto done;
2284 }
2285 if (bas->path == NULL) {
2286 val = xmlStrdup((xmlChar *)ref->path);
2287 goto done;
2288 }
2289 if (ref->path == NULL) {
2290 ref->path = (char *) "/";
2291 remove_path = 1;
2292 }
2293
2294 /*
2295 * At this point (at last!) we can compare the two paths
2296 *
2297 * First we take care of the special case where either of the
2298 * two path components may be missing (bug 316224)
2299 */
2300 if (bas->path == NULL) {
2301 if (ref->path != NULL) {
2302 uptr = (xmlChar *) ref->path;
2303 if (*uptr == '/')
2304 uptr++;
2305 /* exception characters from xmlSaveUri */
2306 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2307 }
2308 goto done;
2309 }
2310 bptr = (xmlChar *)bas->path;
2311 if (ref->path == NULL) {
2312 for (ix = 0; bptr[ix] != 0; ix++) {
2313 if (bptr[ix] == '/')
2314 nbslash++;
2315 }
2316 uptr = NULL;
2317 len = 1; /* this is for a string terminator only */
2318 } else {
2319 /*
2320 * Next we compare the two strings and find where they first differ
2321 */
2322 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2323 pos += 2;
2324 if ((*bptr == '.') && (bptr[1] == '/'))
2325 bptr += 2;
2326 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2327 bptr++;
2328 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2329 pos++;
2330
2331 if (bptr[pos] == ref->path[pos]) {
2332 val = xmlStrdup(BAD_CAST "");
2333 goto done; /* (I can't imagine why anyone would do this) */
2334 }
2335
2336 /*
2337 * In URI, "back up" to the last '/' encountered. This will be the
2338 * beginning of the "unique" suffix of URI
2339 */
2340 ix = pos;
2341 if ((ref->path[ix] == '/') && (ix > 0))
2342 ix--;
2343 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2344 ix -= 2;
2345 for (; ix > 0; ix--) {
2346 if (ref->path[ix] == '/')
2347 break;
2348 }
2349 if (ix == 0) {
2350 uptr = (xmlChar *)ref->path;
2351 } else {
2352 ix++;
2353 uptr = (xmlChar *)&ref->path[ix];
2354 }
2355
2356 /*
2357 * In base, count the number of '/' from the differing point
2358 */
2359 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2360 for (; bptr[ix] != 0; ix++) {
2361 if (bptr[ix] == '/')
2362 nbslash++;
2363 }
2364 }
2365 len = xmlStrlen (uptr) + 1;
2366 }
2367
2368 if (nbslash == 0) {
2369 if (uptr != NULL)
2370 /* exception characters from xmlSaveUri */
2371 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2372 goto done;
2373 }
2374
2375 /*
2376 * Allocate just enough space for the returned string -
2377 * length of the remainder of the URI, plus enough space
2378 * for the "../" groups, plus one for the terminator
2379 */
2380 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2381 if (val == NULL) {
2382 xmlGenericError(xmlGenericErrorContext,
2383 "xmlBuildRelativeURI: out of memory\n");
2384 goto done;
2385 }
2386 vptr = val;
2387 /*
2388 * Put in as many "../" as needed
2389 */
2390 for (; nbslash>0; nbslash--) {
2391 *vptr++ = '.';
2392 *vptr++ = '.';
2393 *vptr++ = '/';
2394 }
2395 /*
2396 * Finish up with the end of the URI
2397 */
2398 if (uptr != NULL) {
2399 if ((vptr > val) && (len > 0) &&
2400 (uptr[0] == '/') && (vptr[-1] == '/')) {
2401 memcpy (vptr, uptr + 1, len - 1);
2402 vptr[len - 2] = 0;
2403 } else {
2404 memcpy (vptr, uptr, len);
2405 vptr[len - 1] = 0;
2406 }
2407 } else {
2408 vptr[len - 1] = 0;
2409 }
2410
2411 /* escape the freshly-built path */
2412 vptr = val;
2413 /* exception characters from xmlSaveUri */
2414 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2415 xmlFree(vptr);
2416
2417 done:
2418 /*
2419 * Free the working variables
2420 */
2421 if (remove_path != 0)
2422 ref->path = NULL;
2423 if (ref != NULL)
2424 xmlFreeURI (ref);
2425 if (bas != NULL)
2426 xmlFreeURI (bas);
2427
2428 return val;
2429 }
2430
2431 /**
2432 * xmlCanonicPath:
2433 * @path: the resource locator in a filesystem notation
2434 *
2435 * Constructs a canonic path from the specified path.
2436 *
2437 * Returns a new canonic path, or a duplicate of the path parameter if the
2438 * construction fails. The caller is responsible for freeing the memory occupied
2439 * by the returned string. If there is insufficient memory available, or the
2440 * argument is NULL, the function returns NULL.
2441 */
2442 #define IS_WINDOWS_PATH(p) \
2443 ((p != NULL) && \
2444 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2445 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2446 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2447 xmlChar *
xmlCanonicPath(const xmlChar * path)2448 xmlCanonicPath(const xmlChar *path)
2449 {
2450 /*
2451 * For Windows implementations, additional work needs to be done to
2452 * replace backslashes in pathnames with "forward slashes"
2453 */
2454 #if defined(_WIN32) && !defined(__CYGWIN__)
2455 int len = 0;
2456 int i = 0;
2457 xmlChar *p = NULL;
2458 #endif
2459 xmlURIPtr uri;
2460 xmlChar *ret;
2461 const xmlChar *absuri;
2462
2463 if (path == NULL)
2464 return(NULL);
2465
2466 /* sanitize filename starting with // so it can be used as URI */
2467 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2468 path++;
2469
2470 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2471 xmlFreeURI(uri);
2472 return xmlStrdup(path);
2473 }
2474
2475 /* Check if this is an "absolute uri" */
2476 absuri = xmlStrstr(path, BAD_CAST "://");
2477 if (absuri != NULL) {
2478 int l, j;
2479 unsigned char c;
2480 xmlChar *escURI;
2481
2482 /*
2483 * this looks like an URI where some parts have not been
2484 * escaped leading to a parsing problem. Check that the first
2485 * part matches a protocol.
2486 */
2487 l = absuri - path;
2488 /* Bypass if first part (part before the '://') is > 20 chars */
2489 if ((l <= 0) || (l > 20))
2490 goto path_processing;
2491 /* Bypass if any non-alpha characters are present in first part */
2492 for (j = 0;j < l;j++) {
2493 c = path[j];
2494 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2495 goto path_processing;
2496 }
2497
2498 /* Escape all except the characters specified in the supplied path */
2499 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2500 if (escURI != NULL) {
2501 /* Try parsing the escaped path */
2502 uri = xmlParseURI((const char *) escURI);
2503 /* If successful, return the escaped string */
2504 if (uri != NULL) {
2505 xmlFreeURI(uri);
2506 return escURI;
2507 }
2508 }
2509 }
2510
2511 path_processing:
2512 /* For Windows implementations, replace backslashes with 'forward slashes' */
2513 #if defined(_WIN32) && !defined(__CYGWIN__)
2514 /*
2515 * Create a URI structure
2516 */
2517 uri = xmlCreateURI();
2518 if (uri == NULL) { /* Guard against 'out of memory' */
2519 return(NULL);
2520 }
2521
2522 len = xmlStrlen(path);
2523 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2524 /* make the scheme 'file' */
2525 uri->scheme = xmlStrdup(BAD_CAST "file");
2526 /* allocate space for leading '/' + path + string terminator */
2527 uri->path = xmlMallocAtomic(len + 2);
2528 if (uri->path == NULL) {
2529 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2530 return(NULL);
2531 }
2532 /* Put in leading '/' plus path */
2533 uri->path[0] = '/';
2534 p = uri->path + 1;
2535 strncpy(p, path, len + 1);
2536 } else {
2537 uri->path = xmlStrdup(path);
2538 if (uri->path == NULL) {
2539 xmlFreeURI(uri);
2540 return(NULL);
2541 }
2542 p = uri->path;
2543 }
2544 /* Now change all occurences of '\' to '/' */
2545 while (*p != '\0') {
2546 if (*p == '\\')
2547 *p = '/';
2548 p++;
2549 }
2550
2551 if (uri->scheme == NULL) {
2552 ret = xmlStrdup((const xmlChar *) uri->path);
2553 } else {
2554 ret = xmlSaveUri(uri);
2555 }
2556
2557 xmlFreeURI(uri);
2558 #else
2559 ret = xmlStrdup((const xmlChar *) path);
2560 #endif
2561 return(ret);
2562 }
2563
2564 /**
2565 * xmlPathToURI:
2566 * @path: the resource locator in a filesystem notation
2567 *
2568 * Constructs an URI expressing the existing path
2569 *
2570 * Returns a new URI, or a duplicate of the path parameter if the
2571 * construction fails. The caller is responsible for freeing the memory
2572 * occupied by the returned string. If there is insufficient memory available,
2573 * or the argument is NULL, the function returns NULL.
2574 */
2575 xmlChar *
xmlPathToURI(const xmlChar * path)2576 xmlPathToURI(const xmlChar *path)
2577 {
2578 xmlURIPtr uri;
2579 xmlURI temp;
2580 xmlChar *ret, *cal;
2581
2582 if (path == NULL)
2583 return(NULL);
2584
2585 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2586 xmlFreeURI(uri);
2587 return xmlStrdup(path);
2588 }
2589 cal = xmlCanonicPath(path);
2590 if (cal == NULL)
2591 return(NULL);
2592 #if defined(_WIN32) && !defined(__CYGWIN__)
2593 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2594 If 'cal' is a valid URI allready then we are done here, as continuing would make
2595 it invalid. */
2596 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2597 xmlFreeURI(uri);
2598 return cal;
2599 }
2600 /* 'cal' can contain a relative path with backslashes. If that is processed
2601 by xmlSaveURI, they will be escaped and the external entity loader machinery
2602 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2603 ret = cal;
2604 while (*ret != '\0') {
2605 if (*ret == '\\')
2606 *ret = '/';
2607 ret++;
2608 }
2609 #endif
2610 memset(&temp, 0, sizeof(temp));
2611 temp.path = (char *) cal;
2612 ret = xmlSaveUri(&temp);
2613 xmlFree(cal);
2614 return(ret);
2615 }
2616 #define bottom_uri
2617 #include "elfgcchack.h"
2618