1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37
38 /* The last 3 #include files should be in this order */
39 #include "curl_printf.h"
40 #include "curl_memory.h"
41 #include "memdebug.h"
42
/* MS-DOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when the first character of 'str' is an ASCII letter
 * and the second one is a colon. The argument is parenthesized at every use
 * so that any pointer expression (for example 'p + 1') can be passed. 'str'
 * is evaluated several times, so do not pass expressions with side effects.
 */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
48
/* MS-DOS/Windows style drive prefix as it may appear in a URL: an ASCII
 * letter, then ':' or '|', then a slash, a backslash or the terminating NUL.
 *
 * 'str' is evaluated several times, so do not pass expressions with side
 * effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
56
/* The scheme is not URL encoded. Curl_is_absolute_url() looks at no more
   than this many leading characters when searching for a scheme, and buffers
   that receive the scheme must be larger than this. */
#define MAX_SCHEME_LEN 40
59
/*
 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 *
 * The fallback is only defined when the platform headers did not provide
 * AF_INET6; it is the value handed to Curl_inet_pton() and Curl_inet_ntop()
 * in this file.
 */
#if !defined(USE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
68
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every 'char *' member is either NULL or a heap string that is released
 * with free() by free_urlhandle().
 */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port; /* decimal port string, regenerated by Curl_parse_port() */
  char *path;
  char *query;
  char *fragment;
  unsigned short portnum; /* the numerical version (if 'port' is set) */
  BIT(query_present);    /* to support blank */
  BIT(fragment_present); /* to support blank */
  BIT(guessed_scheme);   /* when a URL without scheme is parsed */
};
86
/* the scheme used when a URL has none and CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"

/* forward declaration, redirect_url() calls this ahead of its definition */
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
                                      unsigned int flags);
91
free_urlhandle(struct Curl_URL * u)92 static void free_urlhandle(struct Curl_URL *u)
93 {
94 free(u->scheme);
95 free(u->user);
96 free(u->password);
97 free(u->options);
98 free(u->host);
99 free(u->zoneid);
100 free(u->port);
101 free(u->path);
102 free(u->query);
103 free(u->fragment);
104 }
105
/*
 * Return a pointer to where the hostname ends in 'url': the first '/' or '?'
 * found after the leading "//" (or from the start of the string when there
 * is no "//"), whichever comes first. This covers URLs such as
 * http://www.example.com?id=2380
 *
 * When neither character is present, the returned pointer is the
 * terminating null byte of 'url'.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *authority = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* the hostname begins right after the double slash, if there is one */
  authority = authority ? authority + 2 : url;

  slash = strchr(authority, '/');
  if(!slash)
    slash = end;

  qmark = strchr(authority, '?');
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
133
/* convert CURLcode to CURLUcode. Only meant for error codes: CURLE_TOO_LARGE
   maps to CURLUE_TOO_LARGE and every other value, CURLE_OK included, maps to
   CURLUE_OUT_OF_MEMORY. */
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
                  CURLUE_OUT_OF_MEMORY)
/*
 * Decide whether a character in a URL must be escaped.
 *
 * True for a byte that ISCNTRL(), ISSPACE() and ISGRAPH() all reject.
 * NOTE(review): presumably that means bytes outside the ASCII range; confirm
 * against curl_ctype.h. urlencode_str() handles the space character itself
 * before consulting this macro.
 */
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
141
/* lowercase hex digit lookup, indexed by nibble value, used when
   percent-encoding */
static const char hexdigits[] = "0123456789abcdef";
143 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
144 * spaces in the source URL accordingly.
145 *
146 * URL encoding should be skipped for hostnames, otherwise IDN resolution
147 * will fail.
148 */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)149 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
150 size_t len, bool relative,
151 bool query)
152 {
153 /* we must add this with whitespace-replacing */
154 bool left = !query;
155 const unsigned char *iptr;
156 const unsigned char *host_sep = (const unsigned char *) url;
157 CURLcode result;
158
159 if(!relative)
160 host_sep = (const unsigned char *) find_host_sep(url);
161
162 for(iptr = (unsigned char *)url; /* read from here */
163 len; iptr++, len--) {
164
165 if(iptr < host_sep) {
166 result = Curl_dyn_addn(o, iptr, 1);
167 if(result)
168 return cc2cu(result);
169 continue;
170 }
171
172 if(*iptr == ' ') {
173 if(left)
174 result = Curl_dyn_addn(o, "%20", 3);
175 else
176 result = Curl_dyn_addn(o, "+", 1);
177 if(result)
178 return cc2cu(result);
179 continue;
180 }
181
182 if(*iptr == '?')
183 left = FALSE;
184
185 if(urlchar_needs_escaping(*iptr)) {
186 char out[3]={'%'};
187 out[1] = hexdigits[*iptr >> 4];
188 out[2] = hexdigits[*iptr & 0xf];
189 result = Curl_dyn_addn(o, out, 3);
190 }
191 else
192 result = Curl_dyn_addn(o, iptr, 1);
193 if(result)
194 return cc2cu(result);
195 }
196
197 return CURLUE_OK;
198 }
199
200 /*
201 * Returns the length of the scheme if the given URL is absolute (as opposed
202 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
203 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
204 *
205 * If 'guess_scheme' is TRUE, it means the URL might be provided without
206 * scheme.
207 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)208 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
209 bool guess_scheme)
210 {
211 size_t i = 0;
212 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
213 (void)buflen; /* only used in debug-builds */
214 if(buf)
215 buf[0] = 0; /* always leave a defined value in buf */
216 #ifdef _WIN32
217 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
218 return 0;
219 #endif
220 if(ISALPHA(url[0]))
221 for(i = 1; i < MAX_SCHEME_LEN; ++i) {
222 char s = url[i];
223 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
224 /* RFC 3986 3.1 explains:
225 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
226 */
227 }
228 else {
229 break;
230 }
231 }
232 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
233 /* If this does not guess scheme, the scheme always ends with the colon so
234 that this also detects data: URLs etc. In guessing mode, data: could
235 be the hostname "data" with a specified port number. */
236
237 /* the length of the scheme is the name part only */
238 size_t len = i;
239 if(buf) {
240 Curl_strntolower(buf, url, i);
241 buf[i] = 0;
242 }
243 return len;
244 }
245 return 0;
246 }
247
248 /*
249 * Concatenate a relative URL to a base URL making it absolute.
250 *
251 * Note that this function destroys the 'base' string.
252 */
redirect_url(char * base,const char * relurl,CURLU * u,unsigned int flags)253 static CURLUcode redirect_url(char *base, const char *relurl,
254 CURLU *u, unsigned int flags)
255 {
256 struct dynbuf urlbuf;
257 bool host_changed = FALSE;
258 const char *useurl = relurl;
259 CURLcode result = CURLE_OK;
260 CURLUcode uc;
261 /* protsep points to the start of the hostname */
262 char *protsep = strstr(base, "//");
263 DEBUGASSERT(protsep);
264 if(!protsep)
265 protsep = base;
266 else
267 protsep += 2; /* pass the slashes */
268
269 if(('/' != relurl[0]) && ('#' != relurl[0])) {
270 /* First we need to find out if there is a ?-letter in the original URL,
271 and cut it and the right-side of that off */
272 char *pathsep = strchr(protsep, '?');
273 if(pathsep)
274 *pathsep = 0;
275 else {
276 /* if not, cut off the potential fragment */
277 pathsep = strchr(protsep, '#');
278 if(pathsep)
279 *pathsep = 0;
280 }
281
282 /* if the redirect-to piece is not just a query, cut the path after the
283 last slash */
284 if(useurl[0] != '?') {
285 pathsep = strrchr(protsep, '/');
286 if(pathsep)
287 pathsep[1] = 0; /* leave the slash */
288 }
289 }
290 else if('/' == relurl[0]) {
291 /* We got a new absolute path for this server */
292
293 if(relurl[1] == '/') {
294 /* the new URL starts with //, just keep the protocol part from the
295 original one */
296 *protsep = 0;
297 useurl = &relurl[2]; /* we keep the slashes from the original, so we
298 skip the new ones */
299 host_changed = TRUE;
300 }
301 else {
302 /* cut the original URL at first slash */
303 char *pathsep = strchr(protsep, '/');
304 if(pathsep)
305 *pathsep = 0;
306 }
307 }
308 else {
309 /* the relative piece starts with '#' */
310
311 /* If there is a fragment in the original URL, cut it off */
312 char *pathsep = strchr(protsep, '#');
313 if(pathsep)
314 *pathsep = 0;
315 }
316
317 Curl_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
318
319 /* copy over the root URL part */
320 result = Curl_dyn_add(&urlbuf, base);
321 if(result)
322 return cc2cu(result);
323
324 /* then append the new piece on the right side */
325 uc = urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed,
326 FALSE);
327 if(!uc)
328 uc = parseurl_and_replace(Curl_dyn_ptr(&urlbuf), u,
329 flags&~CURLU_PATH_AS_IS);
330 Curl_dyn_free(&urlbuf);
331 return uc;
332 }
333
334 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)335 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
336 {
337 static const char badbytes[]={
338 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
339 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
340 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
341 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
342 0x7f, 0x00 /* null-terminate */
343 };
344 size_t n = strlen(url);
345 size_t nfine;
346
347 if(n > CURL_MAX_INPUT_LENGTH)
348 /* excessive input length */
349 return CURLUE_MALFORMED_INPUT;
350
351 nfine = strcspn(url, badbytes);
352 if((nfine != n) ||
353 (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
354 return CURLUE_MALFORMED_INPUT;
355
356 *urllen = n;
357 return CURLUE_OK;
358 }
359
360 /*
361 * parse_hostname_login()
362 *
363 * Parse the login details (username, password and options) from the URL and
364 * strip them out of the hostname
365 *
366 */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)367 static CURLUcode parse_hostname_login(struct Curl_URL *u,
368 const char *login,
369 size_t len,
370 unsigned int flags,
371 size_t *offset) /* to the hostname */
372 {
373 CURLUcode result = CURLUE_OK;
374 CURLcode ccode;
375 char *userp = NULL;
376 char *passwdp = NULL;
377 char *optionsp = NULL;
378 const struct Curl_handler *h = NULL;
379
380 /* At this point, we assume all the other special cases have been taken
381 * care of, so the host is at most
382 *
383 * [user[:password][;options]]@]hostname
384 *
385 * We need somewhere to put the embedded details, so do that first.
386 */
387 char *ptr;
388
389 DEBUGASSERT(login);
390
391 *offset = 0;
392 ptr = memchr(login, '@', len);
393 if(!ptr)
394 goto out;
395
396 /* We will now try to extract the
397 * possible login information in a string like:
398 * ftp://user:password@ftp.my.site:8021/README */
399 ptr++;
400
401 /* if this is a known scheme, get some details */
402 if(u->scheme)
403 h = Curl_get_scheme_handler(u->scheme);
404
405 /* We could use the login information in the URL so extract it. Only parse
406 options if the handler says we should. Note that 'h' might be NULL! */
407 ccode = Curl_parse_login_details(login, ptr - login - 1,
408 &userp, &passwdp,
409 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
410 &optionsp : NULL);
411 if(ccode) {
412 result = CURLUE_BAD_LOGIN;
413 goto out;
414 }
415
416 if(userp) {
417 if(flags & CURLU_DISALLOW_USER) {
418 /* Option DISALLOW_USER is set and URL contains username. */
419 result = CURLUE_USER_NOT_ALLOWED;
420 goto out;
421 }
422 free(u->user);
423 u->user = userp;
424 }
425
426 if(passwdp) {
427 free(u->password);
428 u->password = passwdp;
429 }
430
431 if(optionsp) {
432 free(u->options);
433 u->options = optionsp;
434 }
435
436 /* the hostname starts at this offset */
437 *offset = ptr - login;
438 return CURLUE_OK;
439
440 out:
441
442 free(userp);
443 free(passwdp);
444 free(optionsp);
445 u->user = NULL;
446 u->password = NULL;
447 u->options = NULL;
448
449 return result;
450 }
451
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)452 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
453 bool has_scheme)
454 {
455 char *portptr;
456 char *hostname = Curl_dyn_ptr(host);
457 /*
458 * Find the end of an IPv6 address on the ']' ending bracket.
459 */
460 if(hostname[0] == '[') {
461 portptr = strchr(hostname, ']');
462 if(!portptr)
463 return CURLUE_BAD_IPV6;
464 portptr++;
465 /* this is a RFC2732-style specified IP-address */
466 if(*portptr) {
467 if(*portptr != ':')
468 return CURLUE_BAD_PORT_NUMBER;
469 }
470 else
471 portptr = NULL;
472 }
473 else
474 portptr = strchr(hostname, ':');
475
476 if(portptr) {
477 char *rest = NULL;
478 unsigned long port;
479 size_t keep = portptr - hostname;
480
481 /* Browser behavior adaptation. If there is a colon with no digits after,
482 just cut off the name there which makes us ignore the colon and just
483 use the default port. Firefox, Chrome and Safari all do that.
484
485 Do not do it if the URL has no scheme, to make something that looks like
486 a scheme not work!
487 */
488 Curl_dyn_setlen(host, keep);
489 portptr++;
490 if(!*portptr)
491 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
492
493 if(!ISDIGIT(*portptr))
494 return CURLUE_BAD_PORT_NUMBER;
495
496 errno = 0;
497 port = strtoul(portptr, &rest, 10); /* Port number must be decimal */
498
499 if(errno || (port > 0xffff) || *rest)
500 return CURLUE_BAD_PORT_NUMBER;
501
502 u->portnum = (unsigned short) port;
503 /* generate a new port number string to get rid of leading zeroes etc */
504 free(u->port);
505 u->port = aprintf("%ld", port);
506 if(!u->port)
507 return CURLUE_OUT_OF_MEMORY;
508 }
509
510 return CURLUE_OK;
511 }
512
/*
 * ipv6_parse() verifies and normalizes, in place, a bracketed numerical IPv6
 * address.
 *
 * This assumes 'hostname' now starts with [ and that 'hlen' is the length of
 * the string including both brackets.
 *
 * An optional zone id ("%name" or "%25name", at most 15 characters) before
 * the closing bracket is copied into 'u->zoneid' and removed from
 * 'hostname'.
 * NOTE(review): a zone id already stored in 'u' is overwritten without being
 * freed here; presumably callers clear it first -- confirm.
 *
 * Returns CURLUE_BAD_IPV6 for anything that does not parse as an IPv6
 * address, CURLUE_OUT_OF_MEMORY if the zone id cannot be duplicated and
 * CURLUE_OK otherwise.
 */
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
                            size_t hlen) /* length of hostname */
{
  size_t len;
  DEBUGASSERT(*hostname == '[');
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
    return CURLUE_BAD_IPV6;
  /* from here on, 'hostname' points past the '[' and 'hlen' counts only the
     characters between the brackets */
  hostname++;
  hlen -= 2;

  /* only valid IPv6 letters are ok */
  len = strspn(hostname, "0123456789abcdefABCDEF:.");

  if(hlen != len) {
    /* something other than address characters precedes the ']', the only
       thing accepted there is a zone id */
    hlen = len;
    if(hostname[len] == '%') {
      /* this could now be '%[zone id]' */
      char zoneid[16];
      int i = 0;
      char *h = &hostname[len + 1];
      /* pass '25' if present and is a URL encoded percent sign */
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
        h += 2;
      /* copy at most 15 characters so that the null terminator fits */
      while(*h && (*h != ']') && (i < 15))
        zoneid[i++] = *h++;
      /* an empty zone id, or one not ended by ']' right here, is an error */
      if(!i || (']' != *h))
        return CURLUE_BAD_IPV6;
      zoneid[i] = 0;
      u->zoneid = strdup(zoneid);
      if(!u->zoneid)
        return CURLUE_OUT_OF_MEMORY;
      hostname[len] = ']'; /* insert end bracket */
      hostname[len + 1] = 0; /* terminate the hostname */
    }
    else
      return CURLUE_BAD_IPV6;
    /* hostname is fine */
  }

  /* Normalize the IPv6 address */
  {
    char dest[16]; /* fits a binary IPv6 address */
    hostname[hlen] = 0; /* end the address there */
    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
      return CURLUE_BAD_IPV6;
    /* write the text form back over the input, limited to the size of the
       original address text. If that fails the original text is kept. */
    if(Curl_inet_ntop(AF_INET6, dest, hostname, hlen)) {
      hlen = strlen(hostname); /* might be shorter now */
      hostname[hlen + 1] = 0;
    }
    hostname[hlen] = ']'; /* restore ending bracket */
  }
  return CURLUE_OK;
}
567
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)568 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
569 size_t hlen) /* length of hostname */
570 {
571 size_t len;
572 DEBUGASSERT(hostname);
573
574 if(!hlen)
575 return CURLUE_NO_HOST;
576 else if(hostname[0] == '[')
577 return ipv6_parse(u, hostname, hlen);
578 else {
579 /* letters from the second string are not ok */
580 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
581 if(hlen != len)
582 /* hostname with bad content */
583 return CURLUE_BAD_HOSTNAME;
584 }
585 return CURLUE_OK;
586 }
587
588 /*
589 * Handle partial IPv4 numerical addresses and different bases, like
590 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
591 *
592 * If the given input string is syntactically wrong IPv4 or any part for
593 * example is too big, this function returns HOST_NAME.
594 *
595 * Output the "normalized" version of that input string in plain quad decimal
596 * integers.
597 *
598 * Returns the host type.
599 */
600
/* host types returned by ipv4_normalize() */
#define HOST_ERROR   -1 /* out of memory */

#define HOST_NAME    1 /* not a numerical IPv4 address */
#define HOST_IPV4    2 /* numerical IPv4, rewritten as dotted quad */
#define HOST_IPV6    3 /* starts with '[' */
606
ipv4_normalize(struct dynbuf * host)607 static int ipv4_normalize(struct dynbuf *host)
608 {
609 bool done = FALSE;
610 int n = 0;
611 const char *c = Curl_dyn_ptr(host);
612 unsigned long parts[4] = {0, 0, 0, 0};
613 CURLcode result = CURLE_OK;
614
615 if(*c == '[')
616 return HOST_IPV6;
617
618 errno = 0; /* for strtoul */
619 while(!done) {
620 char *endp = NULL;
621 unsigned long l;
622 if(!ISDIGIT(*c))
623 /* most importantly this does not allow a leading plus or minus */
624 return HOST_NAME;
625 l = strtoul(c, &endp, 0);
626 if(errno)
627 return HOST_NAME;
628 #if SIZEOF_LONG > 4
629 /* a value larger than 32 bits */
630 if(l > UINT_MAX)
631 return HOST_NAME;
632 #endif
633
634 parts[n] = l;
635 c = endp;
636
637 switch(*c) {
638 case '.':
639 if(n == 3)
640 return HOST_NAME;
641 n++;
642 c++;
643 break;
644
645 case '\0':
646 done = TRUE;
647 break;
648
649 default:
650 return HOST_NAME;
651 }
652 }
653
654 switch(n) {
655 case 0: /* a -- 32 bits */
656 Curl_dyn_reset(host);
657
658 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
659 (unsigned int)(parts[0] >> 24),
660 (unsigned int)((parts[0] >> 16) & 0xff),
661 (unsigned int)((parts[0] >> 8) & 0xff),
662 (unsigned int)(parts[0] & 0xff));
663 break;
664 case 1: /* a.b -- 8.24 bits */
665 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
666 return HOST_NAME;
667 Curl_dyn_reset(host);
668 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
669 (unsigned int)(parts[0]),
670 (unsigned int)((parts[1] >> 16) & 0xff),
671 (unsigned int)((parts[1] >> 8) & 0xff),
672 (unsigned int)(parts[1] & 0xff));
673 break;
674 case 2: /* a.b.c -- 8.8.16 bits */
675 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
676 return HOST_NAME;
677 Curl_dyn_reset(host);
678 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
679 (unsigned int)(parts[0]),
680 (unsigned int)(parts[1]),
681 (unsigned int)((parts[2] >> 8) & 0xff),
682 (unsigned int)(parts[2] & 0xff));
683 break;
684 case 3: /* a.b.c.d -- 8.8.8.8 bits */
685 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
686 (parts[3] > 0xff))
687 return HOST_NAME;
688 Curl_dyn_reset(host);
689 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
690 (unsigned int)(parts[0]),
691 (unsigned int)(parts[1]),
692 (unsigned int)(parts[2]),
693 (unsigned int)(parts[3]));
694 break;
695 }
696 if(result)
697 return HOST_ERROR;
698 return HOST_IPV4;
699 }
700
701 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)702 static CURLUcode urldecode_host(struct dynbuf *host)
703 {
704 char *per = NULL;
705 const char *hostname = Curl_dyn_ptr(host);
706 per = strchr(hostname, '%');
707 if(!per)
708 /* nothing to decode */
709 return CURLUE_OK;
710 else {
711 /* encoded */
712 size_t dlen;
713 char *decoded;
714 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
715 REJECT_CTRL);
716 if(result)
717 return CURLUE_BAD_HOSTNAME;
718 Curl_dyn_reset(host);
719 result = Curl_dyn_addn(host, decoded, dlen);
720 free(decoded);
721 if(result)
722 return cc2cu(result);
723 }
724
725 return CURLUE_OK;
726 }
727
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)728 static CURLUcode parse_authority(struct Curl_URL *u,
729 const char *auth, size_t authlen,
730 unsigned int flags,
731 struct dynbuf *host,
732 bool has_scheme)
733 {
734 size_t offset;
735 CURLUcode uc;
736 CURLcode result;
737
738 /*
739 * Parse the login details and strip them out of the hostname.
740 */
741 uc = parse_hostname_login(u, auth, authlen, flags, &offset);
742 if(uc)
743 goto out;
744
745 result = Curl_dyn_addn(host, auth + offset, authlen - offset);
746 if(result) {
747 uc = cc2cu(result);
748 goto out;
749 }
750
751 uc = Curl_parse_port(u, host, has_scheme);
752 if(uc)
753 goto out;
754
755 if(!Curl_dyn_len(host))
756 return CURLUE_NO_HOST;
757
758 switch(ipv4_normalize(host)) {
759 case HOST_IPV4:
760 break;
761 case HOST_IPV6:
762 uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
763 break;
764 case HOST_NAME:
765 uc = urldecode_host(host);
766 if(!uc)
767 uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
768 break;
769 case HOST_ERROR:
770 uc = CURLUE_OUT_OF_MEMORY;
771 break;
772 default:
773 uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
774 break;
775 }
776
777 out:
778 return uc;
779 }
780
781 /* used for HTTP/2 server push */
Curl_url_set_authority(CURLU * u,const char * authority)782 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
783 {
784 CURLUcode result;
785 struct dynbuf host;
786
787 DEBUGASSERT(authority);
788 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
789
790 result = parse_authority(u, authority, strlen(authority),
791 CURLU_DISALLOW_USER, &host, !!u->scheme);
792 if(result)
793 Curl_dyn_free(&host);
794 else {
795 free(u->host);
796 u->host = Curl_dyn_ptr(&host);
797 }
798 return result;
799 }
800
801 /*
802 * "Remove Dot Segments"
803 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
804 */
805
/*
 * dedotdotify()
 * @unittest: 1395
 *
 * This function gets a null-terminated path with dot and dotdot sequences
 * passed in and strips them off according to the rules in RFC 3986 section
 * 5.2.4. 'clen' is the length of the input.
 *
 * The function handles a query part ('?' + stuff) appended but it expects
 * that fragments ('#' + stuff) have already been cut off.
 *
 * RETURNS
 *
 * Zero for success, with '*outp' set to an allocated dedotdotified string
 * that the caller must free. If the input is shorter than two bytes or has
 * no dot in it, nothing is allocated and '*outp' is set to NULL.
 *
 * Non-zero if out of memory, with '*outp' set to NULL.
 */
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
  char *outptr; /* where the next output byte goes */
  const char *endp = &input[clen];
  char *out; /* start of the output buffer */

  *outp = NULL;
  /* the path always starts with a slash, and a lone slash has no dot */
  if((clen < 2) || !memchr(input, '.', clen))
    return 0;

  /* the output can never get longer than the input */
  out = malloc(clen + 1);
  if(!out)
    return 1; /* out of memory */

  *out = 0; /* null-terminates, for inputs like "./" */
  outptr = out;

  do {
    /* TRUE while this round is handled as a dot segment, set to FALSE when
       an ordinary segment should be copied instead */
    bool dotdot = TRUE;
    if(*input == '.') {
      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
          remove that prefix from the input buffer; otherwise, */

      if(!strncmp("./", input, 2)) {
        input += 2;
        clen -= 2;
      }
      else if(!strncmp("../", input, 3)) {
        input += 3;
        clen -= 3;
      }
      /*  D.  if the input buffer consists only of "." or "..", then remove
          that from the input buffer; otherwise, */

      else if(!strcmp(".", input) || !strcmp("..", input) ||
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
        /* the result is an empty string, a query following the dots is
           dropped */
        *out = 0;
        break;
      }
      else
        dotdot = FALSE;
    }
    else if(*input == '/') {
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
          "."  is a complete path segment, then replace that prefix with "/" in
          the input buffer; otherwise, */
      if(!strncmp("/./", input, 3)) {
        input += 2;
        clen -= 2;
      }
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
        /* final segment: end the output with a slash, a query following
           the dot is dropped */
        *outptr++ = '/';
        *outptr = 0;
        break;
      }

      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
          where ".." is a complete path segment, then replace that prefix with
          "/" in the input buffer and remove the last segment and its
          preceding "/" (if any) from the output buffer; otherwise, */

      else if(!strncmp("/../", input, 4)) {
        input += 3;
        clen -= 3;
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        *outptr = 0; /* null-terminate where it stops */
      }
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        /* final segment: end the output with a slash, a query following
           the dots is dropped */
        *outptr++ = '/';
        *outptr = 0; /* null-terminate where it stops */
        break;
      }
      else
        dotdot = FALSE;
    }
    else
      dotdot = FALSE;

    if(!dotdot) {
      /*  E.  move the first path segment in the input buffer to the end of
          the output buffer, including the initial "/" character (if any) and
          any subsequent characters up to, but not including, the next "/"
          character or the end of the input buffer. A '?' ends the segment
          as well. */

      do {
        *outptr++ = *input++;
        clen--;
      } while(*input && (*input != '/') && (*input != '?'));
      *outptr = 0;
    }

    /* continue until end of path */
  } while(input < endp);

  *outp = out;
  return 0; /* success */
}
931
parseurl(const char * url,CURLU * u,unsigned int flags)932 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
933 {
934 const char *path;
935 size_t pathlen;
936 char *query = NULL;
937 char *fragment = NULL;
938 char schemebuf[MAX_SCHEME_LEN + 1];
939 size_t schemelen = 0;
940 size_t urllen;
941 CURLUcode result = CURLUE_OK;
942 size_t fraglen = 0;
943 struct dynbuf host;
944
945 DEBUGASSERT(url);
946
947 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
948
949 result = junkscan(url, &urllen, flags);
950 if(result)
951 goto fail;
952
953 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
954 flags & (CURLU_GUESS_SCHEME|
955 CURLU_DEFAULT_SCHEME));
956
957 /* handle the file: scheme */
958 if(schemelen && !strcmp(schemebuf, "file")) {
959 bool uncpath = FALSE;
960 if(urllen <= 6) {
961 /* file:/ is not enough to actually be a complete file: URL */
962 result = CURLUE_BAD_FILE_URL;
963 goto fail;
964 }
965
966 /* path has been allocated large enough to hold this */
967 path = (char *)&url[5];
968 pathlen = urllen - 5;
969
970 u->scheme = strdup("file");
971 if(!u->scheme) {
972 result = CURLUE_OUT_OF_MEMORY;
973 goto fail;
974 }
975
976 /* Extra handling URLs with an authority component (i.e. that start with
977 * "file://")
978 *
979 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
980 * RFC 8089, but not the (current) WHAT-WG URL spec.
981 */
982 if(path[0] == '/' && path[1] == '/') {
983 /* swallow the two slashes */
984 const char *ptr = &path[2];
985
986 /*
987 * According to RFC 8089, a file: URL can be reliably dereferenced if:
988 *
989 * o it has no/blank hostname, or
990 *
991 * o the hostname matches "localhost" (case-insensitively), or
992 *
993 * o the hostname is a FQDN that resolves to this machine, or
994 *
995 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
996 * Appendix E.3).
997 *
998 * For brevity, we only consider URLs with empty, "localhost", or
999 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1000 *
1001 * Additionally, there is an exception for URLs with a Windows drive
1002 * letter in the authority (which was accidentally omitted from RFC 8089
1003 * Appendix E, but believe me, it was meant to be there. --MK)
1004 */
1005 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1006 /* the URL includes a hostname, it must match "localhost" or
1007 "127.0.0.1" to be valid */
1008 if(checkprefix("localhost/", ptr) ||
1009 checkprefix("127.0.0.1/", ptr)) {
1010 ptr += 9; /* now points to the slash after the host */
1011 }
1012 else {
1013 #if defined(_WIN32)
1014 size_t len;
1015
1016 /* the hostname, NetBIOS computer name, can not contain disallowed
1017 chars, and the delimiting slash character must be appended to the
1018 hostname */
1019 path = strpbrk(ptr, "/\\:*?\"<>|");
1020 if(!path || *path != '/') {
1021 result = CURLUE_BAD_FILE_URL;
1022 goto fail;
1023 }
1024
1025 len = path - ptr;
1026 if(len) {
1027 CURLcode code = Curl_dyn_addn(&host, ptr, len);
1028 if(code) {
1029 result = cc2cu(code);
1030 goto fail;
1031 }
1032 uncpath = TRUE;
1033 }
1034
1035 ptr -= 2; /* now points to the // before the host in UNC */
1036 #else
1037 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1038 none */
1039 result = CURLUE_BAD_FILE_URL;
1040 goto fail;
1041 #endif
1042 }
1043 }
1044
1045 path = ptr;
1046 pathlen = urllen - (ptr - url);
1047 }
1048
1049 if(!uncpath)
1050 /* no host for file: URLs by default */
1051 Curl_dyn_reset(&host);
1052
1053 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1054 /* Do not allow Windows drive letters when not in Windows.
1055 * This catches both "file:/c:" and "file:c:" */
1056 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1057 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1058 /* File drive letters are only accepted in MS-DOS/Windows */
1059 result = CURLUE_BAD_FILE_URL;
1060 goto fail;
1061 }
1062 #else
1063 /* If the path starts with a slash and a drive letter, ditch the slash */
1064 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1065 /* This cannot be done with strcpy, as the memory chunks overlap! */
1066 path++;
1067 pathlen--;
1068 }
1069 #endif
1070
1071 }
1072 else {
1073 /* clear path */
1074 const char *schemep = NULL;
1075 const char *hostp;
1076 size_t hostlen;
1077
1078 if(schemelen) {
1079 int i = 0;
1080 const char *p = &url[schemelen + 1];
1081 while((*p == '/') && (i < 4)) {
1082 p++;
1083 i++;
1084 }
1085
1086 schemep = schemebuf;
1087 if(!Curl_get_scheme_handler(schemep) &&
1088 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1089 result = CURLUE_UNSUPPORTED_SCHEME;
1090 goto fail;
1091 }
1092
1093 if((i < 1) || (i > 3)) {
1094 /* less than one or more than three slashes */
1095 result = CURLUE_BAD_SLASHES;
1096 goto fail;
1097 }
1098 hostp = p; /* hostname starts here */
1099 }
1100 else {
1101 /* no scheme! */
1102
1103 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1104 result = CURLUE_BAD_SCHEME;
1105 goto fail;
1106 }
1107 if(flags & CURLU_DEFAULT_SCHEME)
1108 schemep = DEFAULT_SCHEME;
1109
1110 /*
1111 * The URL was badly formatted, let's try without scheme specified.
1112 */
1113 hostp = url;
1114 }
1115
1116 if(schemep) {
1117 u->scheme = strdup(schemep);
1118 if(!u->scheme) {
1119 result = CURLUE_OUT_OF_MEMORY;
1120 goto fail;
1121 }
1122 }
1123
1124 /* find the end of the hostname + port number */
1125 hostlen = strcspn(hostp, "/?#");
1126 path = &hostp[hostlen];
1127
1128 /* this pathlen also contains the query and the fragment */
1129 pathlen = urllen - (path - url);
1130 if(hostlen) {
1131
1132 result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1133 if(result)
1134 goto fail;
1135
1136 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1137 const char *hostname = Curl_dyn_ptr(&host);
1138 /* legacy curl-style guess based on hostname */
1139 if(checkprefix("ftp.", hostname))
1140 schemep = "ftp";
1141 else if(checkprefix("dict.", hostname))
1142 schemep = "dict";
1143 else if(checkprefix("ldap.", hostname))
1144 schemep = "ldap";
1145 else if(checkprefix("imap.", hostname))
1146 schemep = "imap";
1147 else if(checkprefix("smtp.", hostname))
1148 schemep = "smtp";
1149 else if(checkprefix("pop3.", hostname))
1150 schemep = "pop3";
1151 else
1152 schemep = "http";
1153
1154 u->scheme = strdup(schemep);
1155 if(!u->scheme) {
1156 result = CURLUE_OUT_OF_MEMORY;
1157 goto fail;
1158 }
1159 u->guessed_scheme = TRUE;
1160 }
1161 }
1162 else if(flags & CURLU_NO_AUTHORITY) {
1163 /* allowed to be empty. */
1164 if(Curl_dyn_add(&host, "")) {
1165 result = CURLUE_OUT_OF_MEMORY;
1166 goto fail;
1167 }
1168 }
1169 else {
1170 result = CURLUE_NO_HOST;
1171 goto fail;
1172 }
1173 }
1174
1175 fragment = strchr(path, '#');
1176 if(fragment) {
1177 fraglen = pathlen - (fragment - path);
1178 u->fragment_present = TRUE;
1179 if(fraglen > 1) {
1180 /* skip the leading '#' in the copy but include the terminating null */
1181 if(flags & CURLU_URLENCODE) {
1182 struct dynbuf enc;
1183 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1184 result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1185 if(result)
1186 goto fail;
1187 u->fragment = Curl_dyn_ptr(&enc);
1188 }
1189 else {
1190 u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1191 if(!u->fragment) {
1192 result = CURLUE_OUT_OF_MEMORY;
1193 goto fail;
1194 }
1195 }
1196 }
1197 /* after this, pathlen still contains the query */
1198 pathlen -= fraglen;
1199 }
1200
1201 query = memchr(path, '?', pathlen);
1202 if(query) {
1203 size_t qlen = fragment ? (size_t)(fragment - query) :
1204 pathlen - (query - path);
1205 pathlen -= qlen;
1206 u->query_present = TRUE;
1207 if(qlen > 1) {
1208 if(flags & CURLU_URLENCODE) {
1209 struct dynbuf enc;
1210 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1211 /* skip the leading question mark */
1212 result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1213 if(result)
1214 goto fail;
1215 u->query = Curl_dyn_ptr(&enc);
1216 }
1217 else {
1218 u->query = Curl_memdup0(query + 1, qlen - 1);
1219 if(!u->query) {
1220 result = CURLUE_OUT_OF_MEMORY;
1221 goto fail;
1222 }
1223 }
1224 }
1225 else {
1226 /* single byte query */
1227 u->query = strdup("");
1228 if(!u->query) {
1229 result = CURLUE_OUT_OF_MEMORY;
1230 goto fail;
1231 }
1232 }
1233 }
1234
1235 if(pathlen && (flags & CURLU_URLENCODE)) {
1236 struct dynbuf enc;
1237 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1238 result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1239 if(result)
1240 goto fail;
1241 pathlen = Curl_dyn_len(&enc);
1242 path = u->path = Curl_dyn_ptr(&enc);
1243 }
1244
1245 if(pathlen <= 1) {
1246 /* there is no path left or just the slash, unset */
1247 path = NULL;
1248 }
1249 else {
1250 if(!u->path) {
1251 u->path = Curl_memdup0(path, pathlen);
1252 if(!u->path) {
1253 result = CURLUE_OUT_OF_MEMORY;
1254 goto fail;
1255 }
1256 path = u->path;
1257 }
1258 else if(flags & CURLU_URLENCODE)
1259 /* it might have encoded more than just the path so cut it */
1260 u->path[pathlen] = 0;
1261
1262 if(!(flags & CURLU_PATH_AS_IS)) {
1263 /* remove ../ and ./ sequences according to RFC3986 */
1264 char *dedot;
1265 int err = dedotdotify((char *)path, pathlen, &dedot);
1266 if(err) {
1267 result = CURLUE_OUT_OF_MEMORY;
1268 goto fail;
1269 }
1270 if(dedot) {
1271 free(u->path);
1272 u->path = dedot;
1273 }
1274 }
1275 }
1276
1277 u->host = Curl_dyn_ptr(&host);
1278
1279 return result;
1280 fail:
1281 Curl_dyn_free(&host);
1282 free_urlhandle(u);
1283 return result;
1284 }
1285
1286 /*
1287 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1288 */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1289 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1290 unsigned int flags)
1291 {
1292 CURLUcode result;
1293 CURLU tmpurl;
1294 memset(&tmpurl, 0, sizeof(tmpurl));
1295 result = parseurl(url, &tmpurl, flags);
1296 if(!result) {
1297 free_urlhandle(u);
1298 *u = tmpurl;
1299 }
1300 return result;
1301 }
1302
1303 /*
1304 */
curl_url(void)1305 CURLU *curl_url(void)
1306 {
1307 return calloc(1, sizeof(struct Curl_URL));
1308 }
1309
/*
 * Release everything stored in the URL handle and then the handle itself.
 * Passing a NULL handle is accepted and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1317
/*
 * DUP() copies the string member 'name' from 'src' to 'dest' using strdup()
 * when the source member is set. If the copy cannot be allocated, control
 * jumps to a 'fail' label that the function using the macro must provide.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1326
curl_url_dup(const CURLU * in)1327 CURLU *curl_url_dup(const CURLU *in)
1328 {
1329 struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1330 if(u) {
1331 DUP(u, in, scheme);
1332 DUP(u, in, user);
1333 DUP(u, in, password);
1334 DUP(u, in, options);
1335 DUP(u, in, host);
1336 DUP(u, in, port);
1337 DUP(u, in, path);
1338 DUP(u, in, query);
1339 DUP(u, in, fragment);
1340 DUP(u, in, zoneid);
1341 u->portnum = in->portnum;
1342 u->fragment_present = in->fragment_present;
1343 u->query_present = in->query_present;
1344 }
1345 return u;
1346 fail:
1347 curl_url_cleanup(u);
1348 return NULL;
1349 }
1350
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1351 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1352 char **part, unsigned int flags)
1353 {
1354 const char *ptr;
1355 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1356 char portbuf[7];
1357 bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
1358 bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1359 bool punycode = FALSE;
1360 bool depunyfy = FALSE;
1361 bool plusdecode = FALSE;
1362 (void)flags;
1363 if(!u)
1364 return CURLUE_BAD_HANDLE;
1365 if(!part)
1366 return CURLUE_BAD_PARTPOINTER;
1367 *part = NULL;
1368
1369 switch(what) {
1370 case CURLUPART_SCHEME:
1371 ptr = u->scheme;
1372 ifmissing = CURLUE_NO_SCHEME;
1373 urldecode = FALSE; /* never for schemes */
1374 if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
1375 return CURLUE_NO_SCHEME;
1376 break;
1377 case CURLUPART_USER:
1378 ptr = u->user;
1379 ifmissing = CURLUE_NO_USER;
1380 break;
1381 case CURLUPART_PASSWORD:
1382 ptr = u->password;
1383 ifmissing = CURLUE_NO_PASSWORD;
1384 break;
1385 case CURLUPART_OPTIONS:
1386 ptr = u->options;
1387 ifmissing = CURLUE_NO_OPTIONS;
1388 break;
1389 case CURLUPART_HOST:
1390 ptr = u->host;
1391 ifmissing = CURLUE_NO_HOST;
1392 punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1393 depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1394 break;
1395 case CURLUPART_ZONEID:
1396 ptr = u->zoneid;
1397 ifmissing = CURLUE_NO_ZONEID;
1398 break;
1399 case CURLUPART_PORT:
1400 ptr = u->port;
1401 ifmissing = CURLUE_NO_PORT;
1402 urldecode = FALSE; /* never for port */
1403 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1404 /* there is no stored port number, but asked to deliver
1405 a default one for the scheme */
1406 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1407 if(h) {
1408 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1409 ptr = portbuf;
1410 }
1411 }
1412 else if(ptr && u->scheme) {
1413 /* there is a stored port number, but ask to inhibit if
1414 it matches the default one for the scheme */
1415 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1416 if(h && (h->defport == u->portnum) &&
1417 (flags & CURLU_NO_DEFAULT_PORT))
1418 ptr = NULL;
1419 }
1420 break;
1421 case CURLUPART_PATH:
1422 ptr = u->path;
1423 if(!ptr)
1424 ptr = "/";
1425 break;
1426 case CURLUPART_QUERY:
1427 ptr = u->query;
1428 ifmissing = CURLUE_NO_QUERY;
1429 plusdecode = urldecode;
1430 if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
1431 /* there was a blank query and the user do not ask for it */
1432 ptr = NULL;
1433 break;
1434 case CURLUPART_FRAGMENT:
1435 ptr = u->fragment;
1436 ifmissing = CURLUE_NO_FRAGMENT;
1437 if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
1438 /* there was a blank fragment and the user asks for it */
1439 ptr = "";
1440 break;
1441 case CURLUPART_URL: {
1442 char *url;
1443 char *scheme;
1444 char *options = u->options;
1445 char *port = u->port;
1446 char *allochost = NULL;
1447 bool show_fragment =
1448 u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1449 bool show_query =
1450 (u->query && u->query[0]) ||
1451 (u->query_present && flags & CURLU_GET_EMPTY);
1452 punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
1453 depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
1454 if(u->scheme && strcasecompare("file", u->scheme)) {
1455 url = aprintf("file://%s%s%s",
1456 u->path,
1457 show_fragment ? "#": "",
1458 u->fragment ? u->fragment : "");
1459 }
1460 else if(!u->host)
1461 return CURLUE_NO_HOST;
1462 else {
1463 const struct Curl_handler *h = NULL;
1464 char schemebuf[MAX_SCHEME_LEN + 5];
1465 if(u->scheme)
1466 scheme = u->scheme;
1467 else if(flags & CURLU_DEFAULT_SCHEME)
1468 scheme = (char *) DEFAULT_SCHEME;
1469 else
1470 return CURLUE_NO_SCHEME;
1471
1472 h = Curl_get_scheme_handler(scheme);
1473 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1474 /* there is no stored port number, but asked to deliver
1475 a default one for the scheme */
1476 if(h) {
1477 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1478 port = portbuf;
1479 }
1480 }
1481 else if(port) {
1482 /* there is a stored port number, but asked to inhibit if it matches
1483 the default one for the scheme */
1484 if(h && (h->defport == u->portnum) &&
1485 (flags & CURLU_NO_DEFAULT_PORT))
1486 port = NULL;
1487 }
1488
1489 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1490 options = NULL;
1491
1492 if(u->host[0] == '[') {
1493 if(u->zoneid) {
1494 /* make it '[ host %25 zoneid ]' */
1495 struct dynbuf enc;
1496 size_t hostlen = strlen(u->host);
1497 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1498 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1499 u->zoneid))
1500 return CURLUE_OUT_OF_MEMORY;
1501 allochost = Curl_dyn_ptr(&enc);
1502 }
1503 }
1504 else if(urlencode) {
1505 allochost = curl_easy_escape(NULL, u->host, 0);
1506 if(!allochost)
1507 return CURLUE_OUT_OF_MEMORY;
1508 }
1509 else if(punycode) {
1510 if(!Curl_is_ASCII_name(u->host)) {
1511 #ifndef USE_IDN
1512 return CURLUE_LACKS_IDN;
1513 #else
1514 CURLcode result = Curl_idn_decode(u->host, &allochost);
1515 if(result)
1516 return (result == CURLE_OUT_OF_MEMORY) ?
1517 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1518 #endif
1519 }
1520 }
1521 else if(depunyfy) {
1522 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1523 #ifndef USE_IDN
1524 return CURLUE_LACKS_IDN;
1525 #else
1526 CURLcode result = Curl_idn_encode(u->host, &allochost);
1527 if(result)
1528 /* this is the most likely error */
1529 return (result == CURLE_OUT_OF_MEMORY) ?
1530 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1531 #endif
1532 }
1533 }
1534
1535 if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
1536 msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
1537 else
1538 schemebuf[0] = 0;
1539
1540 url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1541 schemebuf,
1542 u->user ? u->user : "",
1543 u->password ? ":": "",
1544 u->password ? u->password : "",
1545 options ? ";" : "",
1546 options ? options : "",
1547 (u->user || u->password || options) ? "@": "",
1548 allochost ? allochost : u->host,
1549 port ? ":": "",
1550 port ? port : "",
1551 u->path ? u->path : "/",
1552 show_query ? "?": "",
1553 u->query ? u->query : "",
1554 show_fragment ? "#": "",
1555 u->fragment ? u->fragment : "");
1556 free(allochost);
1557 }
1558 if(!url)
1559 return CURLUE_OUT_OF_MEMORY;
1560 *part = url;
1561 return CURLUE_OK;
1562 }
1563 default:
1564 ptr = NULL;
1565 break;
1566 }
1567 if(ptr) {
1568 size_t partlen = strlen(ptr);
1569 size_t i = 0;
1570 *part = Curl_memdup0(ptr, partlen);
1571 if(!*part)
1572 return CURLUE_OUT_OF_MEMORY;
1573 if(plusdecode) {
1574 /* convert + to space */
1575 char *plus = *part;
1576 for(i = 0; i < partlen; ++plus, i++) {
1577 if(*plus == '+')
1578 *plus = ' ';
1579 }
1580 }
1581 if(urldecode) {
1582 char *decoded;
1583 size_t dlen;
1584 /* this unconditional rejection of control bytes is documented
1585 API behavior */
1586 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1587 free(*part);
1588 if(res) {
1589 *part = NULL;
1590 return CURLUE_URLDECODE;
1591 }
1592 *part = decoded;
1593 partlen = dlen;
1594 }
1595 if(urlencode) {
1596 struct dynbuf enc;
1597 CURLUcode uc;
1598 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1599 uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1600 if(uc)
1601 return uc;
1602 free(*part);
1603 *part = Curl_dyn_ptr(&enc);
1604 }
1605 else if(punycode) {
1606 if(!Curl_is_ASCII_name(u->host)) {
1607 #ifndef USE_IDN
1608 return CURLUE_LACKS_IDN;
1609 #else
1610 char *allochost;
1611 CURLcode result = Curl_idn_decode(*part, &allochost);
1612 if(result)
1613 return (result == CURLE_OUT_OF_MEMORY) ?
1614 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1615 free(*part);
1616 *part = allochost;
1617 #endif
1618 }
1619 }
1620 else if(depunyfy) {
1621 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1622 #ifndef USE_IDN
1623 return CURLUE_LACKS_IDN;
1624 #else
1625 char *allochost;
1626 CURLcode result = Curl_idn_encode(*part, &allochost);
1627 if(result)
1628 return (result == CURLE_OUT_OF_MEMORY) ?
1629 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1630 free(*part);
1631 *part = allochost;
1632 #endif
1633 }
1634 }
1635
1636 return CURLUE_OK;
1637 }
1638 else
1639 return ifmissing;
1640 }
1641
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1642 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1643 const char *part, unsigned int flags)
1644 {
1645 char **storep = NULL;
1646 bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
1647 bool plusencode = FALSE;
1648 bool urlskipslash = FALSE;
1649 bool leadingslash = FALSE;
1650 bool appendquery = FALSE;
1651 bool equalsencode = FALSE;
1652 size_t nalloc;
1653
1654 if(!u)
1655 return CURLUE_BAD_HANDLE;
1656 if(!part) {
1657 /* setting a part to NULL clears it */
1658 switch(what) {
1659 case CURLUPART_URL:
1660 break;
1661 case CURLUPART_SCHEME:
1662 storep = &u->scheme;
1663 u->guessed_scheme = FALSE;
1664 break;
1665 case CURLUPART_USER:
1666 storep = &u->user;
1667 break;
1668 case CURLUPART_PASSWORD:
1669 storep = &u->password;
1670 break;
1671 case CURLUPART_OPTIONS:
1672 storep = &u->options;
1673 break;
1674 case CURLUPART_HOST:
1675 storep = &u->host;
1676 break;
1677 case CURLUPART_ZONEID:
1678 storep = &u->zoneid;
1679 break;
1680 case CURLUPART_PORT:
1681 u->portnum = 0;
1682 storep = &u->port;
1683 break;
1684 case CURLUPART_PATH:
1685 storep = &u->path;
1686 break;
1687 case CURLUPART_QUERY:
1688 storep = &u->query;
1689 u->query_present = FALSE;
1690 break;
1691 case CURLUPART_FRAGMENT:
1692 storep = &u->fragment;
1693 u->fragment_present = FALSE;
1694 break;
1695 default:
1696 return CURLUE_UNKNOWN_PART;
1697 }
1698 if(storep && *storep) {
1699 Curl_safefree(*storep);
1700 }
1701 else if(!storep) {
1702 free_urlhandle(u);
1703 memset(u, 0, sizeof(struct Curl_URL));
1704 }
1705 return CURLUE_OK;
1706 }
1707
1708 nalloc = strlen(part);
1709 if(nalloc > CURL_MAX_INPUT_LENGTH)
1710 /* excessive input length */
1711 return CURLUE_MALFORMED_INPUT;
1712
1713 switch(what) {
1714 case CURLUPART_SCHEME: {
1715 size_t plen = strlen(part);
1716 const char *s = part;
1717 if((plen > MAX_SCHEME_LEN) || (plen < 1))
1718 /* too long or too short */
1719 return CURLUE_BAD_SCHEME;
1720 /* verify that it is a fine scheme */
1721 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1722 return CURLUE_UNSUPPORTED_SCHEME;
1723 storep = &u->scheme;
1724 urlencode = FALSE; /* never */
1725 if(ISALPHA(*s)) {
1726 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1727 while(--plen) {
1728 if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1729 s++; /* fine */
1730 else
1731 return CURLUE_BAD_SCHEME;
1732 }
1733 }
1734 else
1735 return CURLUE_BAD_SCHEME;
1736 u->guessed_scheme = FALSE;
1737 break;
1738 }
1739 case CURLUPART_USER:
1740 storep = &u->user;
1741 break;
1742 case CURLUPART_PASSWORD:
1743 storep = &u->password;
1744 break;
1745 case CURLUPART_OPTIONS:
1746 storep = &u->options;
1747 break;
1748 case CURLUPART_HOST:
1749 storep = &u->host;
1750 Curl_safefree(u->zoneid);
1751 break;
1752 case CURLUPART_ZONEID:
1753 storep = &u->zoneid;
1754 break;
1755 case CURLUPART_PORT:
1756 if(!ISDIGIT(part[0]))
1757 /* not a number */
1758 return CURLUE_BAD_PORT_NUMBER;
1759 else {
1760 char *tmp;
1761 char *endp;
1762 unsigned long port;
1763 errno = 0;
1764 port = strtoul(part, &endp, 10); /* must be decimal */
1765 if(errno || (port > 0xffff) || *endp)
1766 /* weirdly provided number, not good! */
1767 return CURLUE_BAD_PORT_NUMBER;
1768 tmp = strdup(part);
1769 if(!tmp)
1770 return CURLUE_OUT_OF_MEMORY;
1771 free(u->port);
1772 u->port = tmp;
1773 u->portnum = (unsigned short)port;
1774 return CURLUE_OK;
1775 }
1776 case CURLUPART_PATH:
1777 urlskipslash = TRUE;
1778 leadingslash = TRUE; /* enforce */
1779 storep = &u->path;
1780 break;
1781 case CURLUPART_QUERY:
1782 plusencode = urlencode;
1783 appendquery = (flags & CURLU_APPENDQUERY) ? 1 : 0;
1784 equalsencode = appendquery;
1785 storep = &u->query;
1786 u->query_present = TRUE;
1787 break;
1788 case CURLUPART_FRAGMENT:
1789 storep = &u->fragment;
1790 u->fragment_present = TRUE;
1791 break;
1792 case CURLUPART_URL: {
1793 /*
1794 * Allow a new URL to replace the existing (if any) contents.
1795 *
1796 * If the existing contents is enough for a URL, allow a relative URL to
1797 * replace it.
1798 */
1799 CURLUcode uc;
1800 char *oldurl;
1801
1802 if(!nalloc)
1803 /* a blank URL is not a valid URL */
1804 return CURLUE_MALFORMED_INPUT;
1805
1806 /* if the new thing is absolute or the old one is not (we could not get an
1807 * absolute URL in 'oldurl'), then replace the existing with the new. */
1808 if(Curl_is_absolute_url(part, NULL, 0,
1809 flags & (CURLU_GUESS_SCHEME|CURLU_DEFAULT_SCHEME))
1810 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1811 return parseurl_and_replace(part, u, flags);
1812 }
1813
1814 /* apply the relative part to create a new URL */
1815 uc = redirect_url(oldurl, part, u, flags);
1816 free(oldurl);
1817 return uc;
1818 }
1819 default:
1820 return CURLUE_UNKNOWN_PART;
1821 }
1822 DEBUGASSERT(storep);
1823 {
1824 const char *newp;
1825 struct dynbuf enc;
1826 Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1827
1828 if(leadingslash && (part[0] != '/')) {
1829 CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1830 if(result)
1831 return cc2cu(result);
1832 }
1833 if(urlencode) {
1834 const unsigned char *i;
1835
1836 for(i = (const unsigned char *)part; *i; i++) {
1837 CURLcode result;
1838 if((*i == ' ') && plusencode) {
1839 result = Curl_dyn_addn(&enc, "+", 1);
1840 if(result)
1841 return CURLUE_OUT_OF_MEMORY;
1842 }
1843 else if(ISUNRESERVED(*i) ||
1844 ((*i == '/') && urlskipslash) ||
1845 ((*i == '=') && equalsencode)) {
1846 if((*i == '=') && equalsencode)
1847 /* only skip the first equals sign */
1848 equalsencode = FALSE;
1849 result = Curl_dyn_addn(&enc, i, 1);
1850 if(result)
1851 return cc2cu(result);
1852 }
1853 else {
1854 char out[3]={'%'};
1855 out[1] = hexdigits[*i >> 4];
1856 out[2] = hexdigits[*i & 0xf];
1857 result = Curl_dyn_addn(&enc, out, 3);
1858 if(result)
1859 return cc2cu(result);
1860 }
1861 }
1862 }
1863 else {
1864 char *p;
1865 CURLcode result = Curl_dyn_add(&enc, part);
1866 if(result)
1867 return cc2cu(result);
1868 p = Curl_dyn_ptr(&enc);
1869 while(*p) {
1870 /* make sure percent encoded are lower case */
1871 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1872 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1873 p[1] = Curl_raw_tolower(p[1]);
1874 p[2] = Curl_raw_tolower(p[2]);
1875 p += 3;
1876 }
1877 else
1878 p++;
1879 }
1880 }
1881 newp = Curl_dyn_ptr(&enc);
1882
1883 if(appendquery && newp) {
1884 /* Append the 'newp' string onto the old query. Add a '&' separator if
1885 none is present at the end of the existing query already */
1886
1887 size_t querylen = u->query ? strlen(u->query) : 0;
1888 bool addamperand = querylen && (u->query[querylen -1] != '&');
1889 if(querylen) {
1890 struct dynbuf qbuf;
1891 Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1892
1893 if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1894 goto nomem;
1895
1896 if(addamperand) {
1897 if(Curl_dyn_addn(&qbuf, "&", 1))
1898 goto nomem;
1899 }
1900 if(Curl_dyn_add(&qbuf, newp))
1901 goto nomem;
1902 Curl_dyn_free(&enc);
1903 free(*storep);
1904 *storep = Curl_dyn_ptr(&qbuf);
1905 return CURLUE_OK;
1906 nomem:
1907 Curl_dyn_free(&enc);
1908 return CURLUE_OUT_OF_MEMORY;
1909 }
1910 }
1911
1912 else if(what == CURLUPART_HOST) {
1913 size_t n = Curl_dyn_len(&enc);
1914 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1915 /* Skip hostname check, it is allowed to be empty. */
1916 }
1917 else {
1918 bool bad = FALSE;
1919 if(!n)
1920 bad = TRUE; /* empty hostname is not okay */
1921 else if(!urlencode) {
1922 /* if the host name part was not URL encoded here, it was set ready
1923 URL encoded so we need to decode it to check */
1924 size_t dlen;
1925 char *decoded = NULL;
1926 CURLcode result =
1927 Curl_urldecode(newp, n, &decoded, &dlen, REJECT_CTRL);
1928 if(result || hostname_check(u, decoded, dlen))
1929 bad = TRUE;
1930 free(decoded);
1931 }
1932 else if(hostname_check(u, (char *)newp, n))
1933 bad = TRUE;
1934 if(bad) {
1935 Curl_dyn_free(&enc);
1936 return CURLUE_BAD_HOSTNAME;
1937 }
1938 }
1939 }
1940
1941 free(*storep);
1942 *storep = (char *)newp;
1943 }
1944 return CURLUE_OK;
1945 }
1946