1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37 #include "curl_memrchr.h"
38
39 /* The last 3 #include files should be in this order */
40 #include "curl_printf.h"
41 #include "curl_memory.h"
42 #include "memdebug.h"
43
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * True when 'str' starts with one ASCII letter followed by a colon. The
 * argument is parenthesized in the expansion so that expressions such as
 * 'p + 1' can be passed. Note that 'str' is evaluated more than once. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
49
/* Drive prefix as it can appear within a URL: one ASCII letter, then either
 * ':' or '|', and after that a slash, a backslash or the end of the string.
 * The argument is evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == '|' || (str)[1] == ':') && \
   ((str)[2] == 0 || (str)[2] == '/' || (str)[2] == '\\'))
57
/* The scheme is not URL encoded. This is the upper limit for the length of a
   scheme name that the parser recognizes. */
59 #define MAX_SCHEME_LEN 40
60
61 /*
62 * If USE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
63 * sure we have _some_ value for AF_INET6 without polluting our fake value
64 * everywhere.
65 */
66 #if !defined(USE_IPV6) && !defined(AF_INET6)
67 #define AF_INET6 (AF_INET + 1)
68 #endif
69
/* Internal representation of CURLU. The string members point to URL-encoded
   strings; each one is released individually with free() by
   free_urlhandle(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port; /* decimal string, regenerated from the parsed number */
  char *path;
  char *query;
  char *fragment;
  unsigned short portnum; /* the numerical version (if 'port' is set) */
  BIT(query_present);    /* to support blank */
  BIT(fragment_present); /* to support blank */
};
86
87 #define DEFAULT_SCHEME "https"
88
free_urlhandle(struct Curl_URL * u)89 static void free_urlhandle(struct Curl_URL *u)
90 {
91 free(u->scheme);
92 free(u->user);
93 free(u->password);
94 free(u->options);
95 free(u->host);
96 free(u->zoneid);
97 free(u->port);
98 free(u->path);
99 free(u->query);
100 free(u->fragment);
101 }
102
/*
 * Return a pointer to the character that ends the host part of 'url': the
 * first '/' or '?' found after the leading "//", or counted from the start
 * of the string when there is no "//". The '?' matters for cases like
 * http://www.example.com?id=2380
 *
 * When neither character is present, the returned pointer is the one to the
 * terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *slashes = strstr(url, "//");
  const char *host = slashes ? slashes + 2 : url;

  /* stops at the first '/' or '?', or at the end of the string */
  return host + strcspn(host, "/?");
}
130
/* convert CURLcode to CURLUcode: "too large" maps to its CURLUcode twin and
   anything else is reported as out of memory */
#define cc2cu(x) ((x) != CURLE_TOO_LARGE ? CURLUE_OUT_OF_MEMORY :   \
                  CURLUE_TOO_LARGE)

/*
 * A character in a URL must be escaped when none of the ISCNTRL, ISSPACE and
 * ISGRAPH checks accepts it.
 */
#define urlchar_needs_escaping(c) \
  (!ISCNTRL(c) && !ISSPACE(c) && !ISGRAPH(c))

/* lowercase hex digits, indexed by the value of a nibble */
static const char hexdigits[] = "0123456789abcdef";
140 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
141 * spaces in the source URL accordingly.
142 *
143 * URL encoding should be skipped for host names, otherwise IDN resolution
144 * will fail.
145 */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)146 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
147 size_t len, bool relative,
148 bool query)
149 {
150 /* we must add this with whitespace-replacing */
151 bool left = !query;
152 const unsigned char *iptr;
153 const unsigned char *host_sep = (const unsigned char *) url;
154 CURLcode result;
155
156 if(!relative)
157 host_sep = (const unsigned char *) find_host_sep(url);
158
159 for(iptr = (unsigned char *)url; /* read from here */
160 len; iptr++, len--) {
161
162 if(iptr < host_sep) {
163 result = Curl_dyn_addn(o, iptr, 1);
164 if(result)
165 return cc2cu(result);
166 continue;
167 }
168
169 if(*iptr == ' ') {
170 if(left)
171 result = Curl_dyn_addn(o, "%20", 3);
172 else
173 result = Curl_dyn_addn(o, "+", 1);
174 if(result)
175 return cc2cu(result);
176 continue;
177 }
178
179 if(*iptr == '?')
180 left = FALSE;
181
182 if(urlchar_needs_escaping(*iptr)) {
183 char out[3]={'%'};
184 out[1] = hexdigits[*iptr>>4];
185 out[2] = hexdigits[*iptr & 0xf];
186 result = Curl_dyn_addn(o, out, 3);
187 }
188 else
189 result = Curl_dyn_addn(o, iptr, 1);
190 if(result)
191 return cc2cu(result);
192 }
193
194 return CURLUE_OK;
195 }
196
197 /*
198 * Returns the length of the scheme if the given URL is absolute (as opposed
199 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
200 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
201 *
202 * If 'guess_scheme' is TRUE, it means the URL might be provided without
203 * scheme.
204 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)205 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
206 bool guess_scheme)
207 {
208 int i = 0;
209 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
210 (void)buflen; /* only used in debug-builds */
211 if(buf)
212 buf[0] = 0; /* always leave a defined value in buf */
213 #ifdef _WIN32
214 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
215 return 0;
216 #endif
217 if(ISALPHA(url[0]))
218 for(i = 1; i < MAX_SCHEME_LEN; ++i) {
219 char s = url[i];
220 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
221 /* RFC 3986 3.1 explains:
222 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223 */
224 }
225 else {
226 break;
227 }
228 }
229 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
230 /* If this does not guess scheme, the scheme always ends with the colon so
231 that this also detects data: URLs etc. In guessing mode, data: could
232 be the host name "data" with a specified port number. */
233
234 /* the length of the scheme is the name part only */
235 size_t len = i;
236 if(buf) {
237 Curl_strntolower(buf, url, i);
238 buf[i] = 0;
239 }
240 return len;
241 }
242 return 0;
243 }
244
/*
 * Concatenate a relative URL to a base URL making it absolute.
 * The appended part is URL-encoded by urlencode_str().
 *
 * Returns CURLE_OK and stores the new URL in *newurl, which must be freed by
 * the caller. On error, a CURLcode is returned and *newurl is NULL.
 *
 * Note that this function destroys the 'base' string: it gets cut off in
 * place at the point where the relative part is to be appended.
 */
static CURLcode concat_url(char *base, const char *relurl, char **newurl)
{
  /***
   TRY to append this new path to the old URL
   to the right of the host part. Oh crap, this is doomed to cause
   problems in the future...
  */
  struct dynbuf newest;
  char *protsep;
  char *pathsep;
  bool host_changed = FALSE;
  const char *useurl = relurl;
  CURLcode result = CURLE_OK;
  CURLUcode uc;
  bool skip_slash = FALSE;
  *newurl = NULL;

  /* protsep points to the start of the hostname */
  protsep = strstr(base, "//");
  if(!protsep)
    protsep = base;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    /* number of "../" prefixes found in the relative URL */
    int level = 0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or the new URL is just a query string (starts with a '?') or
       a fragment (starts with '#') we append the new one at the end of the
       current URL */
    if((useurl[0] != '?') && (useurl[0] != '#')) {
      /* drop the last path segment (the "file name") of the base */
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;

      /* Check if there's any slash after the hostname, and if so, remember
         that position instead */
      pathsep = strchr(protsep, '/');
      if(pathsep)
        protsep = pathsep + 1;
      else
        protsep = NULL; /* the base has no path left to cut from */

      /* now deal with one "./" or any amount of "../" in the newurl
         and act accordingly */

      if((useurl[0] == '.') && (useurl[1] == '/'))
        useurl += 2; /* just skip the "./" */

      while((useurl[0] == '.') &&
            (useurl[1] == '.') &&
            (useurl[2] == '/')) {
        level++;
        useurl += 3; /* pass the "../" */
      }

      if(protsep) {
        while(level--) {
          /* cut off one more level from the right of the original URL */
          pathsep = strrchr(protsep, '/');
          if(pathsep)
            *pathsep = 0;
          else {
            /* no more levels to remove: the whole base path is gone */
            *protsep = 0;
            break;
          }
        }
      }
    }
    else
      /* a query or fragment is appended directly, without a slash */
      skip_slash = TRUE;
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.example.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.example.com?id=2380" which
           doesn't use a slash separator as it is supposed to, we need to check
           for a ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);

  /* copy over the root url part */
  result = Curl_dyn_add(&newest, base);
  if(result)
    return result;

  /* check if we need to append a slash: not when the new part brings its
     own, not when the base was cut off right at 'protsep', and not for a
     query or fragment */
  if(('/' == useurl[0]) || (protsep && !*protsep) || skip_slash)
    ;
  else {
    result = Curl_dyn_addn(&newest, "/", 1);
    if(result)
      return result;
  }

  /* then append the new piece on the right side. When the host changed, the
     new piece starts with a hostname that urlencode_str() must leave alone,
     which is why it is then passed on as not relative. */
  uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
                     FALSE);
  if(uc)
    return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;

  *newurl = Curl_dyn_ptr(&newest);
  return CURLE_OK;
}
393
394 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)395 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
396 {
397 static const char badbytes[]={
398 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
399 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
400 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
401 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
402 0x7f, 0x00 /* null-terminate */
403 };
404 size_t n = strlen(url);
405 size_t nfine;
406
407 if(n > CURL_MAX_INPUT_LENGTH)
408 /* excessive input length */
409 return CURLUE_MALFORMED_INPUT;
410
411 nfine = strcspn(url, badbytes);
412 if((nfine != n) ||
413 (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
414 return CURLUE_MALFORMED_INPUT;
415
416 *urllen = n;
417 return CURLUE_OK;
418 }
419
420 /*
421 * parse_hostname_login()
422 *
423 * Parse the login details (user name, password and options) from the URL and
424 * strip them out of the host name
425 *
426 */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)427 static CURLUcode parse_hostname_login(struct Curl_URL *u,
428 const char *login,
429 size_t len,
430 unsigned int flags,
431 size_t *offset) /* to the host name */
432 {
433 CURLUcode result = CURLUE_OK;
434 CURLcode ccode;
435 char *userp = NULL;
436 char *passwdp = NULL;
437 char *optionsp = NULL;
438 const struct Curl_handler *h = NULL;
439
440 /* At this point, we assume all the other special cases have been taken
441 * care of, so the host is at most
442 *
443 * [user[:password][;options]]@]hostname
444 *
445 * We need somewhere to put the embedded details, so do that first.
446 */
447 char *ptr;
448
449 DEBUGASSERT(login);
450
451 *offset = 0;
452 ptr = memchr(login, '@', len);
453 if(!ptr)
454 goto out;
455
456 /* We will now try to extract the
457 * possible login information in a string like:
458 * ftp://user:password@ftp.my.site:8021/README */
459 ptr++;
460
461 /* if this is a known scheme, get some details */
462 if(u->scheme)
463 h = Curl_get_scheme_handler(u->scheme);
464
465 /* We could use the login information in the URL so extract it. Only parse
466 options if the handler says we should. Note that 'h' might be NULL! */
467 ccode = Curl_parse_login_details(login, ptr - login - 1,
468 &userp, &passwdp,
469 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
470 &optionsp:NULL);
471 if(ccode) {
472 result = CURLUE_BAD_LOGIN;
473 goto out;
474 }
475
476 if(userp) {
477 if(flags & CURLU_DISALLOW_USER) {
478 /* Option DISALLOW_USER is set and url contains username. */
479 result = CURLUE_USER_NOT_ALLOWED;
480 goto out;
481 }
482 free(u->user);
483 u->user = userp;
484 }
485
486 if(passwdp) {
487 free(u->password);
488 u->password = passwdp;
489 }
490
491 if(optionsp) {
492 free(u->options);
493 u->options = optionsp;
494 }
495
496 /* the host name starts at this offset */
497 *offset = ptr - login;
498 return CURLUE_OK;
499
500 out:
501
502 free(userp);
503 free(passwdp);
504 free(optionsp);
505 u->user = NULL;
506 u->password = NULL;
507 u->options = NULL;
508
509 return result;
510 }
511
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)512 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
513 bool has_scheme)
514 {
515 char *portptr;
516 char *hostname = Curl_dyn_ptr(host);
517 /*
518 * Find the end of an IPv6 address on the ']' ending bracket.
519 */
520 if(hostname[0] == '[') {
521 portptr = strchr(hostname, ']');
522 if(!portptr)
523 return CURLUE_BAD_IPV6;
524 portptr++;
525 /* this is a RFC2732-style specified IP-address */
526 if(*portptr) {
527 if(*portptr != ':')
528 return CURLUE_BAD_PORT_NUMBER;
529 }
530 else
531 portptr = NULL;
532 }
533 else
534 portptr = strchr(hostname, ':');
535
536 if(portptr) {
537 char *rest = NULL;
538 unsigned long port;
539 size_t keep = portptr - hostname;
540
541 /* Browser behavior adaptation. If there's a colon with no digits after,
542 just cut off the name there which makes us ignore the colon and just
543 use the default port. Firefox, Chrome and Safari all do that.
544
545 Don't do it if the URL has no scheme, to make something that looks like
546 a scheme not work!
547 */
548 Curl_dyn_setlen(host, keep);
549 portptr++;
550 if(!*portptr)
551 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
552
553 if(!ISDIGIT(*portptr))
554 return CURLUE_BAD_PORT_NUMBER;
555
556 errno = 0;
557 port = strtoul(portptr, &rest, 10); /* Port number must be decimal */
558
559 if(errno || (port > 0xffff) || *rest)
560 return CURLUE_BAD_PORT_NUMBER;
561
562 u->portnum = (unsigned short) port;
563 /* generate a new port number string to get rid of leading zeroes etc */
564 free(u->port);
565 u->port = aprintf("%ld", port);
566 if(!u->port)
567 return CURLUE_OUT_OF_MEMORY;
568 }
569
570 return CURLUE_OK;
571 }
572
573 /* this assumes 'hostname' now starts with [ */
ipv6_parse(struct Curl_URL * u,char * hostname,size_t hlen)574 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
575 size_t hlen) /* length of hostname */
576 {
577 size_t len;
578 DEBUGASSERT(*hostname == '[');
579 if(hlen < 4) /* '[::]' is the shortest possible valid string */
580 return CURLUE_BAD_IPV6;
581 hostname++;
582 hlen -= 2;
583
584 /* only valid IPv6 letters are ok */
585 len = strspn(hostname, "0123456789abcdefABCDEF:.");
586
587 if(hlen != len) {
588 hlen = len;
589 if(hostname[len] == '%') {
590 /* this could now be '%[zone id]' */
591 char zoneid[16];
592 int i = 0;
593 char *h = &hostname[len + 1];
594 /* pass '25' if present and is a url encoded percent sign */
595 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
596 h += 2;
597 while(*h && (*h != ']') && (i < 15))
598 zoneid[i++] = *h++;
599 if(!i || (']' != *h))
600 return CURLUE_BAD_IPV6;
601 zoneid[i] = 0;
602 u->zoneid = strdup(zoneid);
603 if(!u->zoneid)
604 return CURLUE_OUT_OF_MEMORY;
605 hostname[len] = ']'; /* insert end bracket */
606 hostname[len + 1] = 0; /* terminate the hostname */
607 }
608 else
609 return CURLUE_BAD_IPV6;
610 /* hostname is fine */
611 }
612
613 /* Check the IPv6 address. */
614 {
615 char dest[16]; /* fits a binary IPv6 address */
616 char norm[MAX_IPADR_LEN];
617 hostname[hlen] = 0; /* end the address there */
618 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
619 return CURLUE_BAD_IPV6;
620
621 /* check if it can be done shorter */
622 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
623 (strlen(norm) < hlen)) {
624 strcpy(hostname, norm);
625 hlen = strlen(norm);
626 hostname[hlen + 1] = 0;
627 }
628 hostname[hlen] = ']'; /* restore ending bracket */
629 }
630 return CURLUE_OK;
631 }
632
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)633 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
634 size_t hlen) /* length of hostname */
635 {
636 size_t len;
637 DEBUGASSERT(hostname);
638
639 if(!hlen)
640 return CURLUE_NO_HOST;
641 else if(hostname[0] == '[')
642 return ipv6_parse(u, hostname, hlen);
643 else {
644 /* letters from the second string are not ok */
645 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
646 if(hlen != len)
647 /* hostname with bad content */
648 return CURLUE_BAD_HOSTNAME;
649 }
650 return CURLUE_OK;
651 }
652
/*
 * Handle partial IPv4 numerical addresses and different bases, like
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
 *
 * The input is the content of 'host'. If it is a syntactically wrong IPv4
 * address, or any part is for example too big, this function leaves 'host'
 * untouched and returns HOST_NAME. A string starting with '[' returns
 * HOST_IPV6, also without being touched.
 *
 * For a valid address, 'host' is replaced with the "normalized" version in
 * plain quad decimal integers and HOST_IPV4 is returned, or HOST_ERROR if
 * writing to the dynbuf fails.
 */

#define HOST_ERROR   -1 /* out of memory */
#define HOST_BAD     -2 /* bad IPv4 address */

#define HOST_NAME    1
#define HOST_IPV4    2
#define HOST_IPV6    3

static int ipv4_normalize(struct dynbuf *host)
{
  const char *c = Curl_dyn_ptr(host);
  unsigned long parts[4] = {0, 0, 0, 0};
  unsigned int octet[4];
  int n = 0; /* index of the part being parsed */

  if(*c == '[')
    return HOST_IPV6;

  errno = 0; /* for strtoul */
  for(;;) {
    char *endp = NULL;
    unsigned long l;
    if(!ISDIGIT(*c))
      /* most importantly this does not allow a leading plus or minus */
      return HOST_NAME;
    l = strtoul(c, &endp, 0);
    if(errno)
      return HOST_NAME;
#if SIZEOF_LONG > 4
    /* a value larger than 32 bits */
    if(l > UINT_MAX)
      return HOST_NAME;
#endif

    parts[n] = l;
    c = endp;

    if(!*c)
      /* end of string, all parts are parsed */
      break;
    if((*c != '.') || (n == 3))
      /* junk after the number, or a fifth part */
      return HOST_NAME;
    n++;
    c++; /* pass the dot */
  }

  /* the last part fills up all the bits not covered by previous parts */
  switch(n) {
  case 0: /* a -- 32 bits */
    octet[0] = (unsigned int)(parts[0] >> 24);
    octet[1] = (unsigned int)((parts[0] >> 16) & 0xff);
    octet[2] = (unsigned int)((parts[0] >> 8) & 0xff);
    octet[3] = (unsigned int)(parts[0] & 0xff);
    break;
  case 1: /* a.b -- 8.24 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
      return HOST_NAME;
    octet[0] = (unsigned int)(parts[0]);
    octet[1] = (unsigned int)((parts[1] >> 16) & 0xff);
    octet[2] = (unsigned int)((parts[1] >> 8) & 0xff);
    octet[3] = (unsigned int)(parts[1] & 0xff);
    break;
  case 2: /* a.b.c -- 8.8.16 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
      return HOST_NAME;
    octet[0] = (unsigned int)(parts[0]);
    octet[1] = (unsigned int)(parts[1]);
    octet[2] = (unsigned int)((parts[2] >> 8) & 0xff);
    octet[3] = (unsigned int)(parts[2] & 0xff);
    break;
  default: /* a.b.c.d -- 8.8.8.8 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
       (parts[3] > 0xff))
      return HOST_NAME;
    octet[0] = (unsigned int)(parts[0]);
    octet[1] = (unsigned int)(parts[1]);
    octet[2] = (unsigned int)(parts[2]);
    octet[3] = (unsigned int)(parts[3]);
    break;
  }

  /* 'c' points into the old content and must not be used from here on */
  Curl_dyn_reset(host);
  if(Curl_dyn_addf(host, "%u.%u.%u.%u",
                   octet[0], octet[1], octet[2], octet[3]))
    return HOST_ERROR;
  return HOST_IPV4;
}
766
767 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)768 static CURLUcode urldecode_host(struct dynbuf *host)
769 {
770 char *per = NULL;
771 const char *hostname = Curl_dyn_ptr(host);
772 per = strchr(hostname, '%');
773 if(!per)
774 /* nothing to decode */
775 return CURLUE_OK;
776 else {
777 /* encoded */
778 size_t dlen;
779 char *decoded;
780 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
781 REJECT_CTRL);
782 if(result)
783 return CURLUE_BAD_HOSTNAME;
784 Curl_dyn_reset(host);
785 result = Curl_dyn_addn(host, decoded, dlen);
786 free(decoded);
787 if(result)
788 return cc2cu(result);
789 }
790
791 return CURLUE_OK;
792 }
793
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)794 static CURLUcode parse_authority(struct Curl_URL *u,
795 const char *auth, size_t authlen,
796 unsigned int flags,
797 struct dynbuf *host,
798 bool has_scheme)
799 {
800 size_t offset;
801 CURLUcode uc;
802 CURLcode result;
803
804 /*
805 * Parse the login details and strip them out of the host name.
806 */
807 uc = parse_hostname_login(u, auth, authlen, flags, &offset);
808 if(uc)
809 goto out;
810
811 result = Curl_dyn_addn(host, auth + offset, authlen - offset);
812 if(result) {
813 uc = cc2cu(result);
814 goto out;
815 }
816
817 uc = Curl_parse_port(u, host, has_scheme);
818 if(uc)
819 goto out;
820
821 if(!Curl_dyn_len(host))
822 return CURLUE_NO_HOST;
823
824 switch(ipv4_normalize(host)) {
825 case HOST_IPV4:
826 break;
827 case HOST_IPV6:
828 uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
829 break;
830 case HOST_NAME:
831 uc = urldecode_host(host);
832 if(!uc)
833 uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
834 break;
835 case HOST_ERROR:
836 uc = CURLUE_OUT_OF_MEMORY;
837 break;
838 case HOST_BAD:
839 default:
840 uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
841 break;
842 }
843
844 out:
845 return uc;
846 }
847
848 /* used for HTTP/2 server push */
Curl_url_set_authority(CURLU * u,const char * authority)849 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
850 {
851 CURLUcode result;
852 struct dynbuf host;
853
854 DEBUGASSERT(authority);
855 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
856
857 result = parse_authority(u, authority, strlen(authority),
858 CURLU_DISALLOW_USER, &host, !!u->scheme);
859 if(result)
860 Curl_dyn_free(&host);
861 else {
862 free(u->host);
863 u->host = Curl_dyn_ptr(&host);
864 }
865 return result;
866 }
867
868 /*
869 * "Remove Dot Segments"
870 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
871 */
872
873 /*
874 * dedotdotify()
875 * @unittest: 1395
876 *
877 * This function gets a null-terminated path with dot and dotdot sequences
878 * passed in and strips them off according to the rules in RFC 3986 section
879 * 5.2.4.
880 *
881 * The function handles a query part ('?' + stuff) appended but it expects
882 * that fragments ('#' + stuff) have already been cut off.
883 *
884 * RETURNS
885 *
886 * Zero for success and 'out' set to an allocated dedotdotified string.
887 */
888 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
dedotdotify(const char * input,size_t clen,char ** outp)889 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
890 {
891 char *outptr;
892 const char *endp = &input[clen];
893 char *out;
894
895 *outp = NULL;
896 /* the path always starts with a slash, and a slash has not dot */
897 if((clen < 2) || !memchr(input, '.', clen))
898 return 0;
899
900 out = malloc(clen + 1);
901 if(!out)
902 return 1; /* out of memory */
903
904 *out = 0; /* null-terminates, for inputs like "./" */
905 outptr = out;
906
907 do {
908 bool dotdot = TRUE;
909 if(*input == '.') {
910 /* A. If the input buffer begins with a prefix of "../" or "./", then
911 remove that prefix from the input buffer; otherwise, */
912
913 if(!strncmp("./", input, 2)) {
914 input += 2;
915 clen -= 2;
916 }
917 else if(!strncmp("../", input, 3)) {
918 input += 3;
919 clen -= 3;
920 }
921 /* D. if the input buffer consists only of "." or "..", then remove
922 that from the input buffer; otherwise, */
923
924 else if(!strcmp(".", input) || !strcmp("..", input) ||
925 !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
926 *out = 0;
927 break;
928 }
929 else
930 dotdot = FALSE;
931 }
932 else if(*input == '/') {
933 /* B. if the input buffer begins with a prefix of "/./" or "/.", where
934 "." is a complete path segment, then replace that prefix with "/" in
935 the input buffer; otherwise, */
936 if(!strncmp("/./", input, 3)) {
937 input += 2;
938 clen -= 2;
939 }
940 else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
941 *outptr++ = '/';
942 *outptr = 0;
943 break;
944 }
945
946 /* C. if the input buffer begins with a prefix of "/../" or "/..",
947 where ".." is a complete path segment, then replace that prefix with
948 "/" in the input buffer and remove the last segment and its
949 preceding "/" (if any) from the output buffer; otherwise, */
950
951 else if(!strncmp("/../", input, 4)) {
952 input += 3;
953 clen -= 3;
954 /* remove the last segment from the output buffer */
955 while(outptr > out) {
956 outptr--;
957 if(*outptr == '/')
958 break;
959 }
960 *outptr = 0; /* null-terminate where it stops */
961 }
962 else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
963 /* remove the last segment from the output buffer */
964 while(outptr > out) {
965 outptr--;
966 if(*outptr == '/')
967 break;
968 }
969 *outptr++ = '/';
970 *outptr = 0; /* null-terminate where it stops */
971 break;
972 }
973 else
974 dotdot = FALSE;
975 }
976 else
977 dotdot = FALSE;
978
979 if(!dotdot) {
980 /* E. move the first path segment in the input buffer to the end of
981 the output buffer, including the initial "/" character (if any) and
982 any subsequent characters up to, but not including, the next "/"
983 character or the end of the input buffer. */
984
985 do {
986 *outptr++ = *input++;
987 clen--;
988 } while(*input && (*input != '/') && (*input != '?'));
989 *outptr = 0;
990 }
991
992 /* continue until end of path */
993 } while(input < endp);
994
995 *outp = out;
996 return 0; /* success */
997 }
998
parseurl(const char * url,CURLU * u,unsigned int flags)999 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1000 {
1001 const char *path;
1002 size_t pathlen;
1003 char *query = NULL;
1004 char *fragment = NULL;
1005 char schemebuf[MAX_SCHEME_LEN + 1];
1006 size_t schemelen = 0;
1007 size_t urllen;
1008 CURLUcode result = CURLUE_OK;
1009 size_t fraglen = 0;
1010 struct dynbuf host;
1011
1012 DEBUGASSERT(url);
1013
1014 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1015
1016 result = junkscan(url, &urllen, flags);
1017 if(result)
1018 goto fail;
1019
1020 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1021 flags & (CURLU_GUESS_SCHEME|
1022 CURLU_DEFAULT_SCHEME));
1023
1024 /* handle the file: scheme */
1025 if(schemelen && !strcmp(schemebuf, "file")) {
1026 bool uncpath = FALSE;
1027 if(urllen <= 6) {
1028 /* file:/ is not enough to actually be a complete file: URL */
1029 result = CURLUE_BAD_FILE_URL;
1030 goto fail;
1031 }
1032
1033 /* path has been allocated large enough to hold this */
1034 path = (char *)&url[5];
1035 pathlen = urllen - 5;
1036
1037 u->scheme = strdup("file");
1038 if(!u->scheme) {
1039 result = CURLUE_OUT_OF_MEMORY;
1040 goto fail;
1041 }
1042
1043 /* Extra handling URLs with an authority component (i.e. that start with
1044 * "file://")
1045 *
1046 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1047 * RFC 8089, but not the (current) WHAT-WG URL spec.
1048 */
1049 if(path[0] == '/' && path[1] == '/') {
1050 /* swallow the two slashes */
1051 const char *ptr = &path[2];
1052
1053 /*
1054 * According to RFC 8089, a file: URL can be reliably dereferenced if:
1055 *
1056 * o it has no/blank hostname, or
1057 *
1058 * o the hostname matches "localhost" (case-insensitively), or
1059 *
1060 * o the hostname is a FQDN that resolves to this machine, or
1061 *
1062 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
1063 * Appendix E.3).
1064 *
1065 * For brevity, we only consider URLs with empty, "localhost", or
1066 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1067 *
1068 * Additionally, there is an exception for URLs with a Windows drive
1069 * letter in the authority (which was accidentally omitted from RFC 8089
1070 * Appendix E, but believe me, it was meant to be there. --MK)
1071 */
1072 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1073 /* the URL includes a host name, it must match "localhost" or
1074 "127.0.0.1" to be valid */
1075 if(checkprefix("localhost/", ptr) ||
1076 checkprefix("127.0.0.1/", ptr)) {
1077 ptr += 9; /* now points to the slash after the host */
1078 }
1079 else {
1080 #if defined(_WIN32)
1081 size_t len;
1082
1083 /* the host name, NetBIOS computer name, can not contain disallowed
1084 chars, and the delimiting slash character must be appended to the
1085 host name */
1086 path = strpbrk(ptr, "/\\:*?\"<>|");
1087 if(!path || *path != '/') {
1088 result = CURLUE_BAD_FILE_URL;
1089 goto fail;
1090 }
1091
1092 len = path - ptr;
1093 if(len) {
1094 CURLcode code = Curl_dyn_addn(&host, ptr, len);
1095 if(code) {
1096 result = cc2cu(code);
1097 goto fail;
1098 }
1099 uncpath = TRUE;
1100 }
1101
1102 ptr -= 2; /* now points to the // before the host in UNC */
1103 #else
1104 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1105 none */
1106 result = CURLUE_BAD_FILE_URL;
1107 goto fail;
1108 #endif
1109 }
1110 }
1111
1112 path = ptr;
1113 pathlen = urllen - (ptr - url);
1114 }
1115
1116 if(!uncpath)
1117 /* no host for file: URLs by default */
1118 Curl_dyn_reset(&host);
1119
1120 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1121 /* Don't allow Windows drive letters when not in Windows.
1122 * This catches both "file:/c:" and "file:c:" */
1123 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1124 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1125 /* File drive letters are only accepted in MSDOS/Windows */
1126 result = CURLUE_BAD_FILE_URL;
1127 goto fail;
1128 }
1129 #else
1130 /* If the path starts with a slash and a drive letter, ditch the slash */
1131 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1132 /* This cannot be done with strcpy, as the memory chunks overlap! */
1133 path++;
1134 pathlen--;
1135 }
1136 #endif
1137
1138 }
1139 else {
1140 /* clear path */
1141 const char *schemep = NULL;
1142 const char *hostp;
1143 size_t hostlen;
1144
1145 if(schemelen) {
1146 int i = 0;
1147 const char *p = &url[schemelen + 1];
1148 while((*p == '/') && (i < 4)) {
1149 p++;
1150 i++;
1151 }
1152
1153 schemep = schemebuf;
1154 if(!Curl_get_scheme_handler(schemep) &&
1155 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1156 result = CURLUE_UNSUPPORTED_SCHEME;
1157 goto fail;
1158 }
1159
1160 if((i < 1) || (i > 3)) {
1161 /* less than one or more than three slashes */
1162 result = CURLUE_BAD_SLASHES;
1163 goto fail;
1164 }
1165 hostp = p; /* host name starts here */
1166 }
1167 else {
1168 /* no scheme! */
1169
1170 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1171 result = CURLUE_BAD_SCHEME;
1172 goto fail;
1173 }
1174 if(flags & CURLU_DEFAULT_SCHEME)
1175 schemep = DEFAULT_SCHEME;
1176
1177 /*
1178 * The URL was badly formatted, let's try without scheme specified.
1179 */
1180 hostp = url;
1181 }
1182
1183 if(schemep) {
1184 u->scheme = strdup(schemep);
1185 if(!u->scheme) {
1186 result = CURLUE_OUT_OF_MEMORY;
1187 goto fail;
1188 }
1189 }
1190
1191 /* find the end of the host name + port number */
1192 hostlen = strcspn(hostp, "/?#");
1193 path = &hostp[hostlen];
1194
1195 /* this pathlen also contains the query and the fragment */
1196 pathlen = urllen - (path - url);
1197 if(hostlen) {
1198
1199 result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1200 if(result)
1201 goto fail;
1202
1203 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1204 const char *hostname = Curl_dyn_ptr(&host);
1205 /* legacy curl-style guess based on host name */
1206 if(checkprefix("ftp.", hostname))
1207 schemep = "ftp";
1208 else if(checkprefix("dict.", hostname))
1209 schemep = "dict";
1210 else if(checkprefix("ldap.", hostname))
1211 schemep = "ldap";
1212 else if(checkprefix("imap.", hostname))
1213 schemep = "imap";
1214 else if(checkprefix("smtp.", hostname))
1215 schemep = "smtp";
1216 else if(checkprefix("pop3.", hostname))
1217 schemep = "pop3";
1218 else
1219 schemep = "http";
1220
1221 u->scheme = strdup(schemep);
1222 if(!u->scheme) {
1223 result = CURLUE_OUT_OF_MEMORY;
1224 goto fail;
1225 }
1226 }
1227 }
1228 else if(flags & CURLU_NO_AUTHORITY) {
1229 /* allowed to be empty. */
1230 if(Curl_dyn_add(&host, "")) {
1231 result = CURLUE_OUT_OF_MEMORY;
1232 goto fail;
1233 }
1234 }
1235 else {
1236 result = CURLUE_NO_HOST;
1237 goto fail;
1238 }
1239 }
1240
1241 fragment = strchr(path, '#');
1242 if(fragment) {
1243 fraglen = pathlen - (fragment - path);
1244 u->fragment_present = TRUE;
1245 if(fraglen > 1) {
1246 /* skip the leading '#' in the copy but include the terminating null */
1247 if(flags & CURLU_URLENCODE) {
1248 struct dynbuf enc;
1249 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1250 result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1251 if(result)
1252 goto fail;
1253 u->fragment = Curl_dyn_ptr(&enc);
1254 }
1255 else {
1256 u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1257 if(!u->fragment) {
1258 result = CURLUE_OUT_OF_MEMORY;
1259 goto fail;
1260 }
1261 }
1262 }
1263 /* after this, pathlen still contains the query */
1264 pathlen -= fraglen;
1265 }
1266
1267 query = memchr(path, '?', pathlen);
1268 if(query) {
1269 size_t qlen = fragment ? (size_t)(fragment - query) :
1270 pathlen - (query - path);
1271 pathlen -= qlen;
1272 u->query_present = TRUE;
1273 if(qlen > 1) {
1274 if(flags & CURLU_URLENCODE) {
1275 struct dynbuf enc;
1276 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1277 /* skip the leading question mark */
1278 result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1279 if(result)
1280 goto fail;
1281 u->query = Curl_dyn_ptr(&enc);
1282 }
1283 else {
1284 u->query = Curl_memdup0(query + 1, qlen - 1);
1285 if(!u->query) {
1286 result = CURLUE_OUT_OF_MEMORY;
1287 goto fail;
1288 }
1289 }
1290 }
1291 else {
1292 /* single byte query */
1293 u->query = strdup("");
1294 if(!u->query) {
1295 result = CURLUE_OUT_OF_MEMORY;
1296 goto fail;
1297 }
1298 }
1299 }
1300
1301 if(pathlen && (flags & CURLU_URLENCODE)) {
1302 struct dynbuf enc;
1303 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1304 result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1305 if(result)
1306 goto fail;
1307 pathlen = Curl_dyn_len(&enc);
1308 path = u->path = Curl_dyn_ptr(&enc);
1309 }
1310
1311 if(pathlen <= 1) {
1312 /* there is no path left or just the slash, unset */
1313 path = NULL;
1314 }
1315 else {
1316 if(!u->path) {
1317 u->path = Curl_memdup0(path, pathlen);
1318 if(!u->path) {
1319 result = CURLUE_OUT_OF_MEMORY;
1320 goto fail;
1321 }
1322 path = u->path;
1323 }
1324 else if(flags & CURLU_URLENCODE)
1325 /* it might have encoded more than just the path so cut it */
1326 u->path[pathlen] = 0;
1327
1328 if(!(flags & CURLU_PATH_AS_IS)) {
1329 /* remove ../ and ./ sequences according to RFC3986 */
1330 char *dedot;
1331 int err = dedotdotify((char *)path, pathlen, &dedot);
1332 if(err) {
1333 result = CURLUE_OUT_OF_MEMORY;
1334 goto fail;
1335 }
1336 if(dedot) {
1337 free(u->path);
1338 u->path = dedot;
1339 }
1340 }
1341 }
1342
1343 u->host = Curl_dyn_ptr(&host);
1344
1345 return result;
1346 fail:
1347 Curl_dyn_free(&host);
1348 free_urlhandle(u);
1349 return result;
1350 }
1351
1352 /*
1353 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1354 */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1355 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1356 unsigned int flags)
1357 {
1358 CURLUcode result;
1359 CURLU tmpurl;
1360 memset(&tmpurl, 0, sizeof(tmpurl));
1361 result = parseurl(url, &tmpurl, flags);
1362 if(!result) {
1363 free_urlhandle(u);
1364 *u = tmpurl;
1365 }
1366 return result;
1367 }
1368
1369 /*
1370 */
curl_url(void)1371 CURLU *curl_url(void)
1372 {
1373 return calloc(1, sizeof(struct Curl_URL));
1374 }
1375
/*
 * Hand the handle to free_urlhandle() and then free the handle itself.
 * A NULL handle is accepted and ignored.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1383
/*
 * DUP() copies the string member 'name' from the struct 'src' points to into
 * the struct 'dest' points to, using strdup(). A NULL member in the source is
 * skipped, leaving the destination member untouched. If strdup() fails, the
 * macro jumps to a label called 'fail' that the using function must provide.
 *
 * The pointer arguments are parenthesized so that any pointer expression
 * (such as &object) can be passed, not only plain identifiers.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1392
curl_url_dup(const CURLU * in)1393 CURLU *curl_url_dup(const CURLU *in)
1394 {
1395 struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1396 if(u) {
1397 DUP(u, in, scheme);
1398 DUP(u, in, user);
1399 DUP(u, in, password);
1400 DUP(u, in, options);
1401 DUP(u, in, host);
1402 DUP(u, in, port);
1403 DUP(u, in, path);
1404 DUP(u, in, query);
1405 DUP(u, in, fragment);
1406 DUP(u, in, zoneid);
1407 u->portnum = in->portnum;
1408 u->fragment_present = in->fragment_present;
1409 u->query_present = in->query_present;
1410 }
1411 return u;
1412 fail:
1413 curl_url_cleanup(u);
1414 return NULL;
1415 }
1416
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1417 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1418 char **part, unsigned int flags)
1419 {
1420 const char *ptr;
1421 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1422 char portbuf[7];
1423 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1424 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1425 bool punycode = FALSE;
1426 bool depunyfy = FALSE;
1427 bool plusdecode = FALSE;
1428 (void)flags;
1429 if(!u)
1430 return CURLUE_BAD_HANDLE;
1431 if(!part)
1432 return CURLUE_BAD_PARTPOINTER;
1433 *part = NULL;
1434
1435 switch(what) {
1436 case CURLUPART_SCHEME:
1437 ptr = u->scheme;
1438 ifmissing = CURLUE_NO_SCHEME;
1439 urldecode = FALSE; /* never for schemes */
1440 break;
1441 case CURLUPART_USER:
1442 ptr = u->user;
1443 ifmissing = CURLUE_NO_USER;
1444 break;
1445 case CURLUPART_PASSWORD:
1446 ptr = u->password;
1447 ifmissing = CURLUE_NO_PASSWORD;
1448 break;
1449 case CURLUPART_OPTIONS:
1450 ptr = u->options;
1451 ifmissing = CURLUE_NO_OPTIONS;
1452 break;
1453 case CURLUPART_HOST:
1454 ptr = u->host;
1455 ifmissing = CURLUE_NO_HOST;
1456 punycode = (flags & CURLU_PUNYCODE)?1:0;
1457 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1458 break;
1459 case CURLUPART_ZONEID:
1460 ptr = u->zoneid;
1461 ifmissing = CURLUE_NO_ZONEID;
1462 break;
1463 case CURLUPART_PORT:
1464 ptr = u->port;
1465 ifmissing = CURLUE_NO_PORT;
1466 urldecode = FALSE; /* never for port */
1467 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1468 /* there's no stored port number, but asked to deliver
1469 a default one for the scheme */
1470 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1471 if(h) {
1472 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1473 ptr = portbuf;
1474 }
1475 }
1476 else if(ptr && u->scheme) {
1477 /* there is a stored port number, but ask to inhibit if
1478 it matches the default one for the scheme */
1479 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1480 if(h && (h->defport == u->portnum) &&
1481 (flags & CURLU_NO_DEFAULT_PORT))
1482 ptr = NULL;
1483 }
1484 break;
1485 case CURLUPART_PATH:
1486 ptr = u->path;
1487 if(!ptr)
1488 ptr = "/";
1489 break;
1490 case CURLUPART_QUERY:
1491 ptr = u->query;
1492 ifmissing = CURLUE_NO_QUERY;
1493 plusdecode = urldecode;
1494 if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
1495 /* there was a blank query and the user do not ask for it */
1496 ptr = NULL;
1497 break;
1498 case CURLUPART_FRAGMENT:
1499 ptr = u->fragment;
1500 ifmissing = CURLUE_NO_FRAGMENT;
1501 if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
1502 /* there was a blank fragment and the user asks for it */
1503 ptr = "";
1504 break;
1505 case CURLUPART_URL: {
1506 char *url;
1507 char *scheme;
1508 char *options = u->options;
1509 char *port = u->port;
1510 char *allochost = NULL;
1511 bool show_fragment =
1512 u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1513 bool show_query =
1514 (u->query && u->query[0]) ||
1515 (u->query_present && flags & CURLU_GET_EMPTY);
1516 punycode = (flags & CURLU_PUNYCODE)?1:0;
1517 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1518 if(u->scheme && strcasecompare("file", u->scheme)) {
1519 url = aprintf("file://%s%s%s",
1520 u->path,
1521 show_fragment ? "#": "",
1522 u->fragment ? u->fragment : "");
1523 }
1524 else if(!u->host)
1525 return CURLUE_NO_HOST;
1526 else {
1527 const struct Curl_handler *h = NULL;
1528 if(u->scheme)
1529 scheme = u->scheme;
1530 else if(flags & CURLU_DEFAULT_SCHEME)
1531 scheme = (char *) DEFAULT_SCHEME;
1532 else
1533 return CURLUE_NO_SCHEME;
1534
1535 h = Curl_get_scheme_handler(scheme);
1536 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1537 /* there's no stored port number, but asked to deliver
1538 a default one for the scheme */
1539 if(h) {
1540 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1541 port = portbuf;
1542 }
1543 }
1544 else if(port) {
1545 /* there is a stored port number, but asked to inhibit if it matches
1546 the default one for the scheme */
1547 if(h && (h->defport == u->portnum) &&
1548 (flags & CURLU_NO_DEFAULT_PORT))
1549 port = NULL;
1550 }
1551
1552 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1553 options = NULL;
1554
1555 if(u->host[0] == '[') {
1556 if(u->zoneid) {
1557 /* make it '[ host %25 zoneid ]' */
1558 struct dynbuf enc;
1559 size_t hostlen = strlen(u->host);
1560 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1561 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1562 u->zoneid))
1563 return CURLUE_OUT_OF_MEMORY;
1564 allochost = Curl_dyn_ptr(&enc);
1565 }
1566 }
1567 else if(urlencode) {
1568 allochost = curl_easy_escape(NULL, u->host, 0);
1569 if(!allochost)
1570 return CURLUE_OUT_OF_MEMORY;
1571 }
1572 else if(punycode) {
1573 if(!Curl_is_ASCII_name(u->host)) {
1574 #ifndef USE_IDN
1575 return CURLUE_LACKS_IDN;
1576 #else
1577 CURLcode result = Curl_idn_decode(u->host, &allochost);
1578 if(result)
1579 return (result == CURLE_OUT_OF_MEMORY) ?
1580 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1581 #endif
1582 }
1583 }
1584 else if(depunyfy) {
1585 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1586 #ifndef USE_IDN
1587 return CURLUE_LACKS_IDN;
1588 #else
1589 CURLcode result = Curl_idn_encode(u->host, &allochost);
1590 if(result)
1591 /* this is the most likely error */
1592 return (result == CURLE_OUT_OF_MEMORY) ?
1593 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1594 #endif
1595 }
1596 }
1597
1598 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1599 scheme,
1600 u->user ? u->user : "",
1601 u->password ? ":": "",
1602 u->password ? u->password : "",
1603 options ? ";" : "",
1604 options ? options : "",
1605 (u->user || u->password || options) ? "@": "",
1606 allochost ? allochost : u->host,
1607 port ? ":": "",
1608 port ? port : "",
1609 u->path ? u->path : "/",
1610 show_query ? "?": "",
1611 u->query ? u->query : "",
1612 show_fragment ? "#": "",
1613 u->fragment? u->fragment : "");
1614 free(allochost);
1615 }
1616 if(!url)
1617 return CURLUE_OUT_OF_MEMORY;
1618 *part = url;
1619 return CURLUE_OK;
1620 }
1621 default:
1622 ptr = NULL;
1623 break;
1624 }
1625 if(ptr) {
1626 size_t partlen = strlen(ptr);
1627 size_t i = 0;
1628 *part = Curl_memdup0(ptr, partlen);
1629 if(!*part)
1630 return CURLUE_OUT_OF_MEMORY;
1631 if(plusdecode) {
1632 /* convert + to space */
1633 char *plus = *part;
1634 for(i = 0; i < partlen; ++plus, i++) {
1635 if(*plus == '+')
1636 *plus = ' ';
1637 }
1638 }
1639 if(urldecode) {
1640 char *decoded;
1641 size_t dlen;
1642 /* this unconditional rejection of control bytes is documented
1643 API behavior */
1644 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1645 free(*part);
1646 if(res) {
1647 *part = NULL;
1648 return CURLUE_URLDECODE;
1649 }
1650 *part = decoded;
1651 partlen = dlen;
1652 }
1653 if(urlencode) {
1654 struct dynbuf enc;
1655 CURLUcode uc;
1656 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1657 uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1658 if(uc)
1659 return uc;
1660 free(*part);
1661 *part = Curl_dyn_ptr(&enc);
1662 }
1663 else if(punycode) {
1664 if(!Curl_is_ASCII_name(u->host)) {
1665 #ifndef USE_IDN
1666 return CURLUE_LACKS_IDN;
1667 #else
1668 char *allochost;
1669 CURLcode result = Curl_idn_decode(*part, &allochost);
1670 if(result)
1671 return (result == CURLE_OUT_OF_MEMORY) ?
1672 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1673 free(*part);
1674 *part = allochost;
1675 #endif
1676 }
1677 }
1678 else if(depunyfy) {
1679 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1680 #ifndef USE_IDN
1681 return CURLUE_LACKS_IDN;
1682 #else
1683 char *allochost;
1684 CURLcode result = Curl_idn_encode(*part, &allochost);
1685 if(result)
1686 return (result == CURLE_OUT_OF_MEMORY) ?
1687 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1688 free(*part);
1689 *part = allochost;
1690 #endif
1691 }
1692 }
1693
1694 return CURLUE_OK;
1695 }
1696 else
1697 return ifmissing;
1698 }
1699
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1700 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1701 const char *part, unsigned int flags)
1702 {
1703 char **storep = NULL;
1704 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1705 bool plusencode = FALSE;
1706 bool urlskipslash = FALSE;
1707 bool leadingslash = FALSE;
1708 bool appendquery = FALSE;
1709 bool equalsencode = FALSE;
1710 size_t nalloc;
1711
1712 if(!u)
1713 return CURLUE_BAD_HANDLE;
1714 if(!part) {
1715 /* setting a part to NULL clears it */
1716 switch(what) {
1717 case CURLUPART_URL:
1718 break;
1719 case CURLUPART_SCHEME:
1720 storep = &u->scheme;
1721 break;
1722 case CURLUPART_USER:
1723 storep = &u->user;
1724 break;
1725 case CURLUPART_PASSWORD:
1726 storep = &u->password;
1727 break;
1728 case CURLUPART_OPTIONS:
1729 storep = &u->options;
1730 break;
1731 case CURLUPART_HOST:
1732 storep = &u->host;
1733 break;
1734 case CURLUPART_ZONEID:
1735 storep = &u->zoneid;
1736 break;
1737 case CURLUPART_PORT:
1738 u->portnum = 0;
1739 storep = &u->port;
1740 break;
1741 case CURLUPART_PATH:
1742 storep = &u->path;
1743 break;
1744 case CURLUPART_QUERY:
1745 storep = &u->query;
1746 u->query_present = FALSE;
1747 break;
1748 case CURLUPART_FRAGMENT:
1749 storep = &u->fragment;
1750 u->fragment_present = FALSE;
1751 break;
1752 default:
1753 return CURLUE_UNKNOWN_PART;
1754 }
1755 if(storep && *storep) {
1756 Curl_safefree(*storep);
1757 }
1758 else if(!storep) {
1759 free_urlhandle(u);
1760 memset(u, 0, sizeof(struct Curl_URL));
1761 }
1762 return CURLUE_OK;
1763 }
1764
1765 nalloc = strlen(part);
1766 if(nalloc > CURL_MAX_INPUT_LENGTH)
1767 /* excessive input length */
1768 return CURLUE_MALFORMED_INPUT;
1769
1770 switch(what) {
1771 case CURLUPART_SCHEME: {
1772 size_t plen = strlen(part);
1773 const char *s = part;
1774 if((plen > MAX_SCHEME_LEN) || (plen < 1))
1775 /* too long or too short */
1776 return CURLUE_BAD_SCHEME;
1777 /* verify that it is a fine scheme */
1778 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1779 return CURLUE_UNSUPPORTED_SCHEME;
1780 storep = &u->scheme;
1781 urlencode = FALSE; /* never */
1782 if(ISALPHA(*s)) {
1783 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1784 while(--plen) {
1785 if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1786 s++; /* fine */
1787 else
1788 return CURLUE_BAD_SCHEME;
1789 }
1790 }
1791 else
1792 return CURLUE_BAD_SCHEME;
1793 break;
1794 }
1795 case CURLUPART_USER:
1796 storep = &u->user;
1797 break;
1798 case CURLUPART_PASSWORD:
1799 storep = &u->password;
1800 break;
1801 case CURLUPART_OPTIONS:
1802 storep = &u->options;
1803 break;
1804 case CURLUPART_HOST:
1805 storep = &u->host;
1806 Curl_safefree(u->zoneid);
1807 break;
1808 case CURLUPART_ZONEID:
1809 storep = &u->zoneid;
1810 break;
1811 case CURLUPART_PORT:
1812 if(!ISDIGIT(part[0]))
1813 /* not a number */
1814 return CURLUE_BAD_PORT_NUMBER;
1815 else {
1816 char *tmp;
1817 char *endp;
1818 unsigned long port;
1819 errno = 0;
1820 port = strtoul(part, &endp, 10); /* must be decimal */
1821 if(errno || (port > 0xffff) || *endp)
1822 /* weirdly provided number, not good! */
1823 return CURLUE_BAD_PORT_NUMBER;
1824 tmp = strdup(part);
1825 if(!tmp)
1826 return CURLUE_OUT_OF_MEMORY;
1827 free(u->port);
1828 u->port = tmp;
1829 u->portnum = (unsigned short)port;
1830 return CURLUE_OK;
1831 }
1832 case CURLUPART_PATH:
1833 urlskipslash = TRUE;
1834 leadingslash = TRUE; /* enforce */
1835 storep = &u->path;
1836 break;
1837 case CURLUPART_QUERY:
1838 plusencode = urlencode;
1839 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1840 equalsencode = appendquery;
1841 storep = &u->query;
1842 u->query_present = TRUE;
1843 break;
1844 case CURLUPART_FRAGMENT:
1845 storep = &u->fragment;
1846 u->fragment_present = TRUE;
1847 break;
1848 case CURLUPART_URL: {
1849 /*
1850 * Allow a new URL to replace the existing (if any) contents.
1851 *
1852 * If the existing contents is enough for a URL, allow a relative URL to
1853 * replace it.
1854 */
1855 CURLcode result;
1856 CURLUcode uc;
1857 char *oldurl;
1858 char *redired_url;
1859
1860 if(!nalloc)
1861 /* a blank URL is not a valid URL */
1862 return CURLUE_MALFORMED_INPUT;
1863
1864 /* if the new thing is absolute or the old one is not
1865 * (we could not get an absolute url in 'oldurl'),
1866 * then replace the existing with the new. */
1867 if(Curl_is_absolute_url(part, NULL, 0,
1868 flags & (CURLU_GUESS_SCHEME|
1869 CURLU_DEFAULT_SCHEME))
1870 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1871 return parseurl_and_replace(part, u, flags);
1872 }
1873
1874 /* apply the relative part to create a new URL
1875 * and replace the existing one with it. */
1876 result = concat_url(oldurl, part, &redired_url);
1877 free(oldurl);
1878 if(result)
1879 return cc2cu(result);
1880
1881 uc = parseurl_and_replace(redired_url, u, flags);
1882 free(redired_url);
1883 return uc;
1884 }
1885 default:
1886 return CURLUE_UNKNOWN_PART;
1887 }
1888 DEBUGASSERT(storep);
1889 {
1890 const char *newp;
1891 struct dynbuf enc;
1892 Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1893
1894 if(leadingslash && (part[0] != '/')) {
1895 CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1896 if(result)
1897 return cc2cu(result);
1898 }
1899 if(urlencode) {
1900 const unsigned char *i;
1901
1902 for(i = (const unsigned char *)part; *i; i++) {
1903 CURLcode result;
1904 if((*i == ' ') && plusencode) {
1905 result = Curl_dyn_addn(&enc, "+", 1);
1906 if(result)
1907 return CURLUE_OUT_OF_MEMORY;
1908 }
1909 else if(ISUNRESERVED(*i) ||
1910 ((*i == '/') && urlskipslash) ||
1911 ((*i == '=') && equalsencode)) {
1912 if((*i == '=') && equalsencode)
1913 /* only skip the first equals sign */
1914 equalsencode = FALSE;
1915 result = Curl_dyn_addn(&enc, i, 1);
1916 if(result)
1917 return cc2cu(result);
1918 }
1919 else {
1920 char out[3]={'%'};
1921 out[1] = hexdigits[*i>>4];
1922 out[2] = hexdigits[*i & 0xf];
1923 result = Curl_dyn_addn(&enc, out, 3);
1924 if(result)
1925 return cc2cu(result);
1926 }
1927 }
1928 }
1929 else {
1930 char *p;
1931 CURLcode result = Curl_dyn_add(&enc, part);
1932 if(result)
1933 return cc2cu(result);
1934 p = Curl_dyn_ptr(&enc);
1935 while(*p) {
1936 /* make sure percent encoded are lower case */
1937 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1938 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1939 p[1] = Curl_raw_tolower(p[1]);
1940 p[2] = Curl_raw_tolower(p[2]);
1941 p += 3;
1942 }
1943 else
1944 p++;
1945 }
1946 }
1947 newp = Curl_dyn_ptr(&enc);
1948
1949 if(appendquery && newp) {
1950 /* Append the 'newp' string onto the old query. Add a '&' separator if
1951 none is present at the end of the existing query already */
1952
1953 size_t querylen = u->query ? strlen(u->query) : 0;
1954 bool addamperand = querylen && (u->query[querylen -1] != '&');
1955 if(querylen) {
1956 struct dynbuf qbuf;
1957 Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1958
1959 if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1960 goto nomem;
1961
1962 if(addamperand) {
1963 if(Curl_dyn_addn(&qbuf, "&", 1))
1964 goto nomem;
1965 }
1966 if(Curl_dyn_add(&qbuf, newp))
1967 goto nomem;
1968 Curl_dyn_free(&enc);
1969 free(*storep);
1970 *storep = Curl_dyn_ptr(&qbuf);
1971 return CURLUE_OK;
1972 nomem:
1973 Curl_dyn_free(&enc);
1974 return CURLUE_OUT_OF_MEMORY;
1975 }
1976 }
1977
1978 else if(what == CURLUPART_HOST) {
1979 size_t n = Curl_dyn_len(&enc);
1980 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1981 /* Skip hostname check, it's allowed to be empty. */
1982 }
1983 else {
1984 if(!n || hostname_check(u, (char *)newp, n)) {
1985 Curl_dyn_free(&enc);
1986 return CURLUE_BAD_HOSTNAME;
1987 }
1988 }
1989 }
1990
1991 free(*storep);
1992 *storep = (char *)newp;
1993 }
1994 return CURLUE_OK;
1995 }
1996