1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 #include "curl_setup.h"
26
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37 #include "curl_memrchr.h"
38
39 /* The last 3 #include files should be in this order */
40 #include "curl_printf.h"
41 #include "curl_memory.h"
42 #include "memdebug.h"
43
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter that is
 * immediately followed by a colon. The argument is parenthesized at every
 * use so that pointer expressions such as (p + 1) index the intended
 * string. Note that 'str' is evaluated more than once. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
49
/* Drive prefix the way it may show up in a URL: an ASCII letter, then ':'
 * or '|', and after that a forward slash, a backslash or the end of the
 * string (NUL). 'str' is evaluated more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == '|' || (str)[1] == ':') && \
   ((str)[2] == 0 || (str)[2] == '\\' || (str)[2] == '/'))
57
/* the scheme is not URL encoded; this is the longest scheme name, in
   characters, that is recognized when scanning a URL */
59 #define MAX_SCHEME_LEN 40
60
/*
 * IPv6 addresses in URLs are parsed even when ENABLE_IPV6 is not defined.
 * Such a build may lack AF_INET6, so provide a stand-in value that is only
 * visible in this file rather than spreading a fake value everywhere.
 */
#ifndef ENABLE_IPV6
#ifndef AF_INET6
#define AF_INET6 (AF_INET + 1)
#endif
#endif
69
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every 'char *' member is released with free() by free_urlhandle(), so
 * each one is either NULL or an allocation owned by this struct. */
struct Curl_URL {
  char *scheme;
  char *user;     /* stored by parse_hostname_login() */
  char *password; /* stored by parse_hostname_login() */
  char *options; /* IMAP only? Only stored for schemes whose handler has
                    PROTOPT_URLOPTIONS set */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;   /* decimal string regenerated from 'portnum' */
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};

/* scheme used when CURLU_DEFAULT_SCHEME is set and the URL has none */
#define DEFAULT_SCHEME "https"
86
/*
 * Free every string member of 'u'.
 *
 * The struct itself is not freed and the member pointers are left as they
 * are (they are not set to NULL), so the caller must either free or
 * overwrite the struct before using it again.
 */
static void free_urlhandle(struct Curl_URL *u)
{
  free(u->scheme);
  free(u->user);
  free(u->password);
  free(u->options);
  free(u->host);
  free(u->zoneid);
  free(u->port);
  free(u->path);
  free(u->query);
  free(u->fragment);
}
100
/*
 * Return a pointer to the first character that follows the host name part
 * of 'url': the first '/' or the first '?' found after the leading "//"
 * (or counted from the start of the string when there is no "//"),
 * whichever comes first. The '?' wins in cases like
 * http://www.example.com?id=2380
 *
 * When neither character is present, the returned pointer is the
 * terminating null byte of 'url'.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* step over the double slash when there is one, else scan everything */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  /* a missing separator counts as being at the end of the string */
  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
128
/* Map a CURLcode onto a CURLUcode: CURLE_TOO_LARGE becomes
   CURLUE_TOO_LARGE, every other value is reported as out of memory */
#define cc2cu(x) ((x) != CURLE_TOO_LARGE ? CURLUE_OUT_OF_MEMORY : \
                  CURLUE_TOO_LARGE)

/*
 * A character in a URL must be escaped when none of ISCNTRL(), ISSPACE()
 * and ISGRAPH() accepts it.
 */
#define urlchar_needs_escaping(c) \
  (!ISCNTRL(c) && !ISSPACE(c) && !ISGRAPH(c))

/* lowercase hex digits, indexed by nibble value when percent-encoding */
static const char hexdigits[] = "0123456789abcdef";
138 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
139 * spaces in the source URL accordingly.
140 *
141 * URL encoding should be skipped for host names, otherwise IDN resolution
142 * will fail.
143 */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)144 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
145 size_t len, bool relative,
146 bool query)
147 {
148 /* we must add this with whitespace-replacing */
149 bool left = !query;
150 const unsigned char *iptr;
151 const unsigned char *host_sep = (const unsigned char *) url;
152 CURLcode result;
153
154 if(!relative)
155 host_sep = (const unsigned char *) find_host_sep(url);
156
157 for(iptr = (unsigned char *)url; /* read from here */
158 len; iptr++, len--) {
159
160 if(iptr < host_sep) {
161 result = Curl_dyn_addn(o, iptr, 1);
162 if(result)
163 return cc2cu(result);
164 continue;
165 }
166
167 if(*iptr == ' ') {
168 if(left)
169 result = Curl_dyn_addn(o, "%20", 3);
170 else
171 result = Curl_dyn_addn(o, "+", 1);
172 if(result)
173 return cc2cu(result);
174 continue;
175 }
176
177 if(*iptr == '?')
178 left = FALSE;
179
180 if(urlchar_needs_escaping(*iptr)) {
181 char out[3]={'%'};
182 out[1] = hexdigits[*iptr>>4];
183 out[2] = hexdigits[*iptr & 0xf];
184 result = Curl_dyn_addn(o, out, 3);
185 }
186 else
187 result = Curl_dyn_addn(o, iptr, 1);
188 if(result)
189 return cc2cu(result);
190 }
191
192 return CURLUE_OK;
193 }
194
195 /*
196 * Returns the length of the scheme if the given URL is absolute (as opposed
197 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
198 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
199 *
200 * If 'guess_scheme' is TRUE, it means the URL might be provided without
201 * scheme.
202 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)203 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
204 bool guess_scheme)
205 {
206 int i = 0;
207 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
208 (void)buflen; /* only used in debug-builds */
209 if(buf)
210 buf[0] = 0; /* always leave a defined value in buf */
211 #ifdef _WIN32
212 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
213 return 0;
214 #endif
215 if(ISALPHA(url[0]))
216 for(i = 1; i < MAX_SCHEME_LEN; ++i) {
217 char s = url[i];
218 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
219 /* RFC 3986 3.1 explains:
220 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
221 */
222 }
223 else {
224 break;
225 }
226 }
227 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
228 /* If this does not guess scheme, the scheme always ends with the colon so
229 that this also detects data: URLs etc. In guessing mode, data: could
230 be the host name "data" with a specified port number. */
231
232 /* the length of the scheme is the name part only */
233 size_t len = i;
234 if(buf) {
235 buf[i] = 0;
236 while(i--) {
237 buf[i] = Curl_raw_tolower(url[i]);
238 }
239 }
240 return len;
241 }
242 return 0;
243 }
244
/*
 * Concatenate a relative URL to a base URL making it absolute.
 * URL-encodes any spaces.
 *
 * On success, CURLE_OK is returned and *newurl points to the resulting
 * string, which must be freed by the caller. On failure, an error code is
 * returned and *newurl is NULL.
 *
 * Note that this function destroys the 'base' string: null bytes are
 * written into it to cut off the parts that are being replaced.
 */
static CURLcode concat_url(char *base, const char *relurl, char **newurl)
{
  /***
   TRY to append this new path to the old URL
   to the right of the host part. Oh crap, this is doomed to cause
   problems in the future...
  */
  struct dynbuf newest;
  char *protsep;
  char *pathsep;
  bool host_changed = FALSE;
  const char *useurl = relurl;
  CURLcode result = CURLE_OK;
  CURLUcode uc;
  *newurl = NULL;

  /* protsep points to the start of the host name */
  protsep = strstr(base, "//");
  if(!protsep)
    protsep = base;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    int level = 0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or if the new URL is just a query string (starts with a
       '?') we append the new one at the end of the entire currently worked
       out URL */
    if(useurl[0] != '?') {
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;
    }

    /* Check if there's any slash after the host name, and if so, remember
       that position instead. From here on, protsep is either NULL (no path
       left in base) or points to the first character of the path. */
    pathsep = strchr(protsep, '/');
    if(pathsep)
      protsep = pathsep + 1;
    else
      protsep = NULL;

    /* now deal with one "./" or any amount of "../" in the newurl
       and act accordingly */

    if((useurl[0] == '.') && (useurl[1] == '/'))
      useurl += 2; /* just skip the "./" */

    while((useurl[0] == '.') &&
          (useurl[1] == '.') &&
          (useurl[2] == '/')) {
      level++;
      useurl += 3; /* pass the "../" */
    }

    if(protsep) {
      while(level--) {
        /* cut off one more level from the right of the original URL */
        pathsep = strrchr(protsep, '/');
        if(pathsep)
          *pathsep = 0;
        else {
          /* no more levels to remove: the base path becomes empty */
          *protsep = 0;
          break;
        }
      }
    }
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.example.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.example.com?id=2380" which
           doesn't use a slash separator as it is supposed to, we need to check
           for a ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);

  /* copy over the root url part */
  result = Curl_dyn_add(&newest, base);
  if(result)
    return result;

  /* check if we need to append a slash: not when the new piece brings its
     own leading '/' or '?', and not when the base path was cut to empty */
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    ;
  else {
    result = Curl_dyn_addn(&newest, "/", 1);
    if(result)
      return result;
  }

  /* then append the new piece on the right side. When the host was replaced
     the piece is encoded as non-relative so that its host name part is
     copied without being URL-encoded. */
  uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
                     FALSE);
  if(uc)
    return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;

  *newurl = Curl_dyn_ptr(&newest);
  return CURLE_OK;
}
390
391 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)392 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
393 {
394 static const char badbytes[]={
395 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
398 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
399 0x7f, 0x00 /* null-terminate */
400 };
401 size_t n = strlen(url);
402 size_t nfine;
403
404 if(n > CURL_MAX_INPUT_LENGTH)
405 /* excessive input length */
406 return CURLUE_MALFORMED_INPUT;
407
408 nfine = strcspn(url, badbytes);
409 if((nfine != n) ||
410 (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
411 return CURLUE_MALFORMED_INPUT;
412
413 *urllen = n;
414 return CURLUE_OK;
415 }
416
417 /*
418 * parse_hostname_login()
419 *
420 * Parse the login details (user name, password and options) from the URL and
421 * strip them out of the host name
422 *
423 */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)424 static CURLUcode parse_hostname_login(struct Curl_URL *u,
425 const char *login,
426 size_t len,
427 unsigned int flags,
428 size_t *offset) /* to the host name */
429 {
430 CURLUcode result = CURLUE_OK;
431 CURLcode ccode;
432 char *userp = NULL;
433 char *passwdp = NULL;
434 char *optionsp = NULL;
435 const struct Curl_handler *h = NULL;
436
437 /* At this point, we assume all the other special cases have been taken
438 * care of, so the host is at most
439 *
440 * [user[:password][;options]]@]hostname
441 *
442 * We need somewhere to put the embedded details, so do that first.
443 */
444 char *ptr;
445
446 DEBUGASSERT(login);
447
448 *offset = 0;
449 ptr = memchr(login, '@', len);
450 if(!ptr)
451 goto out;
452
453 /* We will now try to extract the
454 * possible login information in a string like:
455 * ftp://user:password@ftp.my.site:8021/README */
456 ptr++;
457
458 /* if this is a known scheme, get some details */
459 if(u->scheme)
460 h = Curl_get_scheme_handler(u->scheme);
461
462 /* We could use the login information in the URL so extract it. Only parse
463 options if the handler says we should. Note that 'h' might be NULL! */
464 ccode = Curl_parse_login_details(login, ptr - login - 1,
465 &userp, &passwdp,
466 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
467 &optionsp:NULL);
468 if(ccode) {
469 result = CURLUE_BAD_LOGIN;
470 goto out;
471 }
472
473 if(userp) {
474 if(flags & CURLU_DISALLOW_USER) {
475 /* Option DISALLOW_USER is set and url contains username. */
476 result = CURLUE_USER_NOT_ALLOWED;
477 goto out;
478 }
479 free(u->user);
480 u->user = userp;
481 }
482
483 if(passwdp) {
484 free(u->password);
485 u->password = passwdp;
486 }
487
488 if(optionsp) {
489 free(u->options);
490 u->options = optionsp;
491 }
492
493 /* the host name starts at this offset */
494 *offset = ptr - login;
495 return CURLUE_OK;
496
497 out:
498
499 free(userp);
500 free(passwdp);
501 free(optionsp);
502 u->user = NULL;
503 u->password = NULL;
504 u->options = NULL;
505
506 return result;
507 }
508
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)509 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
510 bool has_scheme)
511 {
512 char *portptr;
513 char *hostname = Curl_dyn_ptr(host);
514 /*
515 * Find the end of an IPv6 address on the ']' ending bracket.
516 */
517 if(hostname[0] == '[') {
518 portptr = strchr(hostname, ']');
519 if(!portptr)
520 return CURLUE_BAD_IPV6;
521 portptr++;
522 /* this is a RFC2732-style specified IP-address */
523 if(*portptr) {
524 if(*portptr != ':')
525 return CURLUE_BAD_PORT_NUMBER;
526 }
527 else
528 portptr = NULL;
529 }
530 else
531 portptr = strchr(hostname, ':');
532
533 if(portptr) {
534 char *rest;
535 long port;
536 size_t keep = portptr - hostname;
537
538 /* Browser behavior adaptation. If there's a colon with no digits after,
539 just cut off the name there which makes us ignore the colon and just
540 use the default port. Firefox, Chrome and Safari all do that.
541
542 Don't do it if the URL has no scheme, to make something that looks like
543 a scheme not work!
544 */
545 Curl_dyn_setlen(host, keep);
546 portptr++;
547 if(!*portptr)
548 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
549
550 if(!ISDIGIT(*portptr))
551 return CURLUE_BAD_PORT_NUMBER;
552
553 port = strtol(portptr, &rest, 10); /* Port number must be decimal */
554
555 if(port > 0xffff)
556 return CURLUE_BAD_PORT_NUMBER;
557
558 if(rest[0])
559 return CURLUE_BAD_PORT_NUMBER;
560
561 u->portnum = port;
562 /* generate a new port number string to get rid of leading zeroes etc */
563 free(u->port);
564 u->port = aprintf("%ld", port);
565 if(!u->port)
566 return CURLUE_OUT_OF_MEMORY;
567 }
568
569 return CURLUE_OK;
570 }
571
572 /* this assumes 'hostname' now starts with [ */
ipv6_parse(struct Curl_URL * u,char * hostname,size_t hlen)573 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
574 size_t hlen) /* length of hostname */
575 {
576 size_t len;
577 DEBUGASSERT(*hostname == '[');
578 if(hlen < 4) /* '[::]' is the shortest possible valid string */
579 return CURLUE_BAD_IPV6;
580 hostname++;
581 hlen -= 2;
582
583 /* only valid IPv6 letters are ok */
584 len = strspn(hostname, "0123456789abcdefABCDEF:.");
585
586 if(hlen != len) {
587 hlen = len;
588 if(hostname[len] == '%') {
589 /* this could now be '%[zone id]' */
590 char zoneid[16];
591 int i = 0;
592 char *h = &hostname[len + 1];
593 /* pass '25' if present and is a url encoded percent sign */
594 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595 h += 2;
596 while(*h && (*h != ']') && (i < 15))
597 zoneid[i++] = *h++;
598 if(!i || (']' != *h))
599 return CURLUE_BAD_IPV6;
600 zoneid[i] = 0;
601 u->zoneid = strdup(zoneid);
602 if(!u->zoneid)
603 return CURLUE_OUT_OF_MEMORY;
604 hostname[len] = ']'; /* insert end bracket */
605 hostname[len + 1] = 0; /* terminate the hostname */
606 }
607 else
608 return CURLUE_BAD_IPV6;
609 /* hostname is fine */
610 }
611
612 /* Check the IPv6 address. */
613 {
614 char dest[16]; /* fits a binary IPv6 address */
615 char norm[MAX_IPADR_LEN];
616 hostname[hlen] = 0; /* end the address there */
617 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618 return CURLUE_BAD_IPV6;
619
620 /* check if it can be done shorter */
621 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622 (strlen(norm) < hlen)) {
623 strcpy(hostname, norm);
624 hlen = strlen(norm);
625 hostname[hlen + 1] = 0;
626 }
627 hostname[hlen] = ']'; /* restore ending bracket */
628 }
629 return CURLUE_OK;
630 }
631
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)632 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
633 size_t hlen) /* length of hostname */
634 {
635 size_t len;
636 DEBUGASSERT(hostname);
637
638 if(!hlen)
639 return CURLUE_NO_HOST;
640 else if(hostname[0] == '[')
641 return ipv6_parse(u, hostname, hlen);
642 else {
643 /* letters from the second string are not ok */
644 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
645 if(hlen != len)
646 /* hostname with bad content */
647 return CURLUE_BAD_HOSTNAME;
648 }
649 return CURLUE_OK;
650 }
651
652 /*
653 * Handle partial IPv4 numerical addresses and different bases, like
654 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
655 *
656 * If the given input string is syntactically wrong IPv4 or any part for
657 * example is too big, this function returns HOST_NAME.
658 *
659 * Output the "normalized" version of that input string in plain quad decimal
660 * integers.
661 *
662 * Returns the host type.
663 */
664
665 #define HOST_ERROR -1 /* out of memory */
666 #define HOST_BAD -2 /* bad IPv4 address */
667
668 #define HOST_NAME 1
669 #define HOST_IPV4 2
670 #define HOST_IPV6 3
671
ipv4_normalize(struct dynbuf * host)672 static int ipv4_normalize(struct dynbuf *host)
673 {
674 bool done = FALSE;
675 int n = 0;
676 const char *c = Curl_dyn_ptr(host);
677 unsigned long parts[4] = {0, 0, 0, 0};
678 CURLcode result = CURLE_OK;
679
680 if(*c == '[')
681 return HOST_IPV6;
682
683 while(!done) {
684 char *endp;
685 unsigned long l;
686 if(!ISDIGIT(*c))
687 /* most importantly this doesn't allow a leading plus or minus */
688 return HOST_NAME;
689 l = strtoul(c, &endp, 0);
690
691 parts[n] = l;
692 c = endp;
693
694 switch(*c) {
695 case '.':
696 if(n == 3)
697 return HOST_NAME;
698 n++;
699 c++;
700 break;
701
702 case '\0':
703 done = TRUE;
704 break;
705
706 default:
707 return HOST_NAME;
708 }
709
710 /* overflow */
711 if((l == ULONG_MAX) && (errno == ERANGE))
712 return HOST_NAME;
713
714 #if SIZEOF_LONG > 4
715 /* a value larger than 32 bits */
716 if(l > UINT_MAX)
717 return HOST_NAME;
718 #endif
719 }
720
721 switch(n) {
722 case 0: /* a -- 32 bits */
723 Curl_dyn_reset(host);
724
725 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
726 (unsigned int)(parts[0] >> 24),
727 (unsigned int)((parts[0] >> 16) & 0xff),
728 (unsigned int)((parts[0] >> 8) & 0xff),
729 (unsigned int)(parts[0] & 0xff));
730 break;
731 case 1: /* a.b -- 8.24 bits */
732 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
733 return HOST_NAME;
734 Curl_dyn_reset(host);
735 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
736 (unsigned int)(parts[0]),
737 (unsigned int)((parts[1] >> 16) & 0xff),
738 (unsigned int)((parts[1] >> 8) & 0xff),
739 (unsigned int)(parts[1] & 0xff));
740 break;
741 case 2: /* a.b.c -- 8.8.16 bits */
742 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
743 return HOST_NAME;
744 Curl_dyn_reset(host);
745 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
746 (unsigned int)(parts[0]),
747 (unsigned int)(parts[1]),
748 (unsigned int)((parts[2] >> 8) & 0xff),
749 (unsigned int)(parts[2] & 0xff));
750 break;
751 case 3: /* a.b.c.d -- 8.8.8.8 bits */
752 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
753 (parts[3] > 0xff))
754 return HOST_NAME;
755 Curl_dyn_reset(host);
756 result = Curl_dyn_addf(host, "%u.%u.%u.%u",
757 (unsigned int)(parts[0]),
758 (unsigned int)(parts[1]),
759 (unsigned int)(parts[2]),
760 (unsigned int)(parts[3]));
761 break;
762 }
763 if(result)
764 return HOST_ERROR;
765 return HOST_IPV4;
766 }
767
768 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)769 static CURLUcode urldecode_host(struct dynbuf *host)
770 {
771 char *per = NULL;
772 const char *hostname = Curl_dyn_ptr(host);
773 per = strchr(hostname, '%');
774 if(!per)
775 /* nothing to decode */
776 return CURLUE_OK;
777 else {
778 /* encoded */
779 size_t dlen;
780 char *decoded;
781 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
782 REJECT_CTRL);
783 if(result)
784 return CURLUE_BAD_HOSTNAME;
785 Curl_dyn_reset(host);
786 result = Curl_dyn_addn(host, decoded, dlen);
787 free(decoded);
788 if(result)
789 return cc2cu(result);
790 }
791
792 return CURLUE_OK;
793 }
794
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)795 static CURLUcode parse_authority(struct Curl_URL *u,
796 const char *auth, size_t authlen,
797 unsigned int flags,
798 struct dynbuf *host,
799 bool has_scheme)
800 {
801 size_t offset;
802 CURLUcode uc;
803 CURLcode result;
804
805 /*
806 * Parse the login details and strip them out of the host name.
807 */
808 uc = parse_hostname_login(u, auth, authlen, flags, &offset);
809 if(uc)
810 goto out;
811
812 result = Curl_dyn_addn(host, auth + offset, authlen - offset);
813 if(result) {
814 uc = cc2cu(result);
815 goto out;
816 }
817
818 uc = Curl_parse_port(u, host, has_scheme);
819 if(uc)
820 goto out;
821
822 if(!Curl_dyn_len(host))
823 return CURLUE_NO_HOST;
824
825 switch(ipv4_normalize(host)) {
826 case HOST_IPV4:
827 break;
828 case HOST_IPV6:
829 uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
830 break;
831 case HOST_NAME:
832 uc = urldecode_host(host);
833 if(!uc)
834 uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
835 break;
836 case HOST_ERROR:
837 uc = CURLUE_OUT_OF_MEMORY;
838 break;
839 case HOST_BAD:
840 default:
841 uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
842 break;
843 }
844
845 out:
846 return uc;
847 }
848
Curl_url_set_authority(CURLU * u,const char * authority,unsigned int flags)849 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
850 unsigned int flags)
851 {
852 CURLUcode result;
853 struct dynbuf host;
854
855 DEBUGASSERT(authority);
856 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
857
858 result = parse_authority(u, authority, strlen(authority), flags,
859 &host, !!u->scheme);
860 if(result)
861 Curl_dyn_free(&host);
862 else {
863 free(u->host);
864 u->host = Curl_dyn_ptr(&host);
865 }
866 return result;
867 }
868
/*
 * "Remove Dot Segments"
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
 */

/*
 * dedotdotify()
 * @unittest: 1395
 *
 * This function gets a path with dot and dotdot sequences passed in and
 * strips them off according to the rules in RFC 3986 section 5.2.4. The
 * path is 'clen' bytes long and must also be null-terminated, since parts
 * of the scan rely on the terminator.
 *
 * The function handles a query part ('?' + stuff) appended but it expects
 * that fragments ('#' + stuff) have already been cut off.
 *
 * RETURNS
 *
 * Zero for success, with '*outp' set to an allocated dedotdotified string
 * that the caller must free. '*outp' is set to NULL, still with a zero
 * return code, when the input is shorter than two bytes or contains no dot
 * at all, as there is then nothing to remove.
 *
 * One if the output buffer could not be allocated; '*outp' is then NULL.
 */
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
  char *outptr;
  const char *endp = &input[clen];
  char *out;

  *outp = NULL;
  /* the path always starts with a slash, and a slash has no dot */
  if((clen < 2) || !memchr(input, '.', clen))
    return 0;

  /* the output is never longer than the input */
  out = malloc(clen + 1);
  if(!out)
    return 1; /* out of memory */

  *out = 0; /* null-terminates, for inputs like "./" */
  outptr = out;

  do {
    /* set to FALSE when no dot rule matched and a segment is to be copied */
    bool dotdot = TRUE;
    if(*input == '.') {
      /* A. If the input buffer begins with a prefix of "../" or "./", then
         remove that prefix from the input buffer; otherwise, */

      if(!strncmp("./", input, 2)) {
        input += 2;
        clen -= 2;
      }
      else if(!strncmp("../", input, 3)) {
        input += 3;
        clen -= 3;
      }
      /* D. if the input buffer consists only of "." or "..", then remove
         that from the input buffer; otherwise, */

      else if(!strcmp(".", input) || !strcmp("..", input) ||
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
        *out = 0;
        break;
      }
      else
        dotdot = FALSE;
    }
    else if(*input == '/') {
      /* B. if the input buffer begins with a prefix of "/./" or "/.", where
         "." is a complete path segment, then replace that prefix with "/" in
         the input buffer; otherwise, */
      if(!strncmp("/./", input, 3)) {
        input += 2;
        clen -= 2;
      }
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
        *outptr++ = '/';
        *outptr = 0;
        break;
      }

      /* C. if the input buffer begins with a prefix of "/../" or "/..",
         where ".." is a complete path segment, then replace that prefix with
         "/" in the input buffer and remove the last segment and its
         preceding "/" (if any) from the output buffer; otherwise, */

      else if(!strncmp("/../", input, 4)) {
        input += 3;
        clen -= 3;
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        *outptr = 0; /* null-terminate where it stops */
      }
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        *outptr++ = '/';
        *outptr = 0; /* null-terminate where it stops */
        break;
      }
      else
        dotdot = FALSE;
    }
    else
      dotdot = FALSE;

    if(!dotdot) {
      /* E. move the first path segment in the input buffer to the end of
         the output buffer, including the initial "/" character (if any) and
         any subsequent characters up to, but not including, the next "/"
         character or the end of the input buffer. A '?' stops the copy as
         well. */

      do {
        *outptr++ = *input++;
        clen--;
      } while(*input && (*input != '/') && (*input != '?'));
      *outptr = 0;
    }

    /* continue until end of path */
  } while(input < endp);

  *outp = out;
  return 0; /* success */
}
999
parseurl(const char * url,CURLU * u,unsigned int flags)1000 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1001 {
1002 const char *path;
1003 size_t pathlen;
1004 char *query = NULL;
1005 char *fragment = NULL;
1006 char schemebuf[MAX_SCHEME_LEN + 1];
1007 size_t schemelen = 0;
1008 size_t urllen;
1009 CURLUcode result = CURLUE_OK;
1010 size_t fraglen = 0;
1011 struct dynbuf host;
1012
1013 DEBUGASSERT(url);
1014
1015 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1016
1017 result = junkscan(url, &urllen, flags);
1018 if(result)
1019 goto fail;
1020
1021 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1022 flags & (CURLU_GUESS_SCHEME|
1023 CURLU_DEFAULT_SCHEME));
1024
1025 /* handle the file: scheme */
1026 if(schemelen && !strcmp(schemebuf, "file")) {
1027 bool uncpath = FALSE;
1028 if(urllen <= 6) {
1029 /* file:/ is not enough to actually be a complete file: URL */
1030 result = CURLUE_BAD_FILE_URL;
1031 goto fail;
1032 }
1033
1034 /* path has been allocated large enough to hold this */
1035 path = (char *)&url[5];
1036 pathlen = urllen - 5;
1037
1038 u->scheme = strdup("file");
1039 if(!u->scheme) {
1040 result = CURLUE_OUT_OF_MEMORY;
1041 goto fail;
1042 }
1043
1044 /* Extra handling URLs with an authority component (i.e. that start with
1045 * "file://")
1046 *
1047 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1048 * RFC 8089, but not the (current) WHAT-WG URL spec.
1049 */
1050 if(path[0] == '/' && path[1] == '/') {
1051 /* swallow the two slashes */
1052 const char *ptr = &path[2];
1053
1054 /*
1055 * According to RFC 8089, a file: URL can be reliably dereferenced if:
1056 *
1057 * o it has no/blank hostname, or
1058 *
1059 * o the hostname matches "localhost" (case-insensitively), or
1060 *
1061 * o the hostname is a FQDN that resolves to this machine, or
1062 *
1063 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
1064 * Appendix E.3).
1065 *
1066 * For brevity, we only consider URLs with empty, "localhost", or
1067 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1068 *
1069 * Additionally, there is an exception for URLs with a Windows drive
1070 * letter in the authority (which was accidentally omitted from RFC 8089
1071 * Appendix E, but believe me, it was meant to be there. --MK)
1072 */
1073 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1074 /* the URL includes a host name, it must match "localhost" or
1075 "127.0.0.1" to be valid */
1076 if(checkprefix("localhost/", ptr) ||
1077 checkprefix("127.0.0.1/", ptr)) {
1078 ptr += 9; /* now points to the slash after the host */
1079 }
1080 else {
1081 #if defined(_WIN32)
1082 size_t len;
1083
1084 /* the host name, NetBIOS computer name, can not contain disallowed
1085 chars, and the delimiting slash character must be appended to the
1086 host name */
1087 path = strpbrk(ptr, "/\\:*?\"<>|");
1088 if(!path || *path != '/') {
1089 result = CURLUE_BAD_FILE_URL;
1090 goto fail;
1091 }
1092
1093 len = path - ptr;
1094 if(len) {
1095 CURLcode code = Curl_dyn_addn(&host, ptr, len);
1096 if(code) {
1097 result = cc2cu(code);
1098 goto fail;
1099 }
1100 uncpath = TRUE;
1101 }
1102
1103 ptr -= 2; /* now points to the // before the host in UNC */
1104 #else
1105 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1106 none */
1107 result = CURLUE_BAD_FILE_URL;
1108 goto fail;
1109 #endif
1110 }
1111 }
1112
1113 path = ptr;
1114 pathlen = urllen - (ptr - url);
1115 }
1116
1117 if(!uncpath)
1118 /* no host for file: URLs by default */
1119 Curl_dyn_reset(&host);
1120
1121 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1122 /* Don't allow Windows drive letters when not in Windows.
1123 * This catches both "file:/c:" and "file:c:" */
1124 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1125 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1126 /* File drive letters are only accepted in MSDOS/Windows */
1127 result = CURLUE_BAD_FILE_URL;
1128 goto fail;
1129 }
1130 #else
1131 /* If the path starts with a slash and a drive letter, ditch the slash */
1132 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1133 /* This cannot be done with strcpy, as the memory chunks overlap! */
1134 path++;
1135 pathlen--;
1136 }
1137 #endif
1138
1139 }
1140 else {
1141 /* clear path */
1142 const char *schemep = NULL;
1143 const char *hostp;
1144 size_t hostlen;
1145
1146 if(schemelen) {
1147 int i = 0;
1148 const char *p = &url[schemelen + 1];
1149 while((*p == '/') && (i < 4)) {
1150 p++;
1151 i++;
1152 }
1153
1154 schemep = schemebuf;
1155 if(!Curl_get_scheme_handler(schemep) &&
1156 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1157 result = CURLUE_UNSUPPORTED_SCHEME;
1158 goto fail;
1159 }
1160
1161 if((i < 1) || (i > 3)) {
1162 /* less than one or more than three slashes */
1163 result = CURLUE_BAD_SLASHES;
1164 goto fail;
1165 }
1166 hostp = p; /* host name starts here */
1167 }
1168 else {
1169 /* no scheme! */
1170
1171 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1172 result = CURLUE_BAD_SCHEME;
1173 goto fail;
1174 }
1175 if(flags & CURLU_DEFAULT_SCHEME)
1176 schemep = DEFAULT_SCHEME;
1177
1178 /*
1179 * The URL was badly formatted, let's try without scheme specified.
1180 */
1181 hostp = url;
1182 }
1183
1184 if(schemep) {
1185 u->scheme = strdup(schemep);
1186 if(!u->scheme) {
1187 result = CURLUE_OUT_OF_MEMORY;
1188 goto fail;
1189 }
1190 }
1191
1192 /* find the end of the host name + port number */
1193 hostlen = strcspn(hostp, "/?#");
1194 path = &hostp[hostlen];
1195
1196 /* this pathlen also contains the query and the fragment */
1197 pathlen = urllen - (path - url);
1198 if(hostlen) {
1199
1200 result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1201 if(result)
1202 goto fail;
1203
1204 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1205 const char *hostname = Curl_dyn_ptr(&host);
1206 /* legacy curl-style guess based on host name */
1207 if(checkprefix("ftp.", hostname))
1208 schemep = "ftp";
1209 else if(checkprefix("dict.", hostname))
1210 schemep = "dict";
1211 else if(checkprefix("ldap.", hostname))
1212 schemep = "ldap";
1213 else if(checkprefix("imap.", hostname))
1214 schemep = "imap";
1215 else if(checkprefix("smtp.", hostname))
1216 schemep = "smtp";
1217 else if(checkprefix("pop3.", hostname))
1218 schemep = "pop3";
1219 else
1220 schemep = "http";
1221
1222 u->scheme = strdup(schemep);
1223 if(!u->scheme) {
1224 result = CURLUE_OUT_OF_MEMORY;
1225 goto fail;
1226 }
1227 }
1228 }
1229 else if(flags & CURLU_NO_AUTHORITY) {
1230 /* allowed to be empty. */
1231 if(Curl_dyn_add(&host, "")) {
1232 result = CURLUE_OUT_OF_MEMORY;
1233 goto fail;
1234 }
1235 }
1236 else {
1237 result = CURLUE_NO_HOST;
1238 goto fail;
1239 }
1240 }
1241
1242 fragment = strchr(path, '#');
1243 if(fragment) {
1244 fraglen = pathlen - (fragment - path);
1245 if(fraglen > 1) {
1246 /* skip the leading '#' in the copy but include the terminating null */
1247 if(flags & CURLU_URLENCODE) {
1248 struct dynbuf enc;
1249 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1250 result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1251 if(result)
1252 goto fail;
1253 u->fragment = Curl_dyn_ptr(&enc);
1254 }
1255 else {
1256 u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1257 if(!u->fragment) {
1258 result = CURLUE_OUT_OF_MEMORY;
1259 goto fail;
1260 }
1261 }
1262 }
1263 /* after this, pathlen still contains the query */
1264 pathlen -= fraglen;
1265 }
1266
1267 query = memchr(path, '?', pathlen);
1268 if(query) {
1269 size_t qlen = fragment ? (size_t)(fragment - query) :
1270 pathlen - (query - path);
1271 pathlen -= qlen;
1272 if(qlen > 1) {
1273 if(flags & CURLU_URLENCODE) {
1274 struct dynbuf enc;
1275 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1276 /* skip the leading question mark */
1277 result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1278 if(result)
1279 goto fail;
1280 u->query = Curl_dyn_ptr(&enc);
1281 }
1282 else {
1283 u->query = Curl_memdup0(query + 1, qlen - 1);
1284 if(!u->query) {
1285 result = CURLUE_OUT_OF_MEMORY;
1286 goto fail;
1287 }
1288 }
1289 }
1290 else {
1291 /* single byte query */
1292 u->query = strdup("");
1293 if(!u->query) {
1294 result = CURLUE_OUT_OF_MEMORY;
1295 goto fail;
1296 }
1297 }
1298 }
1299
1300 if(pathlen && (flags & CURLU_URLENCODE)) {
1301 struct dynbuf enc;
1302 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1303 result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1304 if(result)
1305 goto fail;
1306 pathlen = Curl_dyn_len(&enc);
1307 path = u->path = Curl_dyn_ptr(&enc);
1308 }
1309
1310 if(pathlen <= 1) {
1311 /* there is no path left or just the slash, unset */
1312 path = NULL;
1313 }
1314 else {
1315 if(!u->path) {
1316 u->path = Curl_memdup0(path, pathlen);
1317 if(!u->path) {
1318 result = CURLUE_OUT_OF_MEMORY;
1319 goto fail;
1320 }
1321 path = u->path;
1322 }
1323 else if(flags & CURLU_URLENCODE)
1324 /* it might have encoded more than just the path so cut it */
1325 u->path[pathlen] = 0;
1326
1327 if(!(flags & CURLU_PATH_AS_IS)) {
1328 /* remove ../ and ./ sequences according to RFC3986 */
1329 char *dedot;
1330 int err = dedotdotify((char *)path, pathlen, &dedot);
1331 if(err) {
1332 result = CURLUE_OUT_OF_MEMORY;
1333 goto fail;
1334 }
1335 if(dedot) {
1336 free(u->path);
1337 u->path = dedot;
1338 }
1339 }
1340 }
1341
1342 u->host = Curl_dyn_ptr(&host);
1343
1344 return result;
1345 fail:
1346 Curl_dyn_free(&host);
1347 free_urlhandle(u);
1348 return result;
1349 }
1350
1351 /*
1352 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1353 */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1354 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1355 unsigned int flags)
1356 {
1357 CURLUcode result;
1358 CURLU tmpurl;
1359 memset(&tmpurl, 0, sizeof(tmpurl));
1360 result = parseurl(url, &tmpurl, flags);
1361 if(!result) {
1362 free_urlhandle(u);
1363 *u = tmpurl;
1364 }
1365 return result;
1366 }
1367
1368 /*
1369 */
curl_url(void)1370 CURLU *curl_url(void)
1371 {
1372 return calloc(1, sizeof(struct Curl_URL));
1373 }
1374
/*
 * Release every part stored in the handle and then the handle itself.
 * Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1382
1383 #define DUP(dest, src, name) \
1384 do { \
1385 if(src->name) { \
1386 dest->name = strdup(src->name); \
1387 if(!dest->name) \
1388 goto fail; \
1389 } \
1390 } while(0)
1391
curl_url_dup(const CURLU * in)1392 CURLU *curl_url_dup(const CURLU *in)
1393 {
1394 struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1395 if(u) {
1396 DUP(u, in, scheme);
1397 DUP(u, in, user);
1398 DUP(u, in, password);
1399 DUP(u, in, options);
1400 DUP(u, in, host);
1401 DUP(u, in, port);
1402 DUP(u, in, path);
1403 DUP(u, in, query);
1404 DUP(u, in, fragment);
1405 DUP(u, in, zoneid);
1406 u->portnum = in->portnum;
1407 }
1408 return u;
1409 fail:
1410 curl_url_cleanup(u);
1411 return NULL;
1412 }
1413
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1414 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1415 char **part, unsigned int flags)
1416 {
1417 const char *ptr;
1418 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1419 char portbuf[7];
1420 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1421 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1422 bool punycode = FALSE;
1423 bool depunyfy = FALSE;
1424 bool plusdecode = FALSE;
1425 (void)flags;
1426 if(!u)
1427 return CURLUE_BAD_HANDLE;
1428 if(!part)
1429 return CURLUE_BAD_PARTPOINTER;
1430 *part = NULL;
1431
1432 switch(what) {
1433 case CURLUPART_SCHEME:
1434 ptr = u->scheme;
1435 ifmissing = CURLUE_NO_SCHEME;
1436 urldecode = FALSE; /* never for schemes */
1437 break;
1438 case CURLUPART_USER:
1439 ptr = u->user;
1440 ifmissing = CURLUE_NO_USER;
1441 break;
1442 case CURLUPART_PASSWORD:
1443 ptr = u->password;
1444 ifmissing = CURLUE_NO_PASSWORD;
1445 break;
1446 case CURLUPART_OPTIONS:
1447 ptr = u->options;
1448 ifmissing = CURLUE_NO_OPTIONS;
1449 break;
1450 case CURLUPART_HOST:
1451 ptr = u->host;
1452 ifmissing = CURLUE_NO_HOST;
1453 punycode = (flags & CURLU_PUNYCODE)?1:0;
1454 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1455 break;
1456 case CURLUPART_ZONEID:
1457 ptr = u->zoneid;
1458 ifmissing = CURLUE_NO_ZONEID;
1459 break;
1460 case CURLUPART_PORT:
1461 ptr = u->port;
1462 ifmissing = CURLUE_NO_PORT;
1463 urldecode = FALSE; /* never for port */
1464 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1465 /* there's no stored port number, but asked to deliver
1466 a default one for the scheme */
1467 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1468 if(h) {
1469 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1470 ptr = portbuf;
1471 }
1472 }
1473 else if(ptr && u->scheme) {
1474 /* there is a stored port number, but ask to inhibit if
1475 it matches the default one for the scheme */
1476 const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1477 if(h && (h->defport == u->portnum) &&
1478 (flags & CURLU_NO_DEFAULT_PORT))
1479 ptr = NULL;
1480 }
1481 break;
1482 case CURLUPART_PATH:
1483 ptr = u->path;
1484 if(!ptr)
1485 ptr = "/";
1486 break;
1487 case CURLUPART_QUERY:
1488 ptr = u->query;
1489 ifmissing = CURLUE_NO_QUERY;
1490 plusdecode = urldecode;
1491 break;
1492 case CURLUPART_FRAGMENT:
1493 ptr = u->fragment;
1494 ifmissing = CURLUE_NO_FRAGMENT;
1495 break;
1496 case CURLUPART_URL: {
1497 char *url;
1498 char *scheme;
1499 char *options = u->options;
1500 char *port = u->port;
1501 char *allochost = NULL;
1502 punycode = (flags & CURLU_PUNYCODE)?1:0;
1503 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1504 if(u->scheme && strcasecompare("file", u->scheme)) {
1505 url = aprintf("file://%s%s%s",
1506 u->path,
1507 u->fragment? "#": "",
1508 u->fragment? u->fragment : "");
1509 }
1510 else if(!u->host)
1511 return CURLUE_NO_HOST;
1512 else {
1513 const struct Curl_handler *h = NULL;
1514 if(u->scheme)
1515 scheme = u->scheme;
1516 else if(flags & CURLU_DEFAULT_SCHEME)
1517 scheme = (char *) DEFAULT_SCHEME;
1518 else
1519 return CURLUE_NO_SCHEME;
1520
1521 h = Curl_get_scheme_handler(scheme);
1522 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1523 /* there's no stored port number, but asked to deliver
1524 a default one for the scheme */
1525 if(h) {
1526 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1527 port = portbuf;
1528 }
1529 }
1530 else if(port) {
1531 /* there is a stored port number, but asked to inhibit if it matches
1532 the default one for the scheme */
1533 if(h && (h->defport == u->portnum) &&
1534 (flags & CURLU_NO_DEFAULT_PORT))
1535 port = NULL;
1536 }
1537
1538 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1539 options = NULL;
1540
1541 if(u->host[0] == '[') {
1542 if(u->zoneid) {
1543 /* make it '[ host %25 zoneid ]' */
1544 struct dynbuf enc;
1545 size_t hostlen = strlen(u->host);
1546 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1547 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1548 u->zoneid))
1549 return CURLUE_OUT_OF_MEMORY;
1550 allochost = Curl_dyn_ptr(&enc);
1551 }
1552 }
1553 else if(urlencode) {
1554 allochost = curl_easy_escape(NULL, u->host, 0);
1555 if(!allochost)
1556 return CURLUE_OUT_OF_MEMORY;
1557 }
1558 else if(punycode) {
1559 if(!Curl_is_ASCII_name(u->host)) {
1560 #ifndef USE_IDN
1561 return CURLUE_LACKS_IDN;
1562 #else
1563 CURLcode result = Curl_idn_decode(u->host, &allochost);
1564 if(result)
1565 return (result == CURLE_OUT_OF_MEMORY) ?
1566 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1567 #endif
1568 }
1569 }
1570 else if(depunyfy) {
1571 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1572 #ifndef USE_IDN
1573 return CURLUE_LACKS_IDN;
1574 #else
1575 CURLcode result = Curl_idn_encode(u->host, &allochost);
1576 if(result)
1577 /* this is the most likely error */
1578 return (result == CURLE_OUT_OF_MEMORY) ?
1579 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1580 #endif
1581 }
1582 }
1583
1584 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1585 scheme,
1586 u->user ? u->user : "",
1587 u->password ? ":": "",
1588 u->password ? u->password : "",
1589 options ? ";" : "",
1590 options ? options : "",
1591 (u->user || u->password || options) ? "@": "",
1592 allochost ? allochost : u->host,
1593 port ? ":": "",
1594 port ? port : "",
1595 u->path ? u->path : "/",
1596 (u->query && u->query[0]) ? "?": "",
1597 (u->query && u->query[0]) ? u->query : "",
1598 u->fragment? "#": "",
1599 u->fragment? u->fragment : "");
1600 free(allochost);
1601 }
1602 if(!url)
1603 return CURLUE_OUT_OF_MEMORY;
1604 *part = url;
1605 return CURLUE_OK;
1606 }
1607 default:
1608 ptr = NULL;
1609 break;
1610 }
1611 if(ptr) {
1612 size_t partlen = strlen(ptr);
1613 size_t i = 0;
1614 *part = Curl_memdup0(ptr, partlen);
1615 if(!*part)
1616 return CURLUE_OUT_OF_MEMORY;
1617 if(plusdecode) {
1618 /* convert + to space */
1619 char *plus = *part;
1620 for(i = 0; i < partlen; ++plus, i++) {
1621 if(*plus == '+')
1622 *plus = ' ';
1623 }
1624 }
1625 if(urldecode) {
1626 char *decoded;
1627 size_t dlen;
1628 /* this unconditional rejection of control bytes is documented
1629 API behavior */
1630 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1631 free(*part);
1632 if(res) {
1633 *part = NULL;
1634 return CURLUE_URLDECODE;
1635 }
1636 *part = decoded;
1637 partlen = dlen;
1638 }
1639 if(urlencode) {
1640 struct dynbuf enc;
1641 CURLUcode uc;
1642 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1643 uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1644 if(uc)
1645 return uc;
1646 free(*part);
1647 *part = Curl_dyn_ptr(&enc);
1648 }
1649 else if(punycode) {
1650 if(!Curl_is_ASCII_name(u->host)) {
1651 #ifndef USE_IDN
1652 return CURLUE_LACKS_IDN;
1653 #else
1654 char *allochost;
1655 CURLcode result = Curl_idn_decode(*part, &allochost);
1656 if(result)
1657 return (result == CURLE_OUT_OF_MEMORY) ?
1658 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1659 free(*part);
1660 *part = allochost;
1661 #endif
1662 }
1663 }
1664 else if(depunyfy) {
1665 if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1666 #ifndef USE_IDN
1667 return CURLUE_LACKS_IDN;
1668 #else
1669 char *allochost;
1670 CURLcode result = Curl_idn_encode(*part, &allochost);
1671 if(result)
1672 return (result == CURLE_OUT_OF_MEMORY) ?
1673 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1674 free(*part);
1675 *part = allochost;
1676 #endif
1677 }
1678 }
1679
1680 return CURLUE_OK;
1681 }
1682 else
1683 return ifmissing;
1684 }
1685
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1686 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1687 const char *part, unsigned int flags)
1688 {
1689 char **storep = NULL;
1690 long port = 0;
1691 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1692 bool plusencode = FALSE;
1693 bool urlskipslash = FALSE;
1694 bool leadingslash = FALSE;
1695 bool appendquery = FALSE;
1696 bool equalsencode = FALSE;
1697 size_t nalloc;
1698
1699 if(!u)
1700 return CURLUE_BAD_HANDLE;
1701 if(!part) {
1702 /* setting a part to NULL clears it */
1703 switch(what) {
1704 case CURLUPART_URL:
1705 break;
1706 case CURLUPART_SCHEME:
1707 storep = &u->scheme;
1708 break;
1709 case CURLUPART_USER:
1710 storep = &u->user;
1711 break;
1712 case CURLUPART_PASSWORD:
1713 storep = &u->password;
1714 break;
1715 case CURLUPART_OPTIONS:
1716 storep = &u->options;
1717 break;
1718 case CURLUPART_HOST:
1719 storep = &u->host;
1720 break;
1721 case CURLUPART_ZONEID:
1722 storep = &u->zoneid;
1723 break;
1724 case CURLUPART_PORT:
1725 u->portnum = 0;
1726 storep = &u->port;
1727 break;
1728 case CURLUPART_PATH:
1729 storep = &u->path;
1730 break;
1731 case CURLUPART_QUERY:
1732 storep = &u->query;
1733 break;
1734 case CURLUPART_FRAGMENT:
1735 storep = &u->fragment;
1736 break;
1737 default:
1738 return CURLUE_UNKNOWN_PART;
1739 }
1740 if(storep && *storep) {
1741 Curl_safefree(*storep);
1742 }
1743 else if(!storep) {
1744 free_urlhandle(u);
1745 memset(u, 0, sizeof(struct Curl_URL));
1746 }
1747 return CURLUE_OK;
1748 }
1749
1750 nalloc = strlen(part);
1751 if(nalloc > CURL_MAX_INPUT_LENGTH)
1752 /* excessive input length */
1753 return CURLUE_MALFORMED_INPUT;
1754
1755 switch(what) {
1756 case CURLUPART_SCHEME: {
1757 size_t plen = strlen(part);
1758 const char *s = part;
1759 if((plen > MAX_SCHEME_LEN) || (plen < 1))
1760 /* too long or too short */
1761 return CURLUE_BAD_SCHEME;
1762 /* verify that it is a fine scheme */
1763 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1764 return CURLUE_UNSUPPORTED_SCHEME;
1765 storep = &u->scheme;
1766 urlencode = FALSE; /* never */
1767 if(ISALPHA(*s)) {
1768 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1769 while(--plen) {
1770 if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1771 s++; /* fine */
1772 else
1773 return CURLUE_BAD_SCHEME;
1774 }
1775 }
1776 else
1777 return CURLUE_BAD_SCHEME;
1778 break;
1779 }
1780 case CURLUPART_USER:
1781 storep = &u->user;
1782 break;
1783 case CURLUPART_PASSWORD:
1784 storep = &u->password;
1785 break;
1786 case CURLUPART_OPTIONS:
1787 storep = &u->options;
1788 break;
1789 case CURLUPART_HOST:
1790 storep = &u->host;
1791 Curl_safefree(u->zoneid);
1792 break;
1793 case CURLUPART_ZONEID:
1794 storep = &u->zoneid;
1795 break;
1796 case CURLUPART_PORT:
1797 {
1798 char *endp;
1799 urlencode = FALSE; /* never */
1800 port = strtol(part, &endp, 10); /* Port number must be decimal */
1801 if((port <= 0) || (port > 0xffff))
1802 return CURLUE_BAD_PORT_NUMBER;
1803 if(*endp)
1804 /* weirdly provided number, not good! */
1805 return CURLUE_BAD_PORT_NUMBER;
1806 storep = &u->port;
1807 }
1808 break;
1809 case CURLUPART_PATH:
1810 urlskipslash = TRUE;
1811 leadingslash = TRUE; /* enforce */
1812 storep = &u->path;
1813 break;
1814 case CURLUPART_QUERY:
1815 plusencode = urlencode;
1816 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1817 equalsencode = appendquery;
1818 storep = &u->query;
1819 break;
1820 case CURLUPART_FRAGMENT:
1821 storep = &u->fragment;
1822 break;
1823 case CURLUPART_URL: {
1824 /*
1825 * Allow a new URL to replace the existing (if any) contents.
1826 *
1827 * If the existing contents is enough for a URL, allow a relative URL to
1828 * replace it.
1829 */
1830 CURLcode result;
1831 CURLUcode uc;
1832 char *oldurl;
1833 char *redired_url;
1834
1835 if(!nalloc)
1836 /* a blank URL is not a valid URL */
1837 return CURLUE_MALFORMED_INPUT;
1838
1839 /* if the new thing is absolute or the old one is not
1840 * (we could not get an absolute url in 'oldurl'),
1841 * then replace the existing with the new. */
1842 if(Curl_is_absolute_url(part, NULL, 0,
1843 flags & (CURLU_GUESS_SCHEME|
1844 CURLU_DEFAULT_SCHEME))
1845 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1846 return parseurl_and_replace(part, u, flags);
1847 }
1848
1849 /* apply the relative part to create a new URL
1850 * and replace the existing one with it. */
1851 result = concat_url(oldurl, part, &redired_url);
1852 free(oldurl);
1853 if(result)
1854 return cc2cu(result);
1855
1856 uc = parseurl_and_replace(redired_url, u, flags);
1857 free(redired_url);
1858 return uc;
1859 }
1860 default:
1861 return CURLUE_UNKNOWN_PART;
1862 }
1863 DEBUGASSERT(storep);
1864 {
1865 const char *newp;
1866 struct dynbuf enc;
1867 Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1868
1869 if(leadingslash && (part[0] != '/')) {
1870 CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1871 if(result)
1872 return cc2cu(result);
1873 }
1874 if(urlencode) {
1875 const unsigned char *i;
1876
1877 for(i = (const unsigned char *)part; *i; i++) {
1878 CURLcode result;
1879 if((*i == ' ') && plusencode) {
1880 result = Curl_dyn_addn(&enc, "+", 1);
1881 if(result)
1882 return CURLUE_OUT_OF_MEMORY;
1883 }
1884 else if(ISUNRESERVED(*i) ||
1885 ((*i == '/') && urlskipslash) ||
1886 ((*i == '=') && equalsencode)) {
1887 if((*i == '=') && equalsencode)
1888 /* only skip the first equals sign */
1889 equalsencode = FALSE;
1890 result = Curl_dyn_addn(&enc, i, 1);
1891 if(result)
1892 return cc2cu(result);
1893 }
1894 else {
1895 char out[3]={'%'};
1896 out[1] = hexdigits[*i>>4];
1897 out[2] = hexdigits[*i & 0xf];
1898 result = Curl_dyn_addn(&enc, out, 3);
1899 if(result)
1900 return cc2cu(result);
1901 }
1902 }
1903 }
1904 else {
1905 char *p;
1906 CURLcode result = Curl_dyn_add(&enc, part);
1907 if(result)
1908 return cc2cu(result);
1909 p = Curl_dyn_ptr(&enc);
1910 while(*p) {
1911 /* make sure percent encoded are lower case */
1912 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1913 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1914 p[1] = Curl_raw_tolower(p[1]);
1915 p[2] = Curl_raw_tolower(p[2]);
1916 p += 3;
1917 }
1918 else
1919 p++;
1920 }
1921 }
1922 newp = Curl_dyn_ptr(&enc);
1923
1924 if(appendquery && newp) {
1925 /* Append the 'newp' string onto the old query. Add a '&' separator if
1926 none is present at the end of the existing query already */
1927
1928 size_t querylen = u->query ? strlen(u->query) : 0;
1929 bool addamperand = querylen && (u->query[querylen -1] != '&');
1930 if(querylen) {
1931 struct dynbuf qbuf;
1932 Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1933
1934 if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1935 goto nomem;
1936
1937 if(addamperand) {
1938 if(Curl_dyn_addn(&qbuf, "&", 1))
1939 goto nomem;
1940 }
1941 if(Curl_dyn_add(&qbuf, newp))
1942 goto nomem;
1943 Curl_dyn_free(&enc);
1944 free(*storep);
1945 *storep = Curl_dyn_ptr(&qbuf);
1946 return CURLUE_OK;
1947 nomem:
1948 Curl_dyn_free(&enc);
1949 return CURLUE_OUT_OF_MEMORY;
1950 }
1951 }
1952
1953 else if(what == CURLUPART_HOST) {
1954 size_t n = Curl_dyn_len(&enc);
1955 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1956 /* Skip hostname check, it's allowed to be empty. */
1957 }
1958 else {
1959 if(!n || hostname_check(u, (char *)newp, n)) {
1960 Curl_dyn_free(&enc);
1961 return CURLUE_BAD_HOSTNAME;
1962 }
1963 }
1964 }
1965
1966 free(*storep);
1967 *storep = (char *)newp;
1968 }
1969 /* set after the string, to make it not assigned if the allocation above
1970 fails */
1971 if(port)
1972 u->portnum = port;
1973 return CURLUE_OK;
1974 }
1975