• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37 #include "curl_memrchr.h"
38 
39 /* The last 3 #include files should be in this order */
40 #include "curl_printf.h"
41 #include "curl_memory.h"
42 #include "memdebug.h"
43 
44   /* MSDOS/Windows style drive prefix, eg c: in c:foo */
/* Note: 'str' is evaluated several times, so pass an expression without side
   effects. The argument is parenthesized so that pointer expressions such as
   (p + 1) expand correctly. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
49 
50   /* MSDOS/Windows style drive prefix, optionally with
51    * a '|' instead of ':', followed by a slash or NUL */
52 #define STARTS_WITH_URL_DRIVE_PREFIX(str) \
53   ((('a' <= (str)[0] && (str)[0] <= 'z') || \
54     ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
55    ((str)[1] == ':' || (str)[1] == '|') && \
56    ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
57 
58 /* scheme is not URL encoded, the longest libcurl supported ones are... */
59 #define MAX_SCHEME_LEN 40
60 
61 /*
62  * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
63  * sure we have _some_ value for AF_INET6 without polluting our fake value
64  * everywhere.
65  */
66 #if !defined(ENABLE_IPV6) && !defined(AF_INET6)
67 #define AF_INET6 (AF_INET + 1)
68 #endif
69 
/*
 * Internal representation of CURLU. The string members point to URL-encoded
 * strings; free_urlhandle() releases every one of them with free().
 */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port; /* the port number as a string */
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version of 'port' */
};
84 
85 #define DEFAULT_SCHEME "https"
86 
free_urlhandle(struct Curl_URL * u)87 static void free_urlhandle(struct Curl_URL *u)
88 {
89   free(u->scheme);
90   free(u->user);
91   free(u->password);
92   free(u->options);
93   free(u->host);
94   free(u->zoneid);
95   free(u->port);
96   free(u->path);
97   free(u->query);
98   free(u->fragment);
99 }
100 
/*
 * Return a pointer to the character that ends the host name part of 'url':
 * the first '/' or '?' found after the leading "//" (or from the start of
 * the string when there is no "//"). The '?' case covers URLs such as
 * http://www.example.com?id=2380. When neither character is present, the
 * returned pointer is the string's terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *host = strstr(url, "//");

  /* the host name starts right after the slashes, if there are any */
  host = host ? host + 2 : url;

  /* strcspn() stops on the first separator or at the end of the string */
  return host + strcspn(host, "/?");
}
128 
129 /* convert CURLcode to CURLUcode */
130 #define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
131                   CURLUE_OUT_OF_MEMORY)
132 /*
133  * Decide whether a character in a URL must be escaped.
134  */
135 #define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
136 
137 static const char hexdigits[] = "0123456789abcdef";
138 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
139  * spaces in the source URL accordingly.
140  *
141  * URL encoding should be skipped for host names, otherwise IDN resolution
142  * will fail.
143  */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)144 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
145                                size_t len, bool relative,
146                                bool query)
147 {
148   /* we must add this with whitespace-replacing */
149   bool left = !query;
150   const unsigned char *iptr;
151   const unsigned char *host_sep = (const unsigned char *) url;
152   CURLcode result;
153 
154   if(!relative)
155     host_sep = (const unsigned char *) find_host_sep(url);
156 
157   for(iptr = (unsigned char *)url;    /* read from here */
158       len; iptr++, len--) {
159 
160     if(iptr < host_sep) {
161       result = Curl_dyn_addn(o, iptr, 1);
162       if(result)
163         return cc2cu(result);
164       continue;
165     }
166 
167     if(*iptr == ' ') {
168       if(left)
169         result = Curl_dyn_addn(o, "%20", 3);
170       else
171         result = Curl_dyn_addn(o, "+", 1);
172       if(result)
173         return cc2cu(result);
174       continue;
175     }
176 
177     if(*iptr == '?')
178       left = FALSE;
179 
180     if(urlchar_needs_escaping(*iptr)) {
181       char out[3]={'%'};
182       out[1] = hexdigits[*iptr>>4];
183       out[2] = hexdigits[*iptr & 0xf];
184       result = Curl_dyn_addn(o, out, 3);
185     }
186     else
187       result = Curl_dyn_addn(o, iptr, 1);
188     if(result)
189       return cc2cu(result);
190   }
191 
192   return CURLUE_OK;
193 }
194 
195 /*
196  * Returns the length of the scheme if the given URL is absolute (as opposed
197  * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
198  * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
199  *
200  * If 'guess_scheme' is TRUE, it means the URL might be provided without
201  * scheme.
202  */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)203 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
204                             bool guess_scheme)
205 {
206   int i = 0;
207   DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
208   (void)buflen; /* only used in debug-builds */
209   if(buf)
210     buf[0] = 0; /* always leave a defined value in buf */
211 #ifdef _WIN32
212   if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
213     return 0;
214 #endif
215   if(ISALPHA(url[0]))
216     for(i = 1; i < MAX_SCHEME_LEN; ++i) {
217       char s = url[i];
218       if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
219         /* RFC 3986 3.1 explains:
220            scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
221         */
222       }
223       else {
224         break;
225       }
226     }
227   if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
228     /* If this does not guess scheme, the scheme always ends with the colon so
229        that this also detects data: URLs etc. In guessing mode, data: could
230        be the host name "data" with a specified port number. */
231 
232     /* the length of the scheme is the name part only */
233     size_t len = i;
234     if(buf) {
235       buf[i] = 0;
236       while(i--) {
237         buf[i] = Curl_raw_tolower(url[i]);
238       }
239     }
240     return len;
241   }
242   return 0;
243 }
244 
245 /*
246  * Concatenate a relative URL to a base URL making it absolute.
247  * URL-encodes any spaces.
248  * The returned pointer must be freed by the caller unless NULL
249  * (returns NULL on out of memory).
250  *
251  * Note that this function destroys the 'base' string.
252  */
concat_url(char * base,const char * relurl,char ** newurl)253 static CURLcode concat_url(char *base, const char *relurl, char **newurl)
254 {
255   /***
256    TRY to append this new path to the old URL
257    to the right of the host part. Oh crap, this is doomed to cause
258    problems in the future...
259   */
260   struct dynbuf newest;
261   char *protsep;
262   char *pathsep;
263   bool host_changed = FALSE;
264   const char *useurl = relurl;
265   CURLcode result = CURLE_OK;
266   CURLUcode uc;
267   *newurl = NULL;
268 
269   /* protsep points to the start of the host name */
270   protsep = strstr(base, "//");
271   if(!protsep)
272     protsep = base;
273   else
274     protsep += 2; /* pass the slashes */
275 
276   if('/' != relurl[0]) {
277     int level = 0;
278 
279     /* First we need to find out if there's a ?-letter in the URL,
280        and cut it and the right-side of that off */
281     pathsep = strchr(protsep, '?');
282     if(pathsep)
283       *pathsep = 0;
284 
285     /* we have a relative path to append to the last slash if there's one
286        available, or if the new URL is just a query string (starts with a
287        '?')  we append the new one at the end of the entire currently worked
288        out URL */
289     if(useurl[0] != '?') {
290       pathsep = strrchr(protsep, '/');
291       if(pathsep)
292         *pathsep = 0;
293     }
294 
295     /* Check if there's any slash after the host name, and if so, remember
296        that position instead */
297     pathsep = strchr(protsep, '/');
298     if(pathsep)
299       protsep = pathsep + 1;
300     else
301       protsep = NULL;
302 
303     /* now deal with one "./" or any amount of "../" in the newurl
304        and act accordingly */
305 
306     if((useurl[0] == '.') && (useurl[1] == '/'))
307       useurl += 2; /* just skip the "./" */
308 
309     while((useurl[0] == '.') &&
310           (useurl[1] == '.') &&
311           (useurl[2] == '/')) {
312       level++;
313       useurl += 3; /* pass the "../" */
314     }
315 
316     if(protsep) {
317       while(level--) {
318         /* cut off one more level from the right of the original URL */
319         pathsep = strrchr(protsep, '/');
320         if(pathsep)
321           *pathsep = 0;
322         else {
323           *protsep = 0;
324           break;
325         }
326       }
327     }
328   }
329   else {
330     /* We got a new absolute path for this server */
331 
332     if(relurl[1] == '/') {
333       /* the new URL starts with //, just keep the protocol part from the
334          original one */
335       *protsep = 0;
336       useurl = &relurl[2]; /* we keep the slashes from the original, so we
337                               skip the new ones */
338       host_changed = TRUE;
339     }
340     else {
341       /* cut off the original URL from the first slash, or deal with URLs
342          without slash */
343       pathsep = strchr(protsep, '/');
344       if(pathsep) {
345         /* When people use badly formatted URLs, such as
346            "http://www.example.com?dir=/home/daniel" we must not use the first
347            slash, if there's a ?-letter before it! */
348         char *sep = strchr(protsep, '?');
349         if(sep && (sep < pathsep))
350           pathsep = sep;
351         *pathsep = 0;
352       }
353       else {
354         /* There was no slash. Now, since we might be operating on a badly
355            formatted URL, such as "http://www.example.com?id=2380" which
356            doesn't use a slash separator as it is supposed to, we need to check
357            for a ?-letter as well! */
358         pathsep = strchr(protsep, '?');
359         if(pathsep)
360           *pathsep = 0;
361       }
362     }
363   }
364 
365   Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
366 
367   /* copy over the root url part */
368   result = Curl_dyn_add(&newest, base);
369   if(result)
370     return result;
371 
372   /* check if we need to append a slash */
373   if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
374     ;
375   else {
376     result = Curl_dyn_addn(&newest, "/", 1);
377     if(result)
378       return result;
379   }
380 
381   /* then append the new piece on the right side */
382   uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
383                      FALSE);
384   if(uc)
385     return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;
386 
387   *newurl = Curl_dyn_ptr(&newest);
388   return CURLE_OK;
389 }
390 
391 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)392 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
393 {
394   static const char badbytes[]={
395     /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
398     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
399     0x7f, 0x00 /* null-terminate */
400   };
401   size_t n = strlen(url);
402   size_t nfine;
403 
404   if(n > CURL_MAX_INPUT_LENGTH)
405     /* excessive input length */
406     return CURLUE_MALFORMED_INPUT;
407 
408   nfine = strcspn(url, badbytes);
409   if((nfine != n) ||
410      (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
411     return CURLUE_MALFORMED_INPUT;
412 
413   *urllen = n;
414   return CURLUE_OK;
415 }
416 
417 /*
418  * parse_hostname_login()
419  *
420  * Parse the login details (user name, password and options) from the URL and
421  * strip them out of the host name
422  *
423  */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)424 static CURLUcode parse_hostname_login(struct Curl_URL *u,
425                                       const char *login,
426                                       size_t len,
427                                       unsigned int flags,
428                                       size_t *offset) /* to the host name */
429 {
430   CURLUcode result = CURLUE_OK;
431   CURLcode ccode;
432   char *userp = NULL;
433   char *passwdp = NULL;
434   char *optionsp = NULL;
435   const struct Curl_handler *h = NULL;
436 
437   /* At this point, we assume all the other special cases have been taken
438    * care of, so the host is at most
439    *
440    *   [user[:password][;options]]@]hostname
441    *
442    * We need somewhere to put the embedded details, so do that first.
443    */
444   char *ptr;
445 
446   DEBUGASSERT(login);
447 
448   *offset = 0;
449   ptr = memchr(login, '@', len);
450   if(!ptr)
451     goto out;
452 
453   /* We will now try to extract the
454    * possible login information in a string like:
455    * ftp://user:password@ftp.my.site:8021/README */
456   ptr++;
457 
458   /* if this is a known scheme, get some details */
459   if(u->scheme)
460     h = Curl_get_scheme_handler(u->scheme);
461 
462   /* We could use the login information in the URL so extract it. Only parse
463      options if the handler says we should. Note that 'h' might be NULL! */
464   ccode = Curl_parse_login_details(login, ptr - login - 1,
465                                    &userp, &passwdp,
466                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
467                                    &optionsp:NULL);
468   if(ccode) {
469     result = CURLUE_BAD_LOGIN;
470     goto out;
471   }
472 
473   if(userp) {
474     if(flags & CURLU_DISALLOW_USER) {
475       /* Option DISALLOW_USER is set and url contains username. */
476       result = CURLUE_USER_NOT_ALLOWED;
477       goto out;
478     }
479     free(u->user);
480     u->user = userp;
481   }
482 
483   if(passwdp) {
484     free(u->password);
485     u->password = passwdp;
486   }
487 
488   if(optionsp) {
489     free(u->options);
490     u->options = optionsp;
491   }
492 
493   /* the host name starts at this offset */
494   *offset = ptr - login;
495   return CURLUE_OK;
496 
497 out:
498 
499   free(userp);
500   free(passwdp);
501   free(optionsp);
502   u->user = NULL;
503   u->password = NULL;
504   u->options = NULL;
505 
506   return result;
507 }
508 
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)509 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
510                                    bool has_scheme)
511 {
512   char *portptr;
513   char *hostname = Curl_dyn_ptr(host);
514   /*
515    * Find the end of an IPv6 address on the ']' ending bracket.
516    */
517   if(hostname[0] == '[') {
518     portptr = strchr(hostname, ']');
519     if(!portptr)
520       return CURLUE_BAD_IPV6;
521     portptr++;
522     /* this is a RFC2732-style specified IP-address */
523     if(*portptr) {
524       if(*portptr != ':')
525         return CURLUE_BAD_PORT_NUMBER;
526     }
527     else
528       portptr = NULL;
529   }
530   else
531     portptr = strchr(hostname, ':');
532 
533   if(portptr) {
534     char *rest;
535     long port;
536     size_t keep = portptr - hostname;
537 
538     /* Browser behavior adaptation. If there's a colon with no digits after,
539        just cut off the name there which makes us ignore the colon and just
540        use the default port. Firefox, Chrome and Safari all do that.
541 
542        Don't do it if the URL has no scheme, to make something that looks like
543        a scheme not work!
544     */
545     Curl_dyn_setlen(host, keep);
546     portptr++;
547     if(!*portptr)
548       return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
549 
550     if(!ISDIGIT(*portptr))
551       return CURLUE_BAD_PORT_NUMBER;
552 
553     port = strtol(portptr, &rest, 10);  /* Port number must be decimal */
554 
555     if(port > 0xffff)
556       return CURLUE_BAD_PORT_NUMBER;
557 
558     if(rest[0])
559       return CURLUE_BAD_PORT_NUMBER;
560 
561     u->portnum = port;
562     /* generate a new port number string to get rid of leading zeroes etc */
563     free(u->port);
564     u->port = aprintf("%ld", port);
565     if(!u->port)
566       return CURLUE_OUT_OF_MEMORY;
567   }
568 
569   return CURLUE_OK;
570 }
571 
572 /* this assumes 'hostname' now starts with [ */
ipv6_parse(struct Curl_URL * u,char * hostname,size_t hlen)573 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
574                             size_t hlen) /* length of hostname */
575 {
576   size_t len;
577   DEBUGASSERT(*hostname == '[');
578   if(hlen < 4) /* '[::]' is the shortest possible valid string */
579     return CURLUE_BAD_IPV6;
580   hostname++;
581   hlen -= 2;
582 
583   /* only valid IPv6 letters are ok */
584   len = strspn(hostname, "0123456789abcdefABCDEF:.");
585 
586   if(hlen != len) {
587     hlen = len;
588     if(hostname[len] == '%') {
589       /* this could now be '%[zone id]' */
590       char zoneid[16];
591       int i = 0;
592       char *h = &hostname[len + 1];
593       /* pass '25' if present and is a url encoded percent sign */
594       if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595         h += 2;
596       while(*h && (*h != ']') && (i < 15))
597         zoneid[i++] = *h++;
598       if(!i || (']' != *h))
599         return CURLUE_BAD_IPV6;
600       zoneid[i] = 0;
601       u->zoneid = strdup(zoneid);
602       if(!u->zoneid)
603         return CURLUE_OUT_OF_MEMORY;
604       hostname[len] = ']'; /* insert end bracket */
605       hostname[len + 1] = 0; /* terminate the hostname */
606     }
607     else
608       return CURLUE_BAD_IPV6;
609     /* hostname is fine */
610   }
611 
612   /* Check the IPv6 address. */
613   {
614     char dest[16]; /* fits a binary IPv6 address */
615     char norm[MAX_IPADR_LEN];
616     hostname[hlen] = 0; /* end the address there */
617     if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618       return CURLUE_BAD_IPV6;
619 
620     /* check if it can be done shorter */
621     if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622        (strlen(norm) < hlen)) {
623       strcpy(hostname, norm);
624       hlen = strlen(norm);
625       hostname[hlen + 1] = 0;
626     }
627     hostname[hlen] = ']'; /* restore ending bracket */
628   }
629   return CURLUE_OK;
630 }
631 
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)632 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
633                                 size_t hlen) /* length of hostname */
634 {
635   size_t len;
636   DEBUGASSERT(hostname);
637 
638   if(!hlen)
639     return CURLUE_NO_HOST;
640   else if(hostname[0] == '[')
641     return ipv6_parse(u, hostname, hlen);
642   else {
643     /* letters from the second string are not ok */
644     len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
645     if(hlen != len)
646       /* hostname with bad content */
647       return CURLUE_BAD_HOSTNAME;
648   }
649   return CURLUE_OK;
650 }
651 
652 /*
653  * Handle partial IPv4 numerical addresses and different bases, like
654  * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
655  *
656  * If the given input string is syntactically wrong IPv4 or any part for
657  * example is too big, this function returns HOST_NAME.
658  *
659  * Output the "normalized" version of that input string in plain quad decimal
660  * integers.
661  *
662  * Returns the host type.
663  */
664 
665 #define HOST_ERROR   -1 /* out of memory */
666 #define HOST_BAD     -2 /* bad IPv4 address */
667 
668 #define HOST_NAME    1
669 #define HOST_IPV4    2
670 #define HOST_IPV6    3
671 
ipv4_normalize(struct dynbuf * host)672 static int ipv4_normalize(struct dynbuf *host)
673 {
674   bool done = FALSE;
675   int n = 0;
676   const char *c = Curl_dyn_ptr(host);
677   unsigned long parts[4] = {0, 0, 0, 0};
678   CURLcode result = CURLE_OK;
679 
680   if(*c == '[')
681     return HOST_IPV6;
682 
683   while(!done) {
684     char *endp;
685     unsigned long l;
686     if(!ISDIGIT(*c))
687       /* most importantly this doesn't allow a leading plus or minus */
688       return HOST_NAME;
689     l = strtoul(c, &endp, 0);
690 
691     parts[n] = l;
692     c = endp;
693 
694     switch(*c) {
695     case '.':
696       if(n == 3)
697         return HOST_NAME;
698       n++;
699       c++;
700       break;
701 
702     case '\0':
703       done = TRUE;
704       break;
705 
706     default:
707       return HOST_NAME;
708     }
709 
710     /* overflow */
711     if((l == ULONG_MAX) && (errno == ERANGE))
712       return HOST_NAME;
713 
714 #if SIZEOF_LONG > 4
715     /* a value larger than 32 bits */
716     if(l > UINT_MAX)
717       return HOST_NAME;
718 #endif
719   }
720 
721   switch(n) {
722   case 0: /* a -- 32 bits */
723     Curl_dyn_reset(host);
724 
725     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
726                            (unsigned int)(parts[0] >> 24),
727                            (unsigned int)((parts[0] >> 16) & 0xff),
728                            (unsigned int)((parts[0] >> 8) & 0xff),
729                            (unsigned int)(parts[0] & 0xff));
730     break;
731   case 1: /* a.b -- 8.24 bits */
732     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
733       return HOST_NAME;
734     Curl_dyn_reset(host);
735     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
736                            (unsigned int)(parts[0]),
737                            (unsigned int)((parts[1] >> 16) & 0xff),
738                            (unsigned int)((parts[1] >> 8) & 0xff),
739                            (unsigned int)(parts[1] & 0xff));
740     break;
741   case 2: /* a.b.c -- 8.8.16 bits */
742     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
743       return HOST_NAME;
744     Curl_dyn_reset(host);
745     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
746                            (unsigned int)(parts[0]),
747                            (unsigned int)(parts[1]),
748                            (unsigned int)((parts[2] >> 8) & 0xff),
749                            (unsigned int)(parts[2] & 0xff));
750     break;
751   case 3: /* a.b.c.d -- 8.8.8.8 bits */
752     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
753        (parts[3] > 0xff))
754       return HOST_NAME;
755     Curl_dyn_reset(host);
756     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
757                            (unsigned int)(parts[0]),
758                            (unsigned int)(parts[1]),
759                            (unsigned int)(parts[2]),
760                            (unsigned int)(parts[3]));
761     break;
762   }
763   if(result)
764     return HOST_ERROR;
765   return HOST_IPV4;
766 }
767 
768 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)769 static CURLUcode urldecode_host(struct dynbuf *host)
770 {
771   char *per = NULL;
772   const char *hostname = Curl_dyn_ptr(host);
773   per = strchr(hostname, '%');
774   if(!per)
775     /* nothing to decode */
776     return CURLUE_OK;
777   else {
778     /* encoded */
779     size_t dlen;
780     char *decoded;
781     CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
782                                      REJECT_CTRL);
783     if(result)
784       return CURLUE_BAD_HOSTNAME;
785     Curl_dyn_reset(host);
786     result = Curl_dyn_addn(host, decoded, dlen);
787     free(decoded);
788     if(result)
789       return cc2cu(result);
790   }
791 
792   return CURLUE_OK;
793 }
794 
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)795 static CURLUcode parse_authority(struct Curl_URL *u,
796                                  const char *auth, size_t authlen,
797                                  unsigned int flags,
798                                  struct dynbuf *host,
799                                  bool has_scheme)
800 {
801   size_t offset;
802   CURLUcode uc;
803   CURLcode result;
804 
805   /*
806    * Parse the login details and strip them out of the host name.
807    */
808   uc = parse_hostname_login(u, auth, authlen, flags, &offset);
809   if(uc)
810     goto out;
811 
812   result = Curl_dyn_addn(host, auth + offset, authlen - offset);
813   if(result) {
814     uc = cc2cu(result);
815     goto out;
816   }
817 
818   uc = Curl_parse_port(u, host, has_scheme);
819   if(uc)
820     goto out;
821 
822   if(!Curl_dyn_len(host))
823     return CURLUE_NO_HOST;
824 
825   switch(ipv4_normalize(host)) {
826   case HOST_IPV4:
827     break;
828   case HOST_IPV6:
829     uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
830     break;
831   case HOST_NAME:
832     uc = urldecode_host(host);
833     if(!uc)
834       uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
835     break;
836   case HOST_ERROR:
837     uc = CURLUE_OUT_OF_MEMORY;
838     break;
839   case HOST_BAD:
840   default:
841     uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
842     break;
843   }
844 
845 out:
846   return uc;
847 }
848 
Curl_url_set_authority(CURLU * u,const char * authority,unsigned int flags)849 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
850                                  unsigned int flags)
851 {
852   CURLUcode result;
853   struct dynbuf host;
854 
855   DEBUGASSERT(authority);
856   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
857 
858   result = parse_authority(u, authority, strlen(authority), flags,
859                            &host, !!u->scheme);
860   if(result)
861     Curl_dyn_free(&host);
862   else {
863     free(u->host);
864     u->host = Curl_dyn_ptr(&host);
865   }
866   return result;
867 }
868 
869 /*
870  * "Remove Dot Segments"
871  * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
872  */
873 
874 /*
875  * dedotdotify()
876  * @unittest: 1395
877  *
878  * This function gets a null-terminated path with dot and dotdot sequences
879  * passed in and strips them off according to the rules in RFC 3986 section
880  * 5.2.4.
881  *
882  * The function handles a query part ('?' + stuff) appended but it expects
883  * that fragments ('#' + stuff) have already been cut off.
884  *
885  * RETURNS
886  *
887  * Zero for success and 'out' set to an allocated dedotdotified string.
888  */
889 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
dedotdotify(const char * input,size_t clen,char ** outp)890 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
891 {
892   char *outptr;
893   const char *endp = &input[clen];
894   char *out;
895 
896   *outp = NULL;
897   /* the path always starts with a slash, and a slash has not dot */
898   if((clen < 2) || !memchr(input, '.', clen))
899     return 0;
900 
901   out = malloc(clen + 1);
902   if(!out)
903     return 1; /* out of memory */
904 
905   *out = 0; /* null-terminates, for inputs like "./" */
906   outptr = out;
907 
908   do {
909     bool dotdot = TRUE;
910     if(*input == '.') {
911       /*  A.  If the input buffer begins with a prefix of "../" or "./", then
912           remove that prefix from the input buffer; otherwise, */
913 
914       if(!strncmp("./", input, 2)) {
915         input += 2;
916         clen -= 2;
917       }
918       else if(!strncmp("../", input, 3)) {
919         input += 3;
920         clen -= 3;
921       }
922       /*  D.  if the input buffer consists only of "." or "..", then remove
923           that from the input buffer; otherwise, */
924 
925       else if(!strcmp(".", input) || !strcmp("..", input) ||
926               !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
927         *out = 0;
928         break;
929       }
930       else
931         dotdot = FALSE;
932     }
933     else if(*input == '/') {
934       /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
935           "."  is a complete path segment, then replace that prefix with "/" in
936           the input buffer; otherwise, */
937       if(!strncmp("/./", input, 3)) {
938         input += 2;
939         clen -= 2;
940       }
941       else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
942         *outptr++ = '/';
943         *outptr = 0;
944         break;
945       }
946 
947       /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
948           where ".." is a complete path segment, then replace that prefix with
949           "/" in the input buffer and remove the last segment and its
950           preceding "/" (if any) from the output buffer; otherwise, */
951 
952       else if(!strncmp("/../", input, 4)) {
953         input += 3;
954         clen -= 3;
955         /* remove the last segment from the output buffer */
956         while(outptr > out) {
957           outptr--;
958           if(*outptr == '/')
959             break;
960         }
961         *outptr = 0; /* null-terminate where it stops */
962       }
963       else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
964         /* remove the last segment from the output buffer */
965         while(outptr > out) {
966           outptr--;
967           if(*outptr == '/')
968             break;
969         }
970         *outptr++ = '/';
971         *outptr = 0; /* null-terminate where it stops */
972         break;
973       }
974       else
975         dotdot = FALSE;
976     }
977     else
978       dotdot = FALSE;
979 
980     if(!dotdot) {
981       /*  E.  move the first path segment in the input buffer to the end of
982           the output buffer, including the initial "/" character (if any) and
983           any subsequent characters up to, but not including, the next "/"
984           character or the end of the input buffer. */
985 
986       do {
987         *outptr++ = *input++;
988         clen--;
989       } while(*input && (*input != '/') && (*input != '?'));
990       *outptr = 0;
991     }
992 
993     /* continue until end of path */
994   } while(input < endp);
995 
996   *outp = out;
997   return 0; /* success */
998 }
999 
parseurl(const char * url,CURLU * u,unsigned int flags)1000 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1001 {
1002   const char *path;
1003   size_t pathlen;
1004   char *query = NULL;
1005   char *fragment = NULL;
1006   char schemebuf[MAX_SCHEME_LEN + 1];
1007   size_t schemelen = 0;
1008   size_t urllen;
1009   CURLUcode result = CURLUE_OK;
1010   size_t fraglen = 0;
1011   struct dynbuf host;
1012 
1013   DEBUGASSERT(url);
1014 
1015   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1016 
1017   result = junkscan(url, &urllen, flags);
1018   if(result)
1019     goto fail;
1020 
1021   schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1022                                    flags & (CURLU_GUESS_SCHEME|
1023                                             CURLU_DEFAULT_SCHEME));
1024 
1025   /* handle the file: scheme */
1026   if(schemelen && !strcmp(schemebuf, "file")) {
1027     bool uncpath = FALSE;
1028     if(urllen <= 6) {
1029       /* file:/ is not enough to actually be a complete file: URL */
1030       result = CURLUE_BAD_FILE_URL;
1031       goto fail;
1032     }
1033 
1034     /* path has been allocated large enough to hold this */
1035     path = (char *)&url[5];
1036     pathlen = urllen - 5;
1037 
1038     u->scheme = strdup("file");
1039     if(!u->scheme) {
1040       result = CURLUE_OUT_OF_MEMORY;
1041       goto fail;
1042     }
1043 
1044     /* Extra handling URLs with an authority component (i.e. that start with
1045      * "file://")
1046      *
1047      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1048      * RFC 8089, but not the (current) WHAT-WG URL spec.
1049      */
1050     if(path[0] == '/' && path[1] == '/') {
1051       /* swallow the two slashes */
1052       const char *ptr = &path[2];
1053 
1054       /*
1055        * According to RFC 8089, a file: URL can be reliably dereferenced if:
1056        *
1057        *  o it has no/blank hostname, or
1058        *
1059        *  o the hostname matches "localhost" (case-insensitively), or
1060        *
1061        *  o the hostname is a FQDN that resolves to this machine, or
1062        *
1063        *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
1064        *    Appendix E.3).
1065        *
1066        * For brevity, we only consider URLs with empty, "localhost", or
1067        * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1068        *
1069        * Additionally, there is an exception for URLs with a Windows drive
1070        * letter in the authority (which was accidentally omitted from RFC 8089
1071        * Appendix E, but believe me, it was meant to be there. --MK)
1072        */
1073       if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1074         /* the URL includes a host name, it must match "localhost" or
1075            "127.0.0.1" to be valid */
1076         if(checkprefix("localhost/", ptr) ||
1077            checkprefix("127.0.0.1/", ptr)) {
1078           ptr += 9; /* now points to the slash after the host */
1079         }
1080         else {
1081 #if defined(_WIN32)
1082           size_t len;
1083 
1084           /* the host name, NetBIOS computer name, can not contain disallowed
1085              chars, and the delimiting slash character must be appended to the
1086              host name */
1087           path = strpbrk(ptr, "/\\:*?\"<>|");
1088           if(!path || *path != '/') {
1089             result = CURLUE_BAD_FILE_URL;
1090             goto fail;
1091           }
1092 
1093           len = path - ptr;
1094           if(len) {
1095             CURLcode code = Curl_dyn_addn(&host, ptr, len);
1096             if(code) {
1097               result = cc2cu(code);
1098               goto fail;
1099             }
1100             uncpath = TRUE;
1101           }
1102 
1103           ptr -= 2; /* now points to the // before the host in UNC */
1104 #else
1105           /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1106              none */
1107           result = CURLUE_BAD_FILE_URL;
1108           goto fail;
1109 #endif
1110         }
1111       }
1112 
1113       path = ptr;
1114       pathlen = urllen - (ptr - url);
1115     }
1116 
1117     if(!uncpath)
1118       /* no host for file: URLs by default */
1119       Curl_dyn_reset(&host);
1120 
1121 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1122     /* Don't allow Windows drive letters when not in Windows.
1123      * This catches both "file:/c:" and "file:c:" */
1124     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1125        STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1126       /* File drive letters are only accepted in MSDOS/Windows */
1127       result = CURLUE_BAD_FILE_URL;
1128       goto fail;
1129     }
1130 #else
1131     /* If the path starts with a slash and a drive letter, ditch the slash */
1132     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1133       /* This cannot be done with strcpy, as the memory chunks overlap! */
1134       path++;
1135       pathlen--;
1136     }
1137 #endif
1138 
1139   }
1140   else {
1141     /* clear path */
1142     const char *schemep = NULL;
1143     const char *hostp;
1144     size_t hostlen;
1145 
1146     if(schemelen) {
1147       int i = 0;
1148       const char *p = &url[schemelen + 1];
1149       while((*p == '/') && (i < 4)) {
1150         p++;
1151         i++;
1152       }
1153 
1154       schemep = schemebuf;
1155       if(!Curl_get_scheme_handler(schemep) &&
1156          !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1157         result = CURLUE_UNSUPPORTED_SCHEME;
1158         goto fail;
1159       }
1160 
1161       if((i < 1) || (i > 3)) {
1162         /* less than one or more than three slashes */
1163         result = CURLUE_BAD_SLASHES;
1164         goto fail;
1165       }
1166       hostp = p; /* host name starts here */
1167     }
1168     else {
1169       /* no scheme! */
1170 
1171       if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1172         result = CURLUE_BAD_SCHEME;
1173         goto fail;
1174       }
1175       if(flags & CURLU_DEFAULT_SCHEME)
1176         schemep = DEFAULT_SCHEME;
1177 
1178       /*
1179        * The URL was badly formatted, let's try without scheme specified.
1180        */
1181       hostp = url;
1182     }
1183 
1184     if(schemep) {
1185       u->scheme = strdup(schemep);
1186       if(!u->scheme) {
1187         result = CURLUE_OUT_OF_MEMORY;
1188         goto fail;
1189       }
1190     }
1191 
1192     /* find the end of the host name + port number */
1193     hostlen = strcspn(hostp, "/?#");
1194     path = &hostp[hostlen];
1195 
1196     /* this pathlen also contains the query and the fragment */
1197     pathlen = urllen - (path - url);
1198     if(hostlen) {
1199 
1200       result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1201       if(result)
1202         goto fail;
1203 
1204       if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1205         const char *hostname = Curl_dyn_ptr(&host);
1206         /* legacy curl-style guess based on host name */
1207         if(checkprefix("ftp.", hostname))
1208           schemep = "ftp";
1209         else if(checkprefix("dict.", hostname))
1210           schemep = "dict";
1211         else if(checkprefix("ldap.", hostname))
1212           schemep = "ldap";
1213         else if(checkprefix("imap.", hostname))
1214           schemep = "imap";
1215         else if(checkprefix("smtp.", hostname))
1216           schemep = "smtp";
1217         else if(checkprefix("pop3.", hostname))
1218           schemep = "pop3";
1219         else
1220           schemep = "http";
1221 
1222         u->scheme = strdup(schemep);
1223         if(!u->scheme) {
1224           result = CURLUE_OUT_OF_MEMORY;
1225           goto fail;
1226         }
1227       }
1228     }
1229     else if(flags & CURLU_NO_AUTHORITY) {
1230       /* allowed to be empty. */
1231       if(Curl_dyn_add(&host, "")) {
1232         result = CURLUE_OUT_OF_MEMORY;
1233         goto fail;
1234       }
1235     }
1236     else {
1237       result = CURLUE_NO_HOST;
1238       goto fail;
1239     }
1240   }
1241 
1242   fragment = strchr(path, '#');
1243   if(fragment) {
1244     fraglen = pathlen - (fragment - path);
1245     if(fraglen > 1) {
1246       /* skip the leading '#' in the copy but include the terminating null */
1247       if(flags & CURLU_URLENCODE) {
1248         struct dynbuf enc;
1249         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1250         result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1251         if(result)
1252           goto fail;
1253         u->fragment = Curl_dyn_ptr(&enc);
1254       }
1255       else {
1256         u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1257         if(!u->fragment) {
1258           result = CURLUE_OUT_OF_MEMORY;
1259           goto fail;
1260         }
1261       }
1262     }
1263     /* after this, pathlen still contains the query */
1264     pathlen -= fraglen;
1265   }
1266 
1267   query = memchr(path, '?', pathlen);
1268   if(query) {
1269     size_t qlen = fragment ? (size_t)(fragment - query) :
1270       pathlen - (query - path);
1271     pathlen -= qlen;
1272     if(qlen > 1) {
1273       if(flags & CURLU_URLENCODE) {
1274         struct dynbuf enc;
1275         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1276         /* skip the leading question mark */
1277         result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1278         if(result)
1279           goto fail;
1280         u->query = Curl_dyn_ptr(&enc);
1281       }
1282       else {
1283         u->query = Curl_memdup0(query + 1, qlen - 1);
1284         if(!u->query) {
1285           result = CURLUE_OUT_OF_MEMORY;
1286           goto fail;
1287         }
1288       }
1289     }
1290     else {
1291       /* single byte query */
1292       u->query = strdup("");
1293       if(!u->query) {
1294         result = CURLUE_OUT_OF_MEMORY;
1295         goto fail;
1296       }
1297     }
1298   }
1299 
1300   if(pathlen && (flags & CURLU_URLENCODE)) {
1301     struct dynbuf enc;
1302     Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1303     result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1304     if(result)
1305       goto fail;
1306     pathlen = Curl_dyn_len(&enc);
1307     path = u->path = Curl_dyn_ptr(&enc);
1308   }
1309 
1310   if(pathlen <= 1) {
1311     /* there is no path left or just the slash, unset */
1312     path = NULL;
1313   }
1314   else {
1315     if(!u->path) {
1316       u->path = Curl_memdup0(path, pathlen);
1317       if(!u->path) {
1318         result = CURLUE_OUT_OF_MEMORY;
1319         goto fail;
1320       }
1321       path = u->path;
1322     }
1323     else if(flags & CURLU_URLENCODE)
1324       /* it might have encoded more than just the path so cut it */
1325       u->path[pathlen] = 0;
1326 
1327     if(!(flags & CURLU_PATH_AS_IS)) {
1328       /* remove ../ and ./ sequences according to RFC3986 */
1329       char *dedot;
1330       int err = dedotdotify((char *)path, pathlen, &dedot);
1331       if(err) {
1332         result = CURLUE_OUT_OF_MEMORY;
1333         goto fail;
1334       }
1335       if(dedot) {
1336         free(u->path);
1337         u->path = dedot;
1338       }
1339     }
1340   }
1341 
1342   u->host = Curl_dyn_ptr(&host);
1343 
1344   return result;
1345 fail:
1346   Curl_dyn_free(&host);
1347   free_urlhandle(u);
1348   return result;
1349 }
1350 
1351 /*
1352  * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1353  */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1354 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1355                                       unsigned int flags)
1356 {
1357   CURLUcode result;
1358   CURLU tmpurl;
1359   memset(&tmpurl, 0, sizeof(tmpurl));
1360   result = parseurl(url, &tmpurl, flags);
1361   if(!result) {
1362     free_urlhandle(u);
1363     *u = tmpurl;
1364   }
1365   return result;
1366 }
1367 
1368 /*
1369  */
curl_url(void)1370 CURLU *curl_url(void)
1371 {
1372   return calloc(1, sizeof(struct Curl_URL));
1373 }
1374 
/*
 * curl_url_cleanup() frees all parts stored in the URL handle and then the
 * handle itself. Passing in NULL is a no-op.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1382 
/*
 * DUP() copies the string member 'name' from the struct 'src' points to into
 * the same member of the struct 'dest' points to, using strdup(). A NULL
 * source member is skipped. If strdup() fails, control jumps to a label named
 * 'fail' that the function using the macro must provide.
 *
 * The pointer arguments are parenthesized so that any pointer expression
 * (such as &object) can be passed in.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1391 
curl_url_dup(const CURLU * in)1392 CURLU *curl_url_dup(const CURLU *in)
1393 {
1394   struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1395   if(u) {
1396     DUP(u, in, scheme);
1397     DUP(u, in, user);
1398     DUP(u, in, password);
1399     DUP(u, in, options);
1400     DUP(u, in, host);
1401     DUP(u, in, port);
1402     DUP(u, in, path);
1403     DUP(u, in, query);
1404     DUP(u, in, fragment);
1405     DUP(u, in, zoneid);
1406     u->portnum = in->portnum;
1407   }
1408   return u;
1409 fail:
1410   curl_url_cleanup(u);
1411   return NULL;
1412 }
1413 
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1414 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1415                        char **part, unsigned int flags)
1416 {
1417   const char *ptr;
1418   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1419   char portbuf[7];
1420   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1421   bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1422   bool punycode = FALSE;
1423   bool depunyfy = FALSE;
1424   bool plusdecode = FALSE;
1425   (void)flags;
1426   if(!u)
1427     return CURLUE_BAD_HANDLE;
1428   if(!part)
1429     return CURLUE_BAD_PARTPOINTER;
1430   *part = NULL;
1431 
1432   switch(what) {
1433   case CURLUPART_SCHEME:
1434     ptr = u->scheme;
1435     ifmissing = CURLUE_NO_SCHEME;
1436     urldecode = FALSE; /* never for schemes */
1437     break;
1438   case CURLUPART_USER:
1439     ptr = u->user;
1440     ifmissing = CURLUE_NO_USER;
1441     break;
1442   case CURLUPART_PASSWORD:
1443     ptr = u->password;
1444     ifmissing = CURLUE_NO_PASSWORD;
1445     break;
1446   case CURLUPART_OPTIONS:
1447     ptr = u->options;
1448     ifmissing = CURLUE_NO_OPTIONS;
1449     break;
1450   case CURLUPART_HOST:
1451     ptr = u->host;
1452     ifmissing = CURLUE_NO_HOST;
1453     punycode = (flags & CURLU_PUNYCODE)?1:0;
1454     depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1455     break;
1456   case CURLUPART_ZONEID:
1457     ptr = u->zoneid;
1458     ifmissing = CURLUE_NO_ZONEID;
1459     break;
1460   case CURLUPART_PORT:
1461     ptr = u->port;
1462     ifmissing = CURLUE_NO_PORT;
1463     urldecode = FALSE; /* never for port */
1464     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1465       /* there's no stored port number, but asked to deliver
1466          a default one for the scheme */
1467       const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1468       if(h) {
1469         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1470         ptr = portbuf;
1471       }
1472     }
1473     else if(ptr && u->scheme) {
1474       /* there is a stored port number, but ask to inhibit if
1475          it matches the default one for the scheme */
1476       const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1477       if(h && (h->defport == u->portnum) &&
1478          (flags & CURLU_NO_DEFAULT_PORT))
1479         ptr = NULL;
1480     }
1481     break;
1482   case CURLUPART_PATH:
1483     ptr = u->path;
1484     if(!ptr)
1485       ptr = "/";
1486     break;
1487   case CURLUPART_QUERY:
1488     ptr = u->query;
1489     ifmissing = CURLUE_NO_QUERY;
1490     plusdecode = urldecode;
1491     break;
1492   case CURLUPART_FRAGMENT:
1493     ptr = u->fragment;
1494     ifmissing = CURLUE_NO_FRAGMENT;
1495     break;
1496   case CURLUPART_URL: {
1497     char *url;
1498     char *scheme;
1499     char *options = u->options;
1500     char *port = u->port;
1501     char *allochost = NULL;
1502     punycode = (flags & CURLU_PUNYCODE)?1:0;
1503     depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1504     if(u->scheme && strcasecompare("file", u->scheme)) {
1505       url = aprintf("file://%s%s%s",
1506                     u->path,
1507                     u->fragment? "#": "",
1508                     u->fragment? u->fragment : "");
1509     }
1510     else if(!u->host)
1511       return CURLUE_NO_HOST;
1512     else {
1513       const struct Curl_handler *h = NULL;
1514       if(u->scheme)
1515         scheme = u->scheme;
1516       else if(flags & CURLU_DEFAULT_SCHEME)
1517         scheme = (char *) DEFAULT_SCHEME;
1518       else
1519         return CURLUE_NO_SCHEME;
1520 
1521       h = Curl_get_scheme_handler(scheme);
1522       if(!port && (flags & CURLU_DEFAULT_PORT)) {
1523         /* there's no stored port number, but asked to deliver
1524            a default one for the scheme */
1525         if(h) {
1526           msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1527           port = portbuf;
1528         }
1529       }
1530       else if(port) {
1531         /* there is a stored port number, but asked to inhibit if it matches
1532            the default one for the scheme */
1533         if(h && (h->defport == u->portnum) &&
1534            (flags & CURLU_NO_DEFAULT_PORT))
1535           port = NULL;
1536       }
1537 
1538       if(h && !(h->flags & PROTOPT_URLOPTIONS))
1539         options = NULL;
1540 
1541       if(u->host[0] == '[') {
1542         if(u->zoneid) {
1543           /* make it '[ host %25 zoneid ]' */
1544           struct dynbuf enc;
1545           size_t hostlen = strlen(u->host);
1546           Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1547           if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1548                            u->zoneid))
1549             return CURLUE_OUT_OF_MEMORY;
1550           allochost = Curl_dyn_ptr(&enc);
1551         }
1552       }
1553       else if(urlencode) {
1554         allochost = curl_easy_escape(NULL, u->host, 0);
1555         if(!allochost)
1556           return CURLUE_OUT_OF_MEMORY;
1557       }
1558       else if(punycode) {
1559         if(!Curl_is_ASCII_name(u->host)) {
1560 #ifndef USE_IDN
1561           return CURLUE_LACKS_IDN;
1562 #else
1563           CURLcode result = Curl_idn_decode(u->host, &allochost);
1564           if(result)
1565             return (result == CURLE_OUT_OF_MEMORY) ?
1566               CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1567 #endif
1568         }
1569       }
1570       else if(depunyfy) {
1571         if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1572 #ifndef USE_IDN
1573           return CURLUE_LACKS_IDN;
1574 #else
1575           CURLcode result = Curl_idn_encode(u->host, &allochost);
1576           if(result)
1577             /* this is the most likely error */
1578             return (result == CURLE_OUT_OF_MEMORY) ?
1579               CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1580 #endif
1581         }
1582       }
1583 
1584       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1585                     scheme,
1586                     u->user ? u->user : "",
1587                     u->password ? ":": "",
1588                     u->password ? u->password : "",
1589                     options ? ";" : "",
1590                     options ? options : "",
1591                     (u->user || u->password || options) ? "@": "",
1592                     allochost ? allochost : u->host,
1593                     port ? ":": "",
1594                     port ? port : "",
1595                     u->path ? u->path : "/",
1596                     (u->query && u->query[0]) ? "?": "",
1597                     (u->query && u->query[0]) ? u->query : "",
1598                     u->fragment? "#": "",
1599                     u->fragment? u->fragment : "");
1600       free(allochost);
1601     }
1602     if(!url)
1603       return CURLUE_OUT_OF_MEMORY;
1604     *part = url;
1605     return CURLUE_OK;
1606   }
1607   default:
1608     ptr = NULL;
1609     break;
1610   }
1611   if(ptr) {
1612     size_t partlen = strlen(ptr);
1613     size_t i = 0;
1614     *part = Curl_memdup0(ptr, partlen);
1615     if(!*part)
1616       return CURLUE_OUT_OF_MEMORY;
1617     if(plusdecode) {
1618       /* convert + to space */
1619       char *plus = *part;
1620       for(i = 0; i < partlen; ++plus, i++) {
1621         if(*plus == '+')
1622           *plus = ' ';
1623       }
1624     }
1625     if(urldecode) {
1626       char *decoded;
1627       size_t dlen;
1628       /* this unconditional rejection of control bytes is documented
1629          API behavior */
1630       CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1631       free(*part);
1632       if(res) {
1633         *part = NULL;
1634         return CURLUE_URLDECODE;
1635       }
1636       *part = decoded;
1637       partlen = dlen;
1638     }
1639     if(urlencode) {
1640       struct dynbuf enc;
1641       CURLUcode uc;
1642       Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1643       uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1644       if(uc)
1645         return uc;
1646       free(*part);
1647       *part = Curl_dyn_ptr(&enc);
1648     }
1649     else if(punycode) {
1650       if(!Curl_is_ASCII_name(u->host)) {
1651 #ifndef USE_IDN
1652         return CURLUE_LACKS_IDN;
1653 #else
1654         char *allochost;
1655         CURLcode result = Curl_idn_decode(*part, &allochost);
1656         if(result)
1657           return (result == CURLE_OUT_OF_MEMORY) ?
1658             CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1659         free(*part);
1660         *part = allochost;
1661 #endif
1662       }
1663     }
1664     else if(depunyfy) {
1665       if(Curl_is_ASCII_name(u->host)  && !strncmp("xn--", u->host, 4)) {
1666 #ifndef USE_IDN
1667         return CURLUE_LACKS_IDN;
1668 #else
1669         char *allochost;
1670         CURLcode result = Curl_idn_encode(*part, &allochost);
1671         if(result)
1672           return (result == CURLE_OUT_OF_MEMORY) ?
1673             CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1674         free(*part);
1675         *part = allochost;
1676 #endif
1677       }
1678     }
1679 
1680     return CURLUE_OK;
1681   }
1682   else
1683     return ifmissing;
1684 }
1685 
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1686 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1687                        const char *part, unsigned int flags)
1688 {
1689   char **storep = NULL;
1690   long port = 0;
1691   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1692   bool plusencode = FALSE;
1693   bool urlskipslash = FALSE;
1694   bool leadingslash = FALSE;
1695   bool appendquery = FALSE;
1696   bool equalsencode = FALSE;
1697   size_t nalloc;
1698 
1699   if(!u)
1700     return CURLUE_BAD_HANDLE;
1701   if(!part) {
1702     /* setting a part to NULL clears it */
1703     switch(what) {
1704     case CURLUPART_URL:
1705       break;
1706     case CURLUPART_SCHEME:
1707       storep = &u->scheme;
1708       break;
1709     case CURLUPART_USER:
1710       storep = &u->user;
1711       break;
1712     case CURLUPART_PASSWORD:
1713       storep = &u->password;
1714       break;
1715     case CURLUPART_OPTIONS:
1716       storep = &u->options;
1717       break;
1718     case CURLUPART_HOST:
1719       storep = &u->host;
1720       break;
1721     case CURLUPART_ZONEID:
1722       storep = &u->zoneid;
1723       break;
1724     case CURLUPART_PORT:
1725       u->portnum = 0;
1726       storep = &u->port;
1727       break;
1728     case CURLUPART_PATH:
1729       storep = &u->path;
1730       break;
1731     case CURLUPART_QUERY:
1732       storep = &u->query;
1733       break;
1734     case CURLUPART_FRAGMENT:
1735       storep = &u->fragment;
1736       break;
1737     default:
1738       return CURLUE_UNKNOWN_PART;
1739     }
1740     if(storep && *storep) {
1741       Curl_safefree(*storep);
1742     }
1743     else if(!storep) {
1744       free_urlhandle(u);
1745       memset(u, 0, sizeof(struct Curl_URL));
1746     }
1747     return CURLUE_OK;
1748   }
1749 
1750   nalloc = strlen(part);
1751   if(nalloc > CURL_MAX_INPUT_LENGTH)
1752     /* excessive input length */
1753     return CURLUE_MALFORMED_INPUT;
1754 
1755   switch(what) {
1756   case CURLUPART_SCHEME: {
1757     size_t plen = strlen(part);
1758     const char *s = part;
1759     if((plen > MAX_SCHEME_LEN) || (plen < 1))
1760       /* too long or too short */
1761       return CURLUE_BAD_SCHEME;
1762    /* verify that it is a fine scheme */
1763     if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1764       return CURLUE_UNSUPPORTED_SCHEME;
1765     storep = &u->scheme;
1766     urlencode = FALSE; /* never */
1767     if(ISALPHA(*s)) {
1768       /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1769       while(--plen) {
1770         if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1771           s++; /* fine */
1772         else
1773           return CURLUE_BAD_SCHEME;
1774       }
1775     }
1776     else
1777       return CURLUE_BAD_SCHEME;
1778     break;
1779   }
1780   case CURLUPART_USER:
1781     storep = &u->user;
1782     break;
1783   case CURLUPART_PASSWORD:
1784     storep = &u->password;
1785     break;
1786   case CURLUPART_OPTIONS:
1787     storep = &u->options;
1788     break;
1789   case CURLUPART_HOST:
1790     storep = &u->host;
1791     Curl_safefree(u->zoneid);
1792     break;
1793   case CURLUPART_ZONEID:
1794     storep = &u->zoneid;
1795     break;
1796   case CURLUPART_PORT:
1797   {
1798     char *endp;
1799     urlencode = FALSE; /* never */
1800     port = strtol(part, &endp, 10);  /* Port number must be decimal */
1801     if((port <= 0) || (port > 0xffff))
1802       return CURLUE_BAD_PORT_NUMBER;
1803     if(*endp)
1804       /* weirdly provided number, not good! */
1805       return CURLUE_BAD_PORT_NUMBER;
1806     storep = &u->port;
1807   }
1808   break;
1809   case CURLUPART_PATH:
1810     urlskipslash = TRUE;
1811     leadingslash = TRUE; /* enforce */
1812     storep = &u->path;
1813     break;
1814   case CURLUPART_QUERY:
1815     plusencode = urlencode;
1816     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1817     equalsencode = appendquery;
1818     storep = &u->query;
1819     break;
1820   case CURLUPART_FRAGMENT:
1821     storep = &u->fragment;
1822     break;
1823   case CURLUPART_URL: {
1824     /*
1825      * Allow a new URL to replace the existing (if any) contents.
1826      *
1827      * If the existing contents is enough for a URL, allow a relative URL to
1828      * replace it.
1829      */
1830     CURLcode result;
1831     CURLUcode uc;
1832     char *oldurl;
1833     char *redired_url;
1834 
1835     if(!nalloc)
1836       /* a blank URL is not a valid URL */
1837       return CURLUE_MALFORMED_INPUT;
1838 
1839     /* if the new thing is absolute or the old one is not
1840      * (we could not get an absolute url in 'oldurl'),
1841      * then replace the existing with the new. */
1842     if(Curl_is_absolute_url(part, NULL, 0,
1843                             flags & (CURLU_GUESS_SCHEME|
1844                                      CURLU_DEFAULT_SCHEME))
1845        || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1846       return parseurl_and_replace(part, u, flags);
1847     }
1848 
1849     /* apply the relative part to create a new URL
1850      * and replace the existing one with it. */
1851     result = concat_url(oldurl, part, &redired_url);
1852     free(oldurl);
1853     if(result)
1854       return cc2cu(result);
1855 
1856     uc = parseurl_and_replace(redired_url, u, flags);
1857     free(redired_url);
1858     return uc;
1859   }
1860   default:
1861     return CURLUE_UNKNOWN_PART;
1862   }
1863   DEBUGASSERT(storep);
1864   {
1865     const char *newp;
1866     struct dynbuf enc;
1867     Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1868 
1869     if(leadingslash && (part[0] != '/')) {
1870       CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1871       if(result)
1872         return cc2cu(result);
1873     }
1874     if(urlencode) {
1875       const unsigned char *i;
1876 
1877       for(i = (const unsigned char *)part; *i; i++) {
1878         CURLcode result;
1879         if((*i == ' ') && plusencode) {
1880           result = Curl_dyn_addn(&enc, "+", 1);
1881           if(result)
1882             return CURLUE_OUT_OF_MEMORY;
1883         }
1884         else if(ISUNRESERVED(*i) ||
1885                 ((*i == '/') && urlskipslash) ||
1886                 ((*i == '=') && equalsencode)) {
1887           if((*i == '=') && equalsencode)
1888             /* only skip the first equals sign */
1889             equalsencode = FALSE;
1890           result = Curl_dyn_addn(&enc, i, 1);
1891           if(result)
1892             return cc2cu(result);
1893         }
1894         else {
1895           char out[3]={'%'};
1896           out[1] = hexdigits[*i>>4];
1897           out[2] = hexdigits[*i & 0xf];
1898           result = Curl_dyn_addn(&enc, out, 3);
1899           if(result)
1900             return cc2cu(result);
1901         }
1902       }
1903     }
1904     else {
1905       char *p;
1906       CURLcode result = Curl_dyn_add(&enc, part);
1907       if(result)
1908         return cc2cu(result);
1909       p = Curl_dyn_ptr(&enc);
1910       while(*p) {
1911         /* make sure percent encoded are lower case */
1912         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1913            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1914           p[1] = Curl_raw_tolower(p[1]);
1915           p[2] = Curl_raw_tolower(p[2]);
1916           p += 3;
1917         }
1918         else
1919           p++;
1920       }
1921     }
1922     newp = Curl_dyn_ptr(&enc);
1923 
1924     if(appendquery && newp) {
1925       /* Append the 'newp' string onto the old query. Add a '&' separator if
1926          none is present at the end of the existing query already */
1927 
1928       size_t querylen = u->query ? strlen(u->query) : 0;
1929       bool addamperand = querylen && (u->query[querylen -1] != '&');
1930       if(querylen) {
1931         struct dynbuf qbuf;
1932         Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1933 
1934         if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1935           goto nomem;
1936 
1937         if(addamperand) {
1938           if(Curl_dyn_addn(&qbuf, "&", 1))
1939             goto nomem;
1940         }
1941         if(Curl_dyn_add(&qbuf, newp))
1942           goto nomem;
1943         Curl_dyn_free(&enc);
1944         free(*storep);
1945         *storep = Curl_dyn_ptr(&qbuf);
1946         return CURLUE_OK;
1947 nomem:
1948         Curl_dyn_free(&enc);
1949         return CURLUE_OUT_OF_MEMORY;
1950       }
1951     }
1952 
1953     else if(what == CURLUPART_HOST) {
1954       size_t n = Curl_dyn_len(&enc);
1955       if(!n && (flags & CURLU_NO_AUTHORITY)) {
1956         /* Skip hostname check, it's allowed to be empty. */
1957       }
1958       else {
1959         if(!n || hostname_check(u, (char *)newp, n)) {
1960           Curl_dyn_free(&enc);
1961           return CURLUE_BAD_HOSTNAME;
1962         }
1963       }
1964     }
1965 
1966     free(*storep);
1967     *storep = (char *)newp;
1968   }
1969   /* set after the string, to make it not assigned if the allocation above
1970      fails */
1971   if(port)
1972     u->portnum = port;
1973   return CURLUE_OK;
1974 }
1975