• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37 #include "curl_memrchr.h"
38 
39 /* The last 3 #include files should be in this order */
40 #include "curl_printf.h"
41 #include "curl_memory.h"
42 #include "memdebug.h"
43 
/* MSDOS/Windows style drive prefix, e.g. "c:" in "c:foo".
 *
 * 'str' is parenthesized at every use so that any pointer expression (such
 * as p + 1) can be passed. It is evaluated several times, so it must not
 * have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
49 
/* Drive prefix as it can appear in a URL path: one ASCII letter, then ':'
 * or '|', then a slash, a backslash or the end of the string. 'str' is
 * evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || !(str)[2]))
57 
/* Upper bound for the length of a scheme name. Schemes are not URL encoded,
   so this counts plain characters; Curl_is_absolute_url() gives up looking
   for the terminating colon beyond this many characters. */
#define MAX_SCHEME_LEN 40
60 
/*
 * IPv6 address literals are parsed even when USE_IPV6 is not defined. Such a
 * build may lack AF_INET6, so provide a stand-in value here, local to this
 * file, rather than spreading a fake definition everywhere.
 */
#ifndef USE_IPV6
#ifndef AF_INET6
#define AF_INET6 (AF_INET + 1)
#endif
#endif
69 
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Each string member is passed to free() by free_urlhandle(), so a member is
 * either NULL or an allocation owned by the handle. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;   /* decimal text, regenerated by Curl_parse_port() */
  char *path;
  char *query;
  char *fragment;
  unsigned short portnum; /* the numerical version (if 'port' is set) */
  BIT(query_present);    /* to support blank */
  BIT(fragment_present); /* to support blank */
};
86 
/* NOTE(review): presumably the scheme assumed for a URL given without one
   when CURLU_DEFAULT_SCHEME is set -- confirm at the place it is used */
#define DEFAULT_SCHEME "https"
88 
free_urlhandle(struct Curl_URL * u)89 static void free_urlhandle(struct Curl_URL *u)
90 {
91   free(u->scheme);
92   free(u->user);
93   free(u->password);
94   free(u->options);
95   free(u->host);
96   free(u->zoneid);
97   free(u->port);
98   free(u->path);
99   free(u->query);
100   free(u->fragment);
101 }
102 
/*
 * Return a pointer to the character that ends the host name part of 'url':
 * the first '/' or '?' found after a leading "//" (or from the start of the
 * string when there is no "//"), whichever comes first. The '?' matters for
 * URLs like http://www.example.com?id=2380. If neither is present, the
 * returned pointer is the terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *dslash = strstr(url, "//");
  const char *host = dslash ? dslash + 2 : url;
  const char *slash = strchr(host, '/');
  const char *qmark = strchr(host, '?');

  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
130 
/* Translate a CURLcode into a CURLUcode. CURLE_TOO_LARGE has a direct
   counterpart; every other value, CURLE_OK included, becomes "out of
   memory", so this is only meaningful for a failed result. */
#define cc2cu(x) ((x) != CURLE_TOO_LARGE ? CURLUE_OUT_OF_MEMORY :   \
                  CURLUE_TOO_LARGE)
/*
 * A byte in a URL must be percent-encoded when it is neither a control
 * character, nor whitespace, nor a graphic (printable) character.
 */
#define urlchar_needs_escaping(c) \
  (!ISCNTRL(c) && !ISSPACE(c) && !ISGRAPH(c))
138 
139 static const char hexdigits[] = "0123456789abcdef";
140 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
141  * spaces in the source URL accordingly.
142  *
143  * URL encoding should be skipped for host names, otherwise IDN resolution
144  * will fail.
145  */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)146 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
147                                size_t len, bool relative,
148                                bool query)
149 {
150   /* we must add this with whitespace-replacing */
151   bool left = !query;
152   const unsigned char *iptr;
153   const unsigned char *host_sep = (const unsigned char *) url;
154   CURLcode result;
155 
156   if(!relative)
157     host_sep = (const unsigned char *) find_host_sep(url);
158 
159   for(iptr = (unsigned char *)url;    /* read from here */
160       len; iptr++, len--) {
161 
162     if(iptr < host_sep) {
163       result = Curl_dyn_addn(o, iptr, 1);
164       if(result)
165         return cc2cu(result);
166       continue;
167     }
168 
169     if(*iptr == ' ') {
170       if(left)
171         result = Curl_dyn_addn(o, "%20", 3);
172       else
173         result = Curl_dyn_addn(o, "+", 1);
174       if(result)
175         return cc2cu(result);
176       continue;
177     }
178 
179     if(*iptr == '?')
180       left = FALSE;
181 
182     if(urlchar_needs_escaping(*iptr)) {
183       char out[3]={'%'};
184       out[1] = hexdigits[*iptr>>4];
185       out[2] = hexdigits[*iptr & 0xf];
186       result = Curl_dyn_addn(o, out, 3);
187     }
188     else
189       result = Curl_dyn_addn(o, iptr, 1);
190     if(result)
191       return cc2cu(result);
192   }
193 
194   return CURLUE_OK;
195 }
196 
197 /*
198  * Returns the length of the scheme if the given URL is absolute (as opposed
199  * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
200  * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
201  *
202  * If 'guess_scheme' is TRUE, it means the URL might be provided without
203  * scheme.
204  */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)205 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
206                             bool guess_scheme)
207 {
208   int i = 0;
209   DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
210   (void)buflen; /* only used in debug-builds */
211   if(buf)
212     buf[0] = 0; /* always leave a defined value in buf */
213 #ifdef _WIN32
214   if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
215     return 0;
216 #endif
217   if(ISALPHA(url[0]))
218     for(i = 1; i < MAX_SCHEME_LEN; ++i) {
219       char s = url[i];
220       if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
221         /* RFC 3986 3.1 explains:
222            scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223         */
224       }
225       else {
226         break;
227       }
228     }
229   if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
230     /* If this does not guess scheme, the scheme always ends with the colon so
231        that this also detects data: URLs etc. In guessing mode, data: could
232        be the host name "data" with a specified port number. */
233 
234     /* the length of the scheme is the name part only */
235     size_t len = i;
236     if(buf) {
237       Curl_strntolower(buf, url, i);
238       buf[i] = 0;
239     }
240     return len;
241   }
242   return 0;
243 }
244 
/*
 * Concatenate a relative URL to a base URL making it absolute.
 * URL-encodes any spaces (done by urlencode_str()).
 *
 * On success, CURLE_OK is returned and *newurl points to the new URL, a
 * buffer obtained from a dynbuf that the caller takes over and must free.
 * On failure an error code is returned and *newurl is NULL.
 *
 * Note that this function destroys the 'base' string: null bytes are written
 * into it to cut off the parts that the relative URL replaces.
 */
static CURLcode concat_url(char *base, const char *relurl, char **newurl)
{
  /***
   TRY to append this new path to the old URL
   to the right of the host part. Oh crap, this is doomed to cause
   problems in the future...
  */
  struct dynbuf newest;
  char *protsep;
  char *pathsep;
  bool host_changed = FALSE;
  const char *useurl = relurl;
  CURLcode result = CURLE_OK;
  CURLUcode uc;
  bool skip_slash = FALSE;
  *newurl = NULL;

  /* protsep points to the start of the host name */
  protsep = strstr(base, "//");
  if(!protsep)
    protsep = base;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    /* number of leading "../" sequences found in the relative URL */
    int level = 0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or the new URL is just a query string (starts with a '?') or
       a fragment (starts with '#') we append the new one at the end of the
       current URL */
    if((useurl[0] != '?') && (useurl[0] != '#')) {
      /* drop the last path segment (the "file name") of the base */
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;

      /* Check if there's any slash after the host name, and if so, remember
         that position instead */
      pathsep = strchr(protsep, '/');
      if(pathsep)
        protsep = pathsep + 1;
      else
        protsep = NULL; /* the base has no path left to cut levels from */

      /* now deal with one "./" or any amount of "../" in the newurl
         and act accordingly */

      if((useurl[0] == '.') && (useurl[1] == '/'))
        useurl += 2; /* just skip the "./" */

      while((useurl[0] == '.') &&
            (useurl[1] == '.') &&
            (useurl[2] == '/')) {
        level++;
        useurl += 3; /* pass the "../" */
      }

      if(protsep) {
        while(level--) {
          /* cut off one more level from the right of the original URL */
          pathsep = strrchr(protsep, '/');
          if(pathsep)
            *pathsep = 0;
          else {
            /* nothing more to cut: leave an empty path and stop */
            *protsep = 0;
            break;
          }
        }
      }
    }
    else
      /* query or fragment only: it goes directly after the base */
      skip_slash = TRUE;
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.example.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.example.com?id=2380" which
           doesn't use a slash separator as it is supposed to, we need to check
           for a ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);

  /* copy over the root url part */
  result = Curl_dyn_add(&newest, base);
  if(result)
    /* NOTE(review): no Curl_dyn_free() here; presumably the dynbuf
       functions release the buffer themselves on failure -- confirm */
    return result;

  /* check if we need to append a slash: not when the new part brings its
     own, when the base path was cut down to nothing, or when only a query
     or fragment is appended */
  if(('/' == useurl[0]) || (protsep && !*protsep) || skip_slash)
    ;
  else {
    result = Curl_dyn_addn(&newest, "/", 1);
    if(result)
      return result;
  }

  /* then append the new piece on the right side. When the host was replaced
     the new piece starts with a host name, so it is not treated as
     "relative" and the host part is left unencoded. */
  uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
                     FALSE);
  if(uc)
    return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;

  *newurl = Curl_dyn_ptr(&newest);
  return CURLE_OK;
}
393 
394 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)395 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
396 {
397   static const char badbytes[]={
398     /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
399     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
400     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
401     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
402     0x7f, 0x00 /* null-terminate */
403   };
404   size_t n = strlen(url);
405   size_t nfine;
406 
407   if(n > CURL_MAX_INPUT_LENGTH)
408     /* excessive input length */
409     return CURLUE_MALFORMED_INPUT;
410 
411   nfine = strcspn(url, badbytes);
412   if((nfine != n) ||
413      (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
414     return CURLUE_MALFORMED_INPUT;
415 
416   *urllen = n;
417   return CURLUE_OK;
418 }
419 
420 /*
421  * parse_hostname_login()
422  *
423  * Parse the login details (user name, password and options) from the URL and
424  * strip them out of the host name
425  *
426  */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)427 static CURLUcode parse_hostname_login(struct Curl_URL *u,
428                                       const char *login,
429                                       size_t len,
430                                       unsigned int flags,
431                                       size_t *offset) /* to the host name */
432 {
433   CURLUcode result = CURLUE_OK;
434   CURLcode ccode;
435   char *userp = NULL;
436   char *passwdp = NULL;
437   char *optionsp = NULL;
438   const struct Curl_handler *h = NULL;
439 
440   /* At this point, we assume all the other special cases have been taken
441    * care of, so the host is at most
442    *
443    *   [user[:password][;options]]@]hostname
444    *
445    * We need somewhere to put the embedded details, so do that first.
446    */
447   char *ptr;
448 
449   DEBUGASSERT(login);
450 
451   *offset = 0;
452   ptr = memchr(login, '@', len);
453   if(!ptr)
454     goto out;
455 
456   /* We will now try to extract the
457    * possible login information in a string like:
458    * ftp://user:password@ftp.my.site:8021/README */
459   ptr++;
460 
461   /* if this is a known scheme, get some details */
462   if(u->scheme)
463     h = Curl_get_scheme_handler(u->scheme);
464 
465   /* We could use the login information in the URL so extract it. Only parse
466      options if the handler says we should. Note that 'h' might be NULL! */
467   ccode = Curl_parse_login_details(login, ptr - login - 1,
468                                    &userp, &passwdp,
469                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
470                                    &optionsp:NULL);
471   if(ccode) {
472     result = CURLUE_BAD_LOGIN;
473     goto out;
474   }
475 
476   if(userp) {
477     if(flags & CURLU_DISALLOW_USER) {
478       /* Option DISALLOW_USER is set and url contains username. */
479       result = CURLUE_USER_NOT_ALLOWED;
480       goto out;
481     }
482     free(u->user);
483     u->user = userp;
484   }
485 
486   if(passwdp) {
487     free(u->password);
488     u->password = passwdp;
489   }
490 
491   if(optionsp) {
492     free(u->options);
493     u->options = optionsp;
494   }
495 
496   /* the host name starts at this offset */
497   *offset = ptr - login;
498   return CURLUE_OK;
499 
500 out:
501 
502   free(userp);
503   free(passwdp);
504   free(optionsp);
505   u->user = NULL;
506   u->password = NULL;
507   u->options = NULL;
508 
509   return result;
510 }
511 
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)512 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
513                                    bool has_scheme)
514 {
515   char *portptr;
516   char *hostname = Curl_dyn_ptr(host);
517   /*
518    * Find the end of an IPv6 address on the ']' ending bracket.
519    */
520   if(hostname[0] == '[') {
521     portptr = strchr(hostname, ']');
522     if(!portptr)
523       return CURLUE_BAD_IPV6;
524     portptr++;
525     /* this is a RFC2732-style specified IP-address */
526     if(*portptr) {
527       if(*portptr != ':')
528         return CURLUE_BAD_PORT_NUMBER;
529     }
530     else
531       portptr = NULL;
532   }
533   else
534     portptr = strchr(hostname, ':');
535 
536   if(portptr) {
537     char *rest = NULL;
538     unsigned long port;
539     size_t keep = portptr - hostname;
540 
541     /* Browser behavior adaptation. If there's a colon with no digits after,
542        just cut off the name there which makes us ignore the colon and just
543        use the default port. Firefox, Chrome and Safari all do that.
544 
545        Don't do it if the URL has no scheme, to make something that looks like
546        a scheme not work!
547     */
548     Curl_dyn_setlen(host, keep);
549     portptr++;
550     if(!*portptr)
551       return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
552 
553     if(!ISDIGIT(*portptr))
554       return CURLUE_BAD_PORT_NUMBER;
555 
556     errno = 0;
557     port = strtoul(portptr, &rest, 10);  /* Port number must be decimal */
558 
559     if(errno || (port > 0xffff) || *rest)
560       return CURLUE_BAD_PORT_NUMBER;
561 
562     u->portnum = (unsigned short) port;
563     /* generate a new port number string to get rid of leading zeroes etc */
564     free(u->port);
565     u->port = aprintf("%ld", port);
566     if(!u->port)
567       return CURLUE_OUT_OF_MEMORY;
568   }
569 
570   return CURLUE_OK;
571 }
572 
573 /* this assumes 'hostname' now starts with [ */
ipv6_parse(struct Curl_URL * u,char * hostname,size_t hlen)574 static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
575                             size_t hlen) /* length of hostname */
576 {
577   size_t len;
578   DEBUGASSERT(*hostname == '[');
579   if(hlen < 4) /* '[::]' is the shortest possible valid string */
580     return CURLUE_BAD_IPV6;
581   hostname++;
582   hlen -= 2;
583 
584   /* only valid IPv6 letters are ok */
585   len = strspn(hostname, "0123456789abcdefABCDEF:.");
586 
587   if(hlen != len) {
588     hlen = len;
589     if(hostname[len] == '%') {
590       /* this could now be '%[zone id]' */
591       char zoneid[16];
592       int i = 0;
593       char *h = &hostname[len + 1];
594       /* pass '25' if present and is a url encoded percent sign */
595       if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
596         h += 2;
597       while(*h && (*h != ']') && (i < 15))
598         zoneid[i++] = *h++;
599       if(!i || (']' != *h))
600         return CURLUE_BAD_IPV6;
601       zoneid[i] = 0;
602       u->zoneid = strdup(zoneid);
603       if(!u->zoneid)
604         return CURLUE_OUT_OF_MEMORY;
605       hostname[len] = ']'; /* insert end bracket */
606       hostname[len + 1] = 0; /* terminate the hostname */
607     }
608     else
609       return CURLUE_BAD_IPV6;
610     /* hostname is fine */
611   }
612 
613   /* Check the IPv6 address. */
614   {
615     char dest[16]; /* fits a binary IPv6 address */
616     char norm[MAX_IPADR_LEN];
617     hostname[hlen] = 0; /* end the address there */
618     if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
619       return CURLUE_BAD_IPV6;
620 
621     /* check if it can be done shorter */
622     if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
623        (strlen(norm) < hlen)) {
624       strcpy(hostname, norm);
625       hlen = strlen(norm);
626       hostname[hlen + 1] = 0;
627     }
628     hostname[hlen] = ']'; /* restore ending bracket */
629   }
630   return CURLUE_OK;
631 }
632 
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)633 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
634                                 size_t hlen) /* length of hostname */
635 {
636   size_t len;
637   DEBUGASSERT(hostname);
638 
639   if(!hlen)
640     return CURLUE_NO_HOST;
641   else if(hostname[0] == '[')
642     return ipv6_parse(u, hostname, hlen);
643   else {
644     /* letters from the second string are not ok */
645     len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
646     if(hlen != len)
647       /* hostname with bad content */
648       return CURLUE_BAD_HOSTNAME;
649   }
650   return CURLUE_OK;
651 }
652 
/*
 * Handle partial IPv4 numerical addresses and different bases, like
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
 *
 * If the given input string is syntactically wrong as an IPv4 address, or
 * if any part of it is too big, this function returns HOST_NAME and leaves
 * the input untouched. Input starting with '[' returns HOST_IPV6.
 *
 * Otherwise the input string is replaced by its "normalized" version in
 * plain quad decimal integers and HOST_IPV4 is returned.
 *
 * Returns the host type.
 */

/* failures; ipv4_normalize() itself only ever returns HOST_ERROR of these */
#define HOST_ERROR   -1 /* out of memory */
#define HOST_BAD     -2 /* bad IPv4 address */

/* host types */
#define HOST_NAME    1
#define HOST_IPV4    2
#define HOST_IPV6    3
672 
ipv4_normalize(struct dynbuf * host)673 static int ipv4_normalize(struct dynbuf *host)
674 {
675   bool done = FALSE;
676   int n = 0;
677   const char *c = Curl_dyn_ptr(host);
678   unsigned long parts[4] = {0, 0, 0, 0};
679   CURLcode result = CURLE_OK;
680 
681   if(*c == '[')
682     return HOST_IPV6;
683 
684   errno = 0; /* for strtoul */
685   while(!done) {
686     char *endp = NULL;
687     unsigned long l;
688     if(!ISDIGIT(*c))
689       /* most importantly this doesn't allow a leading plus or minus */
690       return HOST_NAME;
691     l = strtoul(c, &endp, 0);
692     if(errno)
693       return HOST_NAME;
694 #if SIZEOF_LONG > 4
695     /* a value larger than 32 bits */
696     if(l > UINT_MAX)
697       return HOST_NAME;
698 #endif
699 
700     parts[n] = l;
701     c = endp;
702 
703     switch(*c) {
704     case '.':
705       if(n == 3)
706         return HOST_NAME;
707       n++;
708       c++;
709       break;
710 
711     case '\0':
712       done = TRUE;
713       break;
714 
715     default:
716       return HOST_NAME;
717     }
718   }
719 
720   switch(n) {
721   case 0: /* a -- 32 bits */
722     Curl_dyn_reset(host);
723 
724     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
725                            (unsigned int)(parts[0] >> 24),
726                            (unsigned int)((parts[0] >> 16) & 0xff),
727                            (unsigned int)((parts[0] >> 8) & 0xff),
728                            (unsigned int)(parts[0] & 0xff));
729     break;
730   case 1: /* a.b -- 8.24 bits */
731     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
732       return HOST_NAME;
733     Curl_dyn_reset(host);
734     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
735                            (unsigned int)(parts[0]),
736                            (unsigned int)((parts[1] >> 16) & 0xff),
737                            (unsigned int)((parts[1] >> 8) & 0xff),
738                            (unsigned int)(parts[1] & 0xff));
739     break;
740   case 2: /* a.b.c -- 8.8.16 bits */
741     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
742       return HOST_NAME;
743     Curl_dyn_reset(host);
744     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
745                            (unsigned int)(parts[0]),
746                            (unsigned int)(parts[1]),
747                            (unsigned int)((parts[2] >> 8) & 0xff),
748                            (unsigned int)(parts[2] & 0xff));
749     break;
750   case 3: /* a.b.c.d -- 8.8.8.8 bits */
751     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
752        (parts[3] > 0xff))
753       return HOST_NAME;
754     Curl_dyn_reset(host);
755     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
756                            (unsigned int)(parts[0]),
757                            (unsigned int)(parts[1]),
758                            (unsigned int)(parts[2]),
759                            (unsigned int)(parts[3]));
760     break;
761   }
762   if(result)
763     return HOST_ERROR;
764   return HOST_IPV4;
765 }
766 
767 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)768 static CURLUcode urldecode_host(struct dynbuf *host)
769 {
770   char *per = NULL;
771   const char *hostname = Curl_dyn_ptr(host);
772   per = strchr(hostname, '%');
773   if(!per)
774     /* nothing to decode */
775     return CURLUE_OK;
776   else {
777     /* encoded */
778     size_t dlen;
779     char *decoded;
780     CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
781                                      REJECT_CTRL);
782     if(result)
783       return CURLUE_BAD_HOSTNAME;
784     Curl_dyn_reset(host);
785     result = Curl_dyn_addn(host, decoded, dlen);
786     free(decoded);
787     if(result)
788       return cc2cu(result);
789   }
790 
791   return CURLUE_OK;
792 }
793 
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)794 static CURLUcode parse_authority(struct Curl_URL *u,
795                                  const char *auth, size_t authlen,
796                                  unsigned int flags,
797                                  struct dynbuf *host,
798                                  bool has_scheme)
799 {
800   size_t offset;
801   CURLUcode uc;
802   CURLcode result;
803 
804   /*
805    * Parse the login details and strip them out of the host name.
806    */
807   uc = parse_hostname_login(u, auth, authlen, flags, &offset);
808   if(uc)
809     goto out;
810 
811   result = Curl_dyn_addn(host, auth + offset, authlen - offset);
812   if(result) {
813     uc = cc2cu(result);
814     goto out;
815   }
816 
817   uc = Curl_parse_port(u, host, has_scheme);
818   if(uc)
819     goto out;
820 
821   if(!Curl_dyn_len(host))
822     return CURLUE_NO_HOST;
823 
824   switch(ipv4_normalize(host)) {
825   case HOST_IPV4:
826     break;
827   case HOST_IPV6:
828     uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
829     break;
830   case HOST_NAME:
831     uc = urldecode_host(host);
832     if(!uc)
833       uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
834     break;
835   case HOST_ERROR:
836     uc = CURLUE_OUT_OF_MEMORY;
837     break;
838   case HOST_BAD:
839   default:
840     uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
841     break;
842   }
843 
844 out:
845   return uc;
846 }
847 
848 /* used for HTTP/2 server push */
Curl_url_set_authority(CURLU * u,const char * authority)849 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
850 {
851   CURLUcode result;
852   struct dynbuf host;
853 
854   DEBUGASSERT(authority);
855   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
856 
857   result = parse_authority(u, authority, strlen(authority),
858                            CURLU_DISALLOW_USER, &host, !!u->scheme);
859   if(result)
860     Curl_dyn_free(&host);
861   else {
862     free(u->host);
863     u->host = Curl_dyn_ptr(&host);
864   }
865   return result;
866 }
867 
/*
 * "Remove Dot Segments"
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
 */

/*
 * dedotdotify()
 * @unittest: 1395
 *
 * This function gets a null-terminated path with dot and dotdot sequences
 * passed in and strips them off according to the rules in RFC 3986 section
 * 5.2.4. 'clen' is the length of the path.
 *
 * The input must be null-terminated: the string comparisons and the segment
 * copy loop below stop at the null byte, not at 'clen'.
 *
 * The function handles a query part ('?' + stuff) appended but it expects
 * that fragments ('#' + stuff) have already been cut off.
 *
 * RETURNS
 *
 * Zero for success, with *outp set to an allocated dedotdotified string that
 * the caller must free, or to NULL when the input is shorter than two bytes
 * or contains no dot at all, meaning the input can be used as it is.
 *
 * Non-zero if out of memory, with *outp set to NULL.
 */
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
  char *outptr; /* where the next output byte goes */
  const char *endp = &input[clen];
  char *out;    /* start of the output buffer */

  *outp = NULL;
  /* the path always starts with a slash, and a slash has no dot, so a path
     of less than two bytes cannot contain a dot segment */
  if((clen < 2) || !memchr(input, '.', clen))
    return 0;

  /* the output is never longer than the input */
  out = malloc(clen + 1);
  if(!out)
    return 1; /* out of memory */

  *out = 0; /* null-terminates, for inputs like "./" */
  outptr = out;

  do {
    /* TRUE when this round consumed a dot segment instead of copying */
    bool dotdot = TRUE;
    if(*input == '.') {
      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
          remove that prefix from the input buffer; otherwise, */

      if(!strncmp("./", input, 2)) {
        input += 2;
        clen -= 2;
      }
      else if(!strncmp("../", input, 3)) {
        input += 3;
        clen -= 3;
      }
      /*  D.  if the input buffer consists only of "." or "..", then remove
          that from the input buffer; otherwise, */

      else if(!strcmp(".", input) || !strcmp("..", input) ||
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
        /* the loop ends here, so nothing after this point is copied */
        *out = 0;
        break;
      }
      else
        dotdot = FALSE;
    }
    else if(*input == '/') {
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
          "."  is a complete path segment, then replace that prefix with "/" in
          the input buffer; otherwise, */
      if(!strncmp("/./", input, 3)) {
        /* skip "/." and let the slash that follows start the next round */
        input += 2;
        clen -= 2;
      }
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
        /* a final "/." becomes "/". NOTE(review): the loop ends here, so a
           query following "/." is not copied to the output -- confirm that
           callers never pass a query in this position */
        *outptr++ = '/';
        *outptr = 0;
        break;
      }

      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
          where ".." is a complete path segment, then replace that prefix with
          "/" in the input buffer and remove the last segment and its
          preceding "/" (if any) from the output buffer; otherwise, */

      else if(!strncmp("/../", input, 4)) {
        /* skip "/.." and let the slash that follows start the next round */
        input += 3;
        clen -= 3;
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        *outptr = 0; /* null-terminate where it stops */
      }
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        /* a final "/.." leaves a trailing slash; as for "/." above, the loop
           ends here */
        *outptr++ = '/';
        *outptr = 0; /* null-terminate where it stops */
        break;
      }
      else
        dotdot = FALSE;
    }
    else
      dotdot = FALSE;

    if(!dotdot) {
      /*  E.  move the first path segment in the input buffer to the end of
          the output buffer, including the initial "/" character (if any) and
          any subsequent characters up to, but not including, the next "/"
          character or the end of the input buffer. */

      /* the first byte is always copied; a '?' also ends a segment */
      do {
        *outptr++ = *input++;
        clen--;
      } while(*input && (*input != '/') && (*input != '?'));
      *outptr = 0;
    }

    /* continue until end of path */
  } while(input < endp);

  *outp = out;
  return 0; /* success */
}
998 
parseurl(const char * url,CURLU * u,unsigned int flags)999 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1000 {
1001   const char *path;
1002   size_t pathlen;
1003   char *query = NULL;
1004   char *fragment = NULL;
1005   char schemebuf[MAX_SCHEME_LEN + 1];
1006   size_t schemelen = 0;
1007   size_t urllen;
1008   CURLUcode result = CURLUE_OK;
1009   size_t fraglen = 0;
1010   struct dynbuf host;
1011 
1012   DEBUGASSERT(url);
1013 
1014   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1015 
1016   result = junkscan(url, &urllen, flags);
1017   if(result)
1018     goto fail;
1019 
1020   schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1021                                    flags & (CURLU_GUESS_SCHEME|
1022                                             CURLU_DEFAULT_SCHEME));
1023 
1024   /* handle the file: scheme */
1025   if(schemelen && !strcmp(schemebuf, "file")) {
1026     bool uncpath = FALSE;
1027     if(urllen <= 6) {
1028       /* file:/ is not enough to actually be a complete file: URL */
1029       result = CURLUE_BAD_FILE_URL;
1030       goto fail;
1031     }
1032 
1033     /* path has been allocated large enough to hold this */
1034     path = (char *)&url[5];
1035     pathlen = urllen - 5;
1036 
1037     u->scheme = strdup("file");
1038     if(!u->scheme) {
1039       result = CURLUE_OUT_OF_MEMORY;
1040       goto fail;
1041     }
1042 
1043     /* Extra handling URLs with an authority component (i.e. that start with
1044      * "file://")
1045      *
1046      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1047      * RFC 8089, but not the (current) WHAT-WG URL spec.
1048      */
1049     if(path[0] == '/' && path[1] == '/') {
1050       /* swallow the two slashes */
1051       const char *ptr = &path[2];
1052 
1053       /*
1054        * According to RFC 8089, a file: URL can be reliably dereferenced if:
1055        *
1056        *  o it has no/blank hostname, or
1057        *
1058        *  o the hostname matches "localhost" (case-insensitively), or
1059        *
1060        *  o the hostname is a FQDN that resolves to this machine, or
1061        *
1062        *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
1063        *    Appendix E.3).
1064        *
1065        * For brevity, we only consider URLs with empty, "localhost", or
1066        * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1067        *
1068        * Additionally, there is an exception for URLs with a Windows drive
1069        * letter in the authority (which was accidentally omitted from RFC 8089
1070        * Appendix E, but believe me, it was meant to be there. --MK)
1071        */
1072       if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1073         /* the URL includes a host name, it must match "localhost" or
1074            "127.0.0.1" to be valid */
1075         if(checkprefix("localhost/", ptr) ||
1076            checkprefix("127.0.0.1/", ptr)) {
1077           ptr += 9; /* now points to the slash after the host */
1078         }
1079         else {
1080 #if defined(_WIN32)
1081           size_t len;
1082 
1083           /* the host name, NetBIOS computer name, can not contain disallowed
1084              chars, and the delimiting slash character must be appended to the
1085              host name */
1086           path = strpbrk(ptr, "/\\:*?\"<>|");
1087           if(!path || *path != '/') {
1088             result = CURLUE_BAD_FILE_URL;
1089             goto fail;
1090           }
1091 
1092           len = path - ptr;
1093           if(len) {
1094             CURLcode code = Curl_dyn_addn(&host, ptr, len);
1095             if(code) {
1096               result = cc2cu(code);
1097               goto fail;
1098             }
1099             uncpath = TRUE;
1100           }
1101 
1102           ptr -= 2; /* now points to the // before the host in UNC */
1103 #else
1104           /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1105              none */
1106           result = CURLUE_BAD_FILE_URL;
1107           goto fail;
1108 #endif
1109         }
1110       }
1111 
1112       path = ptr;
1113       pathlen = urllen - (ptr - url);
1114     }
1115 
1116     if(!uncpath)
1117       /* no host for file: URLs by default */
1118       Curl_dyn_reset(&host);
1119 
1120 #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1121     /* Don't allow Windows drive letters when not in Windows.
1122      * This catches both "file:/c:" and "file:c:" */
1123     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1124        STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1125       /* File drive letters are only accepted in MSDOS/Windows */
1126       result = CURLUE_BAD_FILE_URL;
1127       goto fail;
1128     }
1129 #else
1130     /* If the path starts with a slash and a drive letter, ditch the slash */
1131     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1132       /* This cannot be done with strcpy, as the memory chunks overlap! */
1133       path++;
1134       pathlen--;
1135     }
1136 #endif
1137 
1138   }
1139   else {
1140     /* clear path */
1141     const char *schemep = NULL;
1142     const char *hostp;
1143     size_t hostlen;
1144 
1145     if(schemelen) {
1146       int i = 0;
1147       const char *p = &url[schemelen + 1];
1148       while((*p == '/') && (i < 4)) {
1149         p++;
1150         i++;
1151       }
1152 
1153       schemep = schemebuf;
1154       if(!Curl_get_scheme_handler(schemep) &&
1155          !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1156         result = CURLUE_UNSUPPORTED_SCHEME;
1157         goto fail;
1158       }
1159 
1160       if((i < 1) || (i > 3)) {
1161         /* less than one or more than three slashes */
1162         result = CURLUE_BAD_SLASHES;
1163         goto fail;
1164       }
1165       hostp = p; /* host name starts here */
1166     }
1167     else {
1168       /* no scheme! */
1169 
1170       if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1171         result = CURLUE_BAD_SCHEME;
1172         goto fail;
1173       }
1174       if(flags & CURLU_DEFAULT_SCHEME)
1175         schemep = DEFAULT_SCHEME;
1176 
1177       /*
1178        * The URL was badly formatted, let's try without scheme specified.
1179        */
1180       hostp = url;
1181     }
1182 
1183     if(schemep) {
1184       u->scheme = strdup(schemep);
1185       if(!u->scheme) {
1186         result = CURLUE_OUT_OF_MEMORY;
1187         goto fail;
1188       }
1189     }
1190 
1191     /* find the end of the host name + port number */
1192     hostlen = strcspn(hostp, "/?#");
1193     path = &hostp[hostlen];
1194 
1195     /* this pathlen also contains the query and the fragment */
1196     pathlen = urllen - (path - url);
1197     if(hostlen) {
1198 
1199       result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1200       if(result)
1201         goto fail;
1202 
1203       if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1204         const char *hostname = Curl_dyn_ptr(&host);
1205         /* legacy curl-style guess based on host name */
1206         if(checkprefix("ftp.", hostname))
1207           schemep = "ftp";
1208         else if(checkprefix("dict.", hostname))
1209           schemep = "dict";
1210         else if(checkprefix("ldap.", hostname))
1211           schemep = "ldap";
1212         else if(checkprefix("imap.", hostname))
1213           schemep = "imap";
1214         else if(checkprefix("smtp.", hostname))
1215           schemep = "smtp";
1216         else if(checkprefix("pop3.", hostname))
1217           schemep = "pop3";
1218         else
1219           schemep = "http";
1220 
1221         u->scheme = strdup(schemep);
1222         if(!u->scheme) {
1223           result = CURLUE_OUT_OF_MEMORY;
1224           goto fail;
1225         }
1226       }
1227     }
1228     else if(flags & CURLU_NO_AUTHORITY) {
1229       /* allowed to be empty. */
1230       if(Curl_dyn_add(&host, "")) {
1231         result = CURLUE_OUT_OF_MEMORY;
1232         goto fail;
1233       }
1234     }
1235     else {
1236       result = CURLUE_NO_HOST;
1237       goto fail;
1238     }
1239   }
1240 
1241   fragment = strchr(path, '#');
1242   if(fragment) {
1243     fraglen = pathlen - (fragment - path);
1244     u->fragment_present = TRUE;
1245     if(fraglen > 1) {
1246       /* skip the leading '#' in the copy but include the terminating null */
1247       if(flags & CURLU_URLENCODE) {
1248         struct dynbuf enc;
1249         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1250         result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1251         if(result)
1252           goto fail;
1253         u->fragment = Curl_dyn_ptr(&enc);
1254       }
1255       else {
1256         u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1257         if(!u->fragment) {
1258           result = CURLUE_OUT_OF_MEMORY;
1259           goto fail;
1260         }
1261       }
1262     }
1263     /* after this, pathlen still contains the query */
1264     pathlen -= fraglen;
1265   }
1266 
1267   query = memchr(path, '?', pathlen);
1268   if(query) {
1269     size_t qlen = fragment ? (size_t)(fragment - query) :
1270       pathlen - (query - path);
1271     pathlen -= qlen;
1272     u->query_present = TRUE;
1273     if(qlen > 1) {
1274       if(flags & CURLU_URLENCODE) {
1275         struct dynbuf enc;
1276         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1277         /* skip the leading question mark */
1278         result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1279         if(result)
1280           goto fail;
1281         u->query = Curl_dyn_ptr(&enc);
1282       }
1283       else {
1284         u->query = Curl_memdup0(query + 1, qlen - 1);
1285         if(!u->query) {
1286           result = CURLUE_OUT_OF_MEMORY;
1287           goto fail;
1288         }
1289       }
1290     }
1291     else {
1292       /* single byte query */
1293       u->query = strdup("");
1294       if(!u->query) {
1295         result = CURLUE_OUT_OF_MEMORY;
1296         goto fail;
1297       }
1298     }
1299   }
1300 
1301   if(pathlen && (flags & CURLU_URLENCODE)) {
1302     struct dynbuf enc;
1303     Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1304     result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1305     if(result)
1306       goto fail;
1307     pathlen = Curl_dyn_len(&enc);
1308     path = u->path = Curl_dyn_ptr(&enc);
1309   }
1310 
1311   if(pathlen <= 1) {
1312     /* there is no path left or just the slash, unset */
1313     path = NULL;
1314   }
1315   else {
1316     if(!u->path) {
1317       u->path = Curl_memdup0(path, pathlen);
1318       if(!u->path) {
1319         result = CURLUE_OUT_OF_MEMORY;
1320         goto fail;
1321       }
1322       path = u->path;
1323     }
1324     else if(flags & CURLU_URLENCODE)
1325       /* it might have encoded more than just the path so cut it */
1326       u->path[pathlen] = 0;
1327 
1328     if(!(flags & CURLU_PATH_AS_IS)) {
1329       /* remove ../ and ./ sequences according to RFC3986 */
1330       char *dedot;
1331       int err = dedotdotify((char *)path, pathlen, &dedot);
1332       if(err) {
1333         result = CURLUE_OUT_OF_MEMORY;
1334         goto fail;
1335       }
1336       if(dedot) {
1337         free(u->path);
1338         u->path = dedot;
1339       }
1340     }
1341   }
1342 
1343   u->host = Curl_dyn_ptr(&host);
1344 
1345   return result;
1346 fail:
1347   Curl_dyn_free(&host);
1348   free_urlhandle(u);
1349   return result;
1350 }
1351 
1352 /*
1353  * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1354  */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1355 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1356                                       unsigned int flags)
1357 {
1358   CURLUcode result;
1359   CURLU tmpurl;
1360   memset(&tmpurl, 0, sizeof(tmpurl));
1361   result = parseurl(url, &tmpurl, flags);
1362   if(!result) {
1363     free_urlhandle(u);
1364     *u = tmpurl;
1365   }
1366   return result;
1367 }
1368 
1369 /*
1370  */
curl_url(void)1371 CURLU *curl_url(void)
1372 {
1373   return calloc(1, sizeof(struct Curl_URL));
1374 }
1375 
/*
 * curl_url_cleanup() releases the parts stored in the handle and then the
 * handle itself. Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;
  free_urlhandle(u);
  free(u);
}
1383 
1384 #define DUP(dest, src, name)                    \
1385   do {                                          \
1386     if(src->name) {                             \
1387       dest->name = strdup(src->name);           \
1388       if(!dest->name)                           \
1389         goto fail;                              \
1390     }                                           \
1391   } while(0)
1392 
curl_url_dup(const CURLU * in)1393 CURLU *curl_url_dup(const CURLU *in)
1394 {
1395   struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1396   if(u) {
1397     DUP(u, in, scheme);
1398     DUP(u, in, user);
1399     DUP(u, in, password);
1400     DUP(u, in, options);
1401     DUP(u, in, host);
1402     DUP(u, in, port);
1403     DUP(u, in, path);
1404     DUP(u, in, query);
1405     DUP(u, in, fragment);
1406     DUP(u, in, zoneid);
1407     u->portnum = in->portnum;
1408     u->fragment_present = in->fragment_present;
1409     u->query_present = in->query_present;
1410   }
1411   return u;
1412 fail:
1413   curl_url_cleanup(u);
1414   return NULL;
1415 }
1416 
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1417 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1418                        char **part, unsigned int flags)
1419 {
1420   const char *ptr;
1421   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1422   char portbuf[7];
1423   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1424   bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1425   bool punycode = FALSE;
1426   bool depunyfy = FALSE;
1427   bool plusdecode = FALSE;
1428   (void)flags;
1429   if(!u)
1430     return CURLUE_BAD_HANDLE;
1431   if(!part)
1432     return CURLUE_BAD_PARTPOINTER;
1433   *part = NULL;
1434 
1435   switch(what) {
1436   case CURLUPART_SCHEME:
1437     ptr = u->scheme;
1438     ifmissing = CURLUE_NO_SCHEME;
1439     urldecode = FALSE; /* never for schemes */
1440     break;
1441   case CURLUPART_USER:
1442     ptr = u->user;
1443     ifmissing = CURLUE_NO_USER;
1444     break;
1445   case CURLUPART_PASSWORD:
1446     ptr = u->password;
1447     ifmissing = CURLUE_NO_PASSWORD;
1448     break;
1449   case CURLUPART_OPTIONS:
1450     ptr = u->options;
1451     ifmissing = CURLUE_NO_OPTIONS;
1452     break;
1453   case CURLUPART_HOST:
1454     ptr = u->host;
1455     ifmissing = CURLUE_NO_HOST;
1456     punycode = (flags & CURLU_PUNYCODE)?1:0;
1457     depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1458     break;
1459   case CURLUPART_ZONEID:
1460     ptr = u->zoneid;
1461     ifmissing = CURLUE_NO_ZONEID;
1462     break;
1463   case CURLUPART_PORT:
1464     ptr = u->port;
1465     ifmissing = CURLUE_NO_PORT;
1466     urldecode = FALSE; /* never for port */
1467     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1468       /* there's no stored port number, but asked to deliver
1469          a default one for the scheme */
1470       const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1471       if(h) {
1472         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1473         ptr = portbuf;
1474       }
1475     }
1476     else if(ptr && u->scheme) {
1477       /* there is a stored port number, but ask to inhibit if
1478          it matches the default one for the scheme */
1479       const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1480       if(h && (h->defport == u->portnum) &&
1481          (flags & CURLU_NO_DEFAULT_PORT))
1482         ptr = NULL;
1483     }
1484     break;
1485   case CURLUPART_PATH:
1486     ptr = u->path;
1487     if(!ptr)
1488       ptr = "/";
1489     break;
1490   case CURLUPART_QUERY:
1491     ptr = u->query;
1492     ifmissing = CURLUE_NO_QUERY;
1493     plusdecode = urldecode;
1494     if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
1495       /* there was a blank query and the user do not ask for it */
1496       ptr = NULL;
1497     break;
1498   case CURLUPART_FRAGMENT:
1499     ptr = u->fragment;
1500     ifmissing = CURLUE_NO_FRAGMENT;
1501     if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
1502       /* there was a blank fragment and the user asks for it */
1503       ptr = "";
1504     break;
1505   case CURLUPART_URL: {
1506     char *url;
1507     char *scheme;
1508     char *options = u->options;
1509     char *port = u->port;
1510     char *allochost = NULL;
1511     bool show_fragment =
1512       u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
1513     bool show_query =
1514       (u->query && u->query[0]) ||
1515       (u->query_present && flags & CURLU_GET_EMPTY);
1516     punycode = (flags & CURLU_PUNYCODE)?1:0;
1517     depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1518     if(u->scheme && strcasecompare("file", u->scheme)) {
1519       url = aprintf("file://%s%s%s",
1520                     u->path,
1521                     show_fragment ? "#": "",
1522                     u->fragment ? u->fragment : "");
1523     }
1524     else if(!u->host)
1525       return CURLUE_NO_HOST;
1526     else {
1527       const struct Curl_handler *h = NULL;
1528       if(u->scheme)
1529         scheme = u->scheme;
1530       else if(flags & CURLU_DEFAULT_SCHEME)
1531         scheme = (char *) DEFAULT_SCHEME;
1532       else
1533         return CURLUE_NO_SCHEME;
1534 
1535       h = Curl_get_scheme_handler(scheme);
1536       if(!port && (flags & CURLU_DEFAULT_PORT)) {
1537         /* there's no stored port number, but asked to deliver
1538            a default one for the scheme */
1539         if(h) {
1540           msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1541           port = portbuf;
1542         }
1543       }
1544       else if(port) {
1545         /* there is a stored port number, but asked to inhibit if it matches
1546            the default one for the scheme */
1547         if(h && (h->defport == u->portnum) &&
1548            (flags & CURLU_NO_DEFAULT_PORT))
1549           port = NULL;
1550       }
1551 
1552       if(h && !(h->flags & PROTOPT_URLOPTIONS))
1553         options = NULL;
1554 
1555       if(u->host[0] == '[') {
1556         if(u->zoneid) {
1557           /* make it '[ host %25 zoneid ]' */
1558           struct dynbuf enc;
1559           size_t hostlen = strlen(u->host);
1560           Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1561           if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1562                            u->zoneid))
1563             return CURLUE_OUT_OF_MEMORY;
1564           allochost = Curl_dyn_ptr(&enc);
1565         }
1566       }
1567       else if(urlencode) {
1568         allochost = curl_easy_escape(NULL, u->host, 0);
1569         if(!allochost)
1570           return CURLUE_OUT_OF_MEMORY;
1571       }
1572       else if(punycode) {
1573         if(!Curl_is_ASCII_name(u->host)) {
1574 #ifndef USE_IDN
1575           return CURLUE_LACKS_IDN;
1576 #else
1577           CURLcode result = Curl_idn_decode(u->host, &allochost);
1578           if(result)
1579             return (result == CURLE_OUT_OF_MEMORY) ?
1580               CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1581 #endif
1582         }
1583       }
1584       else if(depunyfy) {
1585         if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1586 #ifndef USE_IDN
1587           return CURLUE_LACKS_IDN;
1588 #else
1589           CURLcode result = Curl_idn_encode(u->host, &allochost);
1590           if(result)
1591             /* this is the most likely error */
1592             return (result == CURLE_OUT_OF_MEMORY) ?
1593               CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1594 #endif
1595         }
1596       }
1597 
1598       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1599                     scheme,
1600                     u->user ? u->user : "",
1601                     u->password ? ":": "",
1602                     u->password ? u->password : "",
1603                     options ? ";" : "",
1604                     options ? options : "",
1605                     (u->user || u->password || options) ? "@": "",
1606                     allochost ? allochost : u->host,
1607                     port ? ":": "",
1608                     port ? port : "",
1609                     u->path ? u->path : "/",
1610                     show_query ? "?": "",
1611                     u->query ? u->query : "",
1612                     show_fragment ? "#": "",
1613                     u->fragment? u->fragment : "");
1614       free(allochost);
1615     }
1616     if(!url)
1617       return CURLUE_OUT_OF_MEMORY;
1618     *part = url;
1619     return CURLUE_OK;
1620   }
1621   default:
1622     ptr = NULL;
1623     break;
1624   }
1625   if(ptr) {
1626     size_t partlen = strlen(ptr);
1627     size_t i = 0;
1628     *part = Curl_memdup0(ptr, partlen);
1629     if(!*part)
1630       return CURLUE_OUT_OF_MEMORY;
1631     if(plusdecode) {
1632       /* convert + to space */
1633       char *plus = *part;
1634       for(i = 0; i < partlen; ++plus, i++) {
1635         if(*plus == '+')
1636           *plus = ' ';
1637       }
1638     }
1639     if(urldecode) {
1640       char *decoded;
1641       size_t dlen;
1642       /* this unconditional rejection of control bytes is documented
1643          API behavior */
1644       CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1645       free(*part);
1646       if(res) {
1647         *part = NULL;
1648         return CURLUE_URLDECODE;
1649       }
1650       *part = decoded;
1651       partlen = dlen;
1652     }
1653     if(urlencode) {
1654       struct dynbuf enc;
1655       CURLUcode uc;
1656       Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1657       uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1658       if(uc)
1659         return uc;
1660       free(*part);
1661       *part = Curl_dyn_ptr(&enc);
1662     }
1663     else if(punycode) {
1664       if(!Curl_is_ASCII_name(u->host)) {
1665 #ifndef USE_IDN
1666         return CURLUE_LACKS_IDN;
1667 #else
1668         char *allochost;
1669         CURLcode result = Curl_idn_decode(*part, &allochost);
1670         if(result)
1671           return (result == CURLE_OUT_OF_MEMORY) ?
1672             CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1673         free(*part);
1674         *part = allochost;
1675 #endif
1676       }
1677     }
1678     else if(depunyfy) {
1679       if(Curl_is_ASCII_name(u->host)  && !strncmp("xn--", u->host, 4)) {
1680 #ifndef USE_IDN
1681         return CURLUE_LACKS_IDN;
1682 #else
1683         char *allochost;
1684         CURLcode result = Curl_idn_encode(*part, &allochost);
1685         if(result)
1686           return (result == CURLE_OUT_OF_MEMORY) ?
1687             CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1688         free(*part);
1689         *part = allochost;
1690 #endif
1691       }
1692     }
1693 
1694     return CURLUE_OK;
1695   }
1696   else
1697     return ifmissing;
1698 }
1699 
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1700 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1701                        const char *part, unsigned int flags)
1702 {
1703   char **storep = NULL;
1704   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1705   bool plusencode = FALSE;
1706   bool urlskipslash = FALSE;
1707   bool leadingslash = FALSE;
1708   bool appendquery = FALSE;
1709   bool equalsencode = FALSE;
1710   size_t nalloc;
1711 
1712   if(!u)
1713     return CURLUE_BAD_HANDLE;
1714   if(!part) {
1715     /* setting a part to NULL clears it */
1716     switch(what) {
1717     case CURLUPART_URL:
1718       break;
1719     case CURLUPART_SCHEME:
1720       storep = &u->scheme;
1721       break;
1722     case CURLUPART_USER:
1723       storep = &u->user;
1724       break;
1725     case CURLUPART_PASSWORD:
1726       storep = &u->password;
1727       break;
1728     case CURLUPART_OPTIONS:
1729       storep = &u->options;
1730       break;
1731     case CURLUPART_HOST:
1732       storep = &u->host;
1733       break;
1734     case CURLUPART_ZONEID:
1735       storep = &u->zoneid;
1736       break;
1737     case CURLUPART_PORT:
1738       u->portnum = 0;
1739       storep = &u->port;
1740       break;
1741     case CURLUPART_PATH:
1742       storep = &u->path;
1743       break;
1744     case CURLUPART_QUERY:
1745       storep = &u->query;
1746       u->query_present = FALSE;
1747       break;
1748     case CURLUPART_FRAGMENT:
1749       storep = &u->fragment;
1750       u->fragment_present = FALSE;
1751       break;
1752     default:
1753       return CURLUE_UNKNOWN_PART;
1754     }
1755     if(storep && *storep) {
1756       Curl_safefree(*storep);
1757     }
1758     else if(!storep) {
1759       free_urlhandle(u);
1760       memset(u, 0, sizeof(struct Curl_URL));
1761     }
1762     return CURLUE_OK;
1763   }
1764 
1765   nalloc = strlen(part);
1766   if(nalloc > CURL_MAX_INPUT_LENGTH)
1767     /* excessive input length */
1768     return CURLUE_MALFORMED_INPUT;
1769 
1770   switch(what) {
1771   case CURLUPART_SCHEME: {
1772     size_t plen = strlen(part);
1773     const char *s = part;
1774     if((plen > MAX_SCHEME_LEN) || (plen < 1))
1775       /* too long or too short */
1776       return CURLUE_BAD_SCHEME;
1777    /* verify that it is a fine scheme */
1778     if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1779       return CURLUE_UNSUPPORTED_SCHEME;
1780     storep = &u->scheme;
1781     urlencode = FALSE; /* never */
1782     if(ISALPHA(*s)) {
1783       /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1784       while(--plen) {
1785         if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1786           s++; /* fine */
1787         else
1788           return CURLUE_BAD_SCHEME;
1789       }
1790     }
1791     else
1792       return CURLUE_BAD_SCHEME;
1793     break;
1794   }
1795   case CURLUPART_USER:
1796     storep = &u->user;
1797     break;
1798   case CURLUPART_PASSWORD:
1799     storep = &u->password;
1800     break;
1801   case CURLUPART_OPTIONS:
1802     storep = &u->options;
1803     break;
1804   case CURLUPART_HOST:
1805     storep = &u->host;
1806     Curl_safefree(u->zoneid);
1807     break;
1808   case CURLUPART_ZONEID:
1809     storep = &u->zoneid;
1810     break;
1811   case CURLUPART_PORT:
1812     if(!ISDIGIT(part[0]))
1813       /* not a number */
1814       return CURLUE_BAD_PORT_NUMBER;
1815     else {
1816       char *tmp;
1817       char *endp;
1818       unsigned long port;
1819       errno = 0;
1820       port = strtoul(part, &endp, 10);  /* must be decimal */
1821       if(errno || (port > 0xffff) || *endp)
1822         /* weirdly provided number, not good! */
1823         return CURLUE_BAD_PORT_NUMBER;
1824       tmp = strdup(part);
1825       if(!tmp)
1826         return CURLUE_OUT_OF_MEMORY;
1827       free(u->port);
1828       u->port = tmp;
1829       u->portnum = (unsigned short)port;
1830       return CURLUE_OK;
1831     }
1832   case CURLUPART_PATH:
1833     urlskipslash = TRUE;
1834     leadingslash = TRUE; /* enforce */
1835     storep = &u->path;
1836     break;
1837   case CURLUPART_QUERY:
1838     plusencode = urlencode;
1839     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1840     equalsencode = appendquery;
1841     storep = &u->query;
1842     u->query_present = TRUE;
1843     break;
1844   case CURLUPART_FRAGMENT:
1845     storep = &u->fragment;
1846     u->fragment_present = TRUE;
1847     break;
1848   case CURLUPART_URL: {
1849     /*
1850      * Allow a new URL to replace the existing (if any) contents.
1851      *
1852      * If the existing contents is enough for a URL, allow a relative URL to
1853      * replace it.
1854      */
1855     CURLcode result;
1856     CURLUcode uc;
1857     char *oldurl;
1858     char *redired_url;
1859 
1860     if(!nalloc)
1861       /* a blank URL is not a valid URL */
1862       return CURLUE_MALFORMED_INPUT;
1863 
1864     /* if the new thing is absolute or the old one is not
1865      * (we could not get an absolute url in 'oldurl'),
1866      * then replace the existing with the new. */
1867     if(Curl_is_absolute_url(part, NULL, 0,
1868                             flags & (CURLU_GUESS_SCHEME|
1869                                      CURLU_DEFAULT_SCHEME))
1870        || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1871       return parseurl_and_replace(part, u, flags);
1872     }
1873 
1874     /* apply the relative part to create a new URL
1875      * and replace the existing one with it. */
1876     result = concat_url(oldurl, part, &redired_url);
1877     free(oldurl);
1878     if(result)
1879       return cc2cu(result);
1880 
1881     uc = parseurl_and_replace(redired_url, u, flags);
1882     free(redired_url);
1883     return uc;
1884   }
1885   default:
1886     return CURLUE_UNKNOWN_PART;
1887   }
1888   DEBUGASSERT(storep);
1889   {
1890     const char *newp;
1891     struct dynbuf enc;
1892     Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1893 
1894     if(leadingslash && (part[0] != '/')) {
1895       CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1896       if(result)
1897         return cc2cu(result);
1898     }
1899     if(urlencode) {
1900       const unsigned char *i;
1901 
1902       for(i = (const unsigned char *)part; *i; i++) {
1903         CURLcode result;
1904         if((*i == ' ') && plusencode) {
1905           result = Curl_dyn_addn(&enc, "+", 1);
1906           if(result)
1907             return CURLUE_OUT_OF_MEMORY;
1908         }
1909         else if(ISUNRESERVED(*i) ||
1910                 ((*i == '/') && urlskipslash) ||
1911                 ((*i == '=') && equalsencode)) {
1912           if((*i == '=') && equalsencode)
1913             /* only skip the first equals sign */
1914             equalsencode = FALSE;
1915           result = Curl_dyn_addn(&enc, i, 1);
1916           if(result)
1917             return cc2cu(result);
1918         }
1919         else {
1920           char out[3]={'%'};
1921           out[1] = hexdigits[*i>>4];
1922           out[2] = hexdigits[*i & 0xf];
1923           result = Curl_dyn_addn(&enc, out, 3);
1924           if(result)
1925             return cc2cu(result);
1926         }
1927       }
1928     }
1929     else {
1930       char *p;
1931       CURLcode result = Curl_dyn_add(&enc, part);
1932       if(result)
1933         return cc2cu(result);
1934       p = Curl_dyn_ptr(&enc);
1935       while(*p) {
1936         /* make sure percent encoded are lower case */
1937         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1938            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1939           p[1] = Curl_raw_tolower(p[1]);
1940           p[2] = Curl_raw_tolower(p[2]);
1941           p += 3;
1942         }
1943         else
1944           p++;
1945       }
1946     }
1947     newp = Curl_dyn_ptr(&enc);
1948 
1949     if(appendquery && newp) {
1950       /* Append the 'newp' string onto the old query. Add a '&' separator if
1951          none is present at the end of the existing query already */
1952 
1953       size_t querylen = u->query ? strlen(u->query) : 0;
1954       bool addamperand = querylen && (u->query[querylen -1] != '&');
1955       if(querylen) {
1956         struct dynbuf qbuf;
1957         Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1958 
1959         if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1960           goto nomem;
1961 
1962         if(addamperand) {
1963           if(Curl_dyn_addn(&qbuf, "&", 1))
1964             goto nomem;
1965         }
1966         if(Curl_dyn_add(&qbuf, newp))
1967           goto nomem;
1968         Curl_dyn_free(&enc);
1969         free(*storep);
1970         *storep = Curl_dyn_ptr(&qbuf);
1971         return CURLUE_OK;
1972 nomem:
1973         Curl_dyn_free(&enc);
1974         return CURLUE_OUT_OF_MEMORY;
1975       }
1976     }
1977 
1978     else if(what == CURLUPART_HOST) {
1979       size_t n = Curl_dyn_len(&enc);
1980       if(!n && (flags & CURLU_NO_AUTHORITY)) {
1981         /* Skip hostname check, it's allowed to be empty. */
1982       }
1983       else {
1984         if(!n || hostname_check(u, (char *)newp, n)) {
1985           Curl_dyn_free(&enc);
1986           return CURLUE_BAD_HOSTNAME;
1987         }
1988       }
1989     }
1990 
1991     free(*storep);
1992     *storep = (char *)newp;
1993   }
1994   return CURLUE_OK;
1995 }
1996