• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37 #include "curl_memrchr.h"
38 
39 /* The last 3 #include files should be in this order */
40 #include "curl_printf.h"
41 #include "curl_memory.h"
42 #include "memdebug.h"
43 
44   /* MSDOS/Windows style drive prefix, eg c: in c:foo */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   ((str)[1] == ':'))

  /* MSDOS/Windows style drive prefix, optionally with
   * a '|' instead of ':', followed by a slash or NUL.
   *
   * Both macros parenthesize their argument so that expressions such as
   * 'ptr + 1' or '&buf[n]' expand correctly. The argument is evaluated
   * several times, so it must not have side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
57 
/* The scheme is not URL encoded. This is the longest scheme name the parser
   accepts; a longer candidate is not treated as a scheme. */
59 #define MAX_SCHEME_LEN 40
60 
61 /*
62  * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
63  * sure we have _some_ value for AF_INET6 without polluting our fake value
64  * everywhere.
65  */
66 #if !defined(ENABLE_IPV6) && !defined(AF_INET6)
67 #define AF_INET6 (AF_INET + 1)
68 #endif
69 
70 /* Internal representation of CURLU. Point to URL-encoded strings. */
struct Curl_URL {
  char *scheme;
  char *user;     /* filled in by parse_hostname_login() */
  char *password; /* filled in by parse_hostname_login() */
  char *options; /* IMAP only? Only parsed when the scheme's handler has
                    PROTOPT_URLOPTIONS set */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;   /* decimal string, regenerated from the parsed number by
                   Curl_parse_port() */
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};
84 
85 #define DEFAULT_SCHEME "https"
86 
free_urlhandle(struct Curl_URL * u)87 static void free_urlhandle(struct Curl_URL *u)
88 {
89   free(u->scheme);
90   free(u->user);
91   free(u->password);
92   free(u->options);
93   free(u->host);
94   free(u->zoneid);
95   free(u->port);
96   free(u->path);
97   free(u->query);
98   free(u->fragment);
99 }
100 
/*
 * Return a pointer to whichever comes first after the start of the host
 * name: the first '/', the first '?' (for cases like
 * http://www.example.com?id=2380) or, when neither is present, the
 * terminating null byte of 'url'.
 *
 * The host name is considered to start right after the first "//" in the
 * string, or at the very beginning if there is no "//".
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* skip the two slashes when present */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  /* a missing separator counts as being at the end of the string */
  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (slash < qmark) ? slash : qmark;
}
128 
129 /*
130  * Decide whether a character in a URL must be escaped.
131  */
132 #define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
133 
134 static const char hexdigits[] = "0123456789abcdef";
135 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
136  * spaces in the source URL accordingly.
137  *
138  * URL encoding should be skipped for host names, otherwise IDN resolution
139  * will fail.
140  */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)141 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
142                                size_t len, bool relative,
143                                bool query)
144 {
145   /* we must add this with whitespace-replacing */
146   bool left = !query;
147   const unsigned char *iptr;
148   const unsigned char *host_sep = (const unsigned char *) url;
149 
150   if(!relative)
151     host_sep = (const unsigned char *) find_host_sep(url);
152 
153   for(iptr = (unsigned char *)url;    /* read from here */
154       len; iptr++, len--) {
155 
156     if(iptr < host_sep) {
157       if(Curl_dyn_addn(o, iptr, 1))
158         return CURLUE_OUT_OF_MEMORY;
159       continue;
160     }
161 
162     if(*iptr == ' ') {
163       if(left) {
164         if(Curl_dyn_addn(o, "%20", 3))
165           return CURLUE_OUT_OF_MEMORY;
166       }
167       else {
168         if(Curl_dyn_addn(o, "+", 1))
169           return CURLUE_OUT_OF_MEMORY;
170       }
171       continue;
172     }
173 
174     if(*iptr == '?')
175       left = FALSE;
176 
177     if(urlchar_needs_escaping(*iptr)) {
178       char out[3]={'%'};
179       out[1] = hexdigits[*iptr>>4];
180       out[2] = hexdigits[*iptr & 0xf];
181       if(Curl_dyn_addn(o, out, 3))
182         return CURLUE_OUT_OF_MEMORY;
183     }
184     else {
185       if(Curl_dyn_addn(o, iptr, 1))
186         return CURLUE_OUT_OF_MEMORY;
187     }
188   }
189 
190   return CURLUE_OK;
191 }
192 
193 /*
194  * Returns the length of the scheme if the given URL is absolute (as opposed
195  * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
196  * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
197  *
198  * If 'guess_scheme' is TRUE, it means the URL might be provided without
199  * scheme.
200  */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)201 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
202                             bool guess_scheme)
203 {
204   int i = 0;
205   DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
206   (void)buflen; /* only used in debug-builds */
207   if(buf)
208     buf[0] = 0; /* always leave a defined value in buf */
209 #ifdef WIN32
210   if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
211     return 0;
212 #endif
213   if(ISALPHA(url[0]))
214     for(i = 1; i < MAX_SCHEME_LEN; ++i) {
215       char s = url[i];
216       if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
217         /* RFC 3986 3.1 explains:
218            scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
219         */
220       }
221       else {
222         break;
223       }
224     }
225   if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
226     /* If this does not guess scheme, the scheme always ends with the colon so
227        that this also detects data: URLs etc. In guessing mode, data: could
228        be the host name "data" with a specified port number. */
229 
230     /* the length of the scheme is the name part only */
231     size_t len = i;
232     if(buf) {
233       buf[i] = 0;
234       while(i--) {
235         buf[i] = Curl_raw_tolower(url[i]);
236       }
237     }
238     return len;
239   }
240   return 0;
241 }
242 
243 /*
244  * Concatenate a relative URL to a base URL making it absolute.
245  * URL-encodes any spaces.
246  * The returned pointer must be freed by the caller unless NULL
247  * (returns NULL on out of memory).
248  *
249  * Note that this function destroys the 'base' string.
250  */
concat_url(char * base,const char * relurl)251 static char *concat_url(char *base, const char *relurl)
252 {
253   /***
254    TRY to append this new path to the old URL
255    to the right of the host part. Oh crap, this is doomed to cause
256    problems in the future...
257   */
258   struct dynbuf newest;
259   char *protsep;
260   char *pathsep;
261   bool host_changed = FALSE;
262   const char *useurl = relurl;
263 
264   /* protsep points to the start of the host name */
265   protsep = strstr(base, "//");
266   if(!protsep)
267     protsep = base;
268   else
269     protsep += 2; /* pass the slashes */
270 
271   if('/' != relurl[0]) {
272     int level = 0;
273 
274     /* First we need to find out if there's a ?-letter in the URL,
275        and cut it and the right-side of that off */
276     pathsep = strchr(protsep, '?');
277     if(pathsep)
278       *pathsep = 0;
279 
280     /* we have a relative path to append to the last slash if there's one
281        available, or if the new URL is just a query string (starts with a
282        '?')  we append the new one at the end of the entire currently worked
283        out URL */
284     if(useurl[0] != '?') {
285       pathsep = strrchr(protsep, '/');
286       if(pathsep)
287         *pathsep = 0;
288     }
289 
290     /* Check if there's any slash after the host name, and if so, remember
291        that position instead */
292     pathsep = strchr(protsep, '/');
293     if(pathsep)
294       protsep = pathsep + 1;
295     else
296       protsep = NULL;
297 
298     /* now deal with one "./" or any amount of "../" in the newurl
299        and act accordingly */
300 
301     if((useurl[0] == '.') && (useurl[1] == '/'))
302       useurl += 2; /* just skip the "./" */
303 
304     while((useurl[0] == '.') &&
305           (useurl[1] == '.') &&
306           (useurl[2] == '/')) {
307       level++;
308       useurl += 3; /* pass the "../" */
309     }
310 
311     if(protsep) {
312       while(level--) {
313         /* cut off one more level from the right of the original URL */
314         pathsep = strrchr(protsep, '/');
315         if(pathsep)
316           *pathsep = 0;
317         else {
318           *protsep = 0;
319           break;
320         }
321       }
322     }
323   }
324   else {
325     /* We got a new absolute path for this server */
326 
327     if(relurl[1] == '/') {
328       /* the new URL starts with //, just keep the protocol part from the
329          original one */
330       *protsep = 0;
331       useurl = &relurl[2]; /* we keep the slashes from the original, so we
332                               skip the new ones */
333       host_changed = TRUE;
334     }
335     else {
336       /* cut off the original URL from the first slash, or deal with URLs
337          without slash */
338       pathsep = strchr(protsep, '/');
339       if(pathsep) {
340         /* When people use badly formatted URLs, such as
341            "http://www.example.com?dir=/home/daniel" we must not use the first
342            slash, if there's a ?-letter before it! */
343         char *sep = strchr(protsep, '?');
344         if(sep && (sep < pathsep))
345           pathsep = sep;
346         *pathsep = 0;
347       }
348       else {
349         /* There was no slash. Now, since we might be operating on a badly
350            formatted URL, such as "http://www.example.com?id=2380" which
351            doesn't use a slash separator as it is supposed to, we need to check
352            for a ?-letter as well! */
353         pathsep = strchr(protsep, '?');
354         if(pathsep)
355           *pathsep = 0;
356       }
357     }
358   }
359 
360   Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
361 
362   /* copy over the root url part */
363   if(Curl_dyn_add(&newest, base))
364     return NULL;
365 
366   /* check if we need to append a slash */
367   if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
368     ;
369   else {
370     if(Curl_dyn_addn(&newest, "/", 1))
371       return NULL;
372   }
373 
374   /* then append the new piece on the right side */
375   urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
376 
377   return Curl_dyn_ptr(&newest);
378 }
379 
380 /* scan for byte values <= 31, 127 and sometimes space */
junkscan(const char * url,size_t * urllen,unsigned int flags)381 static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
382 {
383   static const char badbytes[]={
384     /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
385     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
386     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
387     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
388     0x7f, 0x00 /* null-terminate */
389   };
390   size_t n = strlen(url);
391   size_t nfine;
392 
393   if(n > CURL_MAX_INPUT_LENGTH)
394     /* excessive input length */
395     return CURLUE_MALFORMED_INPUT;
396 
397   nfine = strcspn(url, badbytes);
398   if((nfine != n) ||
399      (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
400     return CURLUE_MALFORMED_INPUT;
401 
402   *urllen = n;
403   return CURLUE_OK;
404 }
405 
406 /*
407  * parse_hostname_login()
408  *
409  * Parse the login details (user name, password and options) from the URL and
410  * strip them out of the host name
411  *
412  */
parse_hostname_login(struct Curl_URL * u,const char * login,size_t len,unsigned int flags,size_t * offset)413 static CURLUcode parse_hostname_login(struct Curl_URL *u,
414                                       const char *login,
415                                       size_t len,
416                                       unsigned int flags,
417                                       size_t *offset) /* to the host name */
418 {
419   CURLUcode result = CURLUE_OK;
420   CURLcode ccode;
421   char *userp = NULL;
422   char *passwdp = NULL;
423   char *optionsp = NULL;
424   const struct Curl_handler *h = NULL;
425 
426   /* At this point, we assume all the other special cases have been taken
427    * care of, so the host is at most
428    *
429    *   [user[:password][;options]]@]hostname
430    *
431    * We need somewhere to put the embedded details, so do that first.
432    */
433   char *ptr;
434 
435   DEBUGASSERT(login);
436 
437   *offset = 0;
438   ptr = memchr(login, '@', len);
439   if(!ptr)
440     goto out;
441 
442   /* We will now try to extract the
443    * possible login information in a string like:
444    * ftp://user:password@ftp.my.site:8021/README */
445   ptr++;
446 
447   /* if this is a known scheme, get some details */
448   if(u->scheme)
449     h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
450 
451   /* We could use the login information in the URL so extract it. Only parse
452      options if the handler says we should. Note that 'h' might be NULL! */
453   ccode = Curl_parse_login_details(login, ptr - login - 1,
454                                    &userp, &passwdp,
455                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
456                                    &optionsp:NULL);
457   if(ccode) {
458     result = CURLUE_BAD_LOGIN;
459     goto out;
460   }
461 
462   if(userp) {
463     if(flags & CURLU_DISALLOW_USER) {
464       /* Option DISALLOW_USER is set and url contains username. */
465       result = CURLUE_USER_NOT_ALLOWED;
466       goto out;
467     }
468     free(u->user);
469     u->user = userp;
470   }
471 
472   if(passwdp) {
473     free(u->password);
474     u->password = passwdp;
475   }
476 
477   if(optionsp) {
478     free(u->options);
479     u->options = optionsp;
480   }
481 
482   /* the host name starts at this offset */
483   *offset = ptr - login;
484   return CURLUE_OK;
485 
486 out:
487 
488   free(userp);
489   free(passwdp);
490   free(optionsp);
491   u->user = NULL;
492   u->password = NULL;
493   u->options = NULL;
494 
495   return result;
496 }
497 
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)498 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
499                                    bool has_scheme)
500 {
501   char *portptr;
502   char *hostname = Curl_dyn_ptr(host);
503   /*
504    * Find the end of an IPv6 address on the ']' ending bracket.
505    */
506   if(hostname[0] == '[') {
507     portptr = strchr(hostname, ']');
508     if(!portptr)
509       return CURLUE_BAD_IPV6;
510     portptr++;
511     /* this is a RFC2732-style specified IP-address */
512     if(*portptr) {
513       if(*portptr != ':')
514         return CURLUE_BAD_PORT_NUMBER;
515     }
516     else
517       portptr = NULL;
518   }
519   else
520     portptr = strchr(hostname, ':');
521 
522   if(portptr) {
523     char *rest;
524     long port;
525     size_t keep = portptr - hostname;
526 
527     /* Browser behavior adaptation. If there's a colon with no digits after,
528        just cut off the name there which makes us ignore the colon and just
529        use the default port. Firefox, Chrome and Safari all do that.
530 
531        Don't do it if the URL has no scheme, to make something that looks like
532        a scheme not work!
533     */
534     Curl_dyn_setlen(host, keep);
535     portptr++;
536     if(!*portptr)
537       return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
538 
539     if(!ISDIGIT(*portptr))
540       return CURLUE_BAD_PORT_NUMBER;
541 
542     port = strtol(portptr, &rest, 10);  /* Port number must be decimal */
543 
544     if(port > 0xffff)
545       return CURLUE_BAD_PORT_NUMBER;
546 
547     if(rest[0])
548       return CURLUE_BAD_PORT_NUMBER;
549 
550     u->portnum = port;
551     /* generate a new port number string to get rid of leading zeroes etc */
552     free(u->port);
553     u->port = aprintf("%ld", port);
554     if(!u->port)
555       return CURLUE_OUT_OF_MEMORY;
556   }
557 
558   return CURLUE_OK;
559 }
560 
/*
 * ipv6_parse() validates a bracketed IPv6 address and rewrites it in place.
 *
 * This assumes 'hostname' now starts with [. 'hlen' is the length of the
 * whole host name string, brackets included.
 *
 * An optional zone id ("%zone" or "%25zone" before the closing bracket) is
 * stored in u->zoneid and removed from the host name. If Curl_inet_ntop()
 * produces a shorter textual form of the address, that form replaces the
 * original between the brackets.
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6 or CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
                            size_t hlen) /* length of hostname */
{
  size_t len;
  DEBUGASSERT(*hostname == '[');
  if(hlen < 4) /* '[::]' is the shortest possible valid string */
    return CURLUE_BAD_IPV6;
  hostname++; /* step over the opening bracket */
  hlen -= 2;  /* the length without the two brackets */

  /* only valid IPv6 letters are ok */
  len = strspn(hostname, "0123456789abcdefABCDEF:.");

  if(hlen != len) {
    /* something else than address letters before the end: the only thing
       accepted is a zone id */
    hlen = len;
    if(hostname[len] == '%') {
      /* this could now be '%[zone id]' */
      char zoneid[16];
      int i = 0;
      char *h = &hostname[len + 1];
      /* pass '25' if present and is a url encoded percent sign */
      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
        h += 2;
      /* copy at most 15 bytes of zone id, leaving room for the null */
      while(*h && (*h != ']') && (i < 15))
        zoneid[i++] = *h++;
      /* an empty zone id, or one not ending with the closing bracket here
         (too long or unterminated), is an error */
      if(!i || (']' != *h))
        return CURLUE_BAD_IPV6;
      zoneid[i] = 0;
      u->zoneid = strdup(zoneid);
      if(!u->zoneid)
        return CURLUE_OUT_OF_MEMORY;
      hostname[len] = ']'; /* insert end bracket */
      hostname[len + 1] = 0; /* terminate the hostname */
    }
    else
      return CURLUE_BAD_IPV6;
    /* hostname is fine */
  }

  /* Check the IPv6 address. */
  {
    char dest[16]; /* fits a binary IPv6 address */
    char norm[MAX_IPADR_LEN];
    hostname[hlen] = 0; /* end the address there */
    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
      return CURLUE_BAD_IPV6;

    /* check if it can be done shorter */
    if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
       (strlen(norm) < hlen)) {
      strcpy(hostname, norm);
      hlen = strlen(norm);
      /* terminate right after where the bracket is restored below */
      hostname[hlen + 1] = 0;
    }
    hostname[hlen] = ']'; /* restore ending bracket */
  }
  return CURLUE_OK;
}
620 
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)621 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
622                                 size_t hlen) /* length of hostname */
623 {
624   size_t len;
625   DEBUGASSERT(hostname);
626 
627   if(!hlen)
628     return CURLUE_NO_HOST;
629   else if(hostname[0] == '[')
630     return ipv6_parse(u, hostname, hlen);
631   else {
632     /* letters from the second string are not ok */
633     len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
634     if(hlen != len)
635       /* hostname with bad content */
636       return CURLUE_BAD_HOSTNAME;
637   }
638   return CURLUE_OK;
639 }
640 
641 /*
642  * Handle partial IPv4 numerical addresses and different bases, like
643  * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
644  *
645  * If the given input string is syntactically wrong IPv4 or any part for
646  * example is too big, this function returns HOST_NAME.
647  *
648  * Output the "normalized" version of that input string in plain quad decimal
649  * integers.
650  *
651  * Returns the host type.
652  */
653 
654 #define HOST_ERROR   -1 /* out of memory */
655 #define HOST_BAD     -2 /* bad IPv4 address */
656 
657 #define HOST_NAME    1
658 #define HOST_IPV4    2
659 #define HOST_IPV6    3
660 
ipv4_normalize(struct dynbuf * host)661 static int ipv4_normalize(struct dynbuf *host)
662 {
663   bool done = FALSE;
664   int n = 0;
665   const char *c = Curl_dyn_ptr(host);
666   unsigned long parts[4] = {0, 0, 0, 0};
667   CURLcode result = CURLE_OK;
668 
669   if(*c == '[')
670     return HOST_IPV6;
671 
672   while(!done) {
673     char *endp;
674     unsigned long l;
675     if(!ISDIGIT(*c))
676       /* most importantly this doesn't allow a leading plus or minus */
677       return HOST_NAME;
678     l = strtoul(c, &endp, 0);
679 
680     parts[n] = l;
681     c = endp;
682 
683     switch(*c) {
684     case '.':
685       if(n == 3)
686         return HOST_NAME;
687       n++;
688       c++;
689       break;
690 
691     case '\0':
692       done = TRUE;
693       break;
694 
695     default:
696       return HOST_NAME;
697     }
698 
699     /* overflow */
700     if((l == ULONG_MAX) && (errno == ERANGE))
701       return HOST_NAME;
702 
703 #if SIZEOF_LONG > 4
704     /* a value larger than 32 bits */
705     if(l > UINT_MAX)
706       return HOST_NAME;
707 #endif
708   }
709 
710   switch(n) {
711   case 0: /* a -- 32 bits */
712     Curl_dyn_reset(host);
713 
714     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
715                            parts[0] >> 24, (parts[0] >> 16) & 0xff,
716                            (parts[0] >> 8) & 0xff, parts[0] & 0xff);
717     break;
718   case 1: /* a.b -- 8.24 bits */
719     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
720       return HOST_NAME;
721     Curl_dyn_reset(host);
722     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
723                            parts[0], (parts[1] >> 16) & 0xff,
724                            (parts[1] >> 8) & 0xff, parts[1] & 0xff);
725     break;
726   case 2: /* a.b.c -- 8.8.16 bits */
727     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
728       return HOST_NAME;
729     Curl_dyn_reset(host);
730     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
731                            parts[0], parts[1], (parts[2] >> 8) & 0xff,
732                            parts[2] & 0xff);
733     break;
734   case 3: /* a.b.c.d -- 8.8.8.8 bits */
735     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
736        (parts[3] > 0xff))
737       return HOST_NAME;
738     Curl_dyn_reset(host);
739     result = Curl_dyn_addf(host, "%u.%u.%u.%u",
740                            parts[0], parts[1], parts[2], parts[3]);
741     break;
742   }
743   if(result)
744     return HOST_ERROR;
745   return HOST_IPV4;
746 }
747 
748 /* if necessary, replace the host content with a URL decoded version */
urldecode_host(struct dynbuf * host)749 static CURLUcode urldecode_host(struct dynbuf *host)
750 {
751   char *per = NULL;
752   const char *hostname = Curl_dyn_ptr(host);
753   per = strchr(hostname, '%');
754   if(!per)
755     /* nothing to decode */
756     return CURLUE_OK;
757   else {
758     /* encoded */
759     size_t dlen;
760     char *decoded;
761     CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
762                                      REJECT_CTRL);
763     if(result)
764       return CURLUE_BAD_HOSTNAME;
765     Curl_dyn_reset(host);
766     result = Curl_dyn_addn(host, decoded, dlen);
767     free(decoded);
768     if(result)
769       return CURLUE_OUT_OF_MEMORY;
770   }
771 
772   return CURLUE_OK;
773 }
774 
parse_authority(struct Curl_URL * u,const char * auth,size_t authlen,unsigned int flags,struct dynbuf * host,bool has_scheme)775 static CURLUcode parse_authority(struct Curl_URL *u,
776                                  const char *auth, size_t authlen,
777                                  unsigned int flags,
778                                  struct dynbuf *host,
779                                  bool has_scheme)
780 {
781   size_t offset;
782   CURLUcode result;
783 
784   /*
785    * Parse the login details and strip them out of the host name.
786    */
787   result = parse_hostname_login(u, auth, authlen, flags, &offset);
788   if(result)
789     goto out;
790 
791   if(Curl_dyn_addn(host, auth + offset, authlen - offset)) {
792     result = CURLUE_OUT_OF_MEMORY;
793     goto out;
794   }
795 
796   result = Curl_parse_port(u, host, has_scheme);
797   if(result)
798     goto out;
799 
800   if(!Curl_dyn_len(host))
801     return CURLUE_NO_HOST;
802 
803   switch(ipv4_normalize(host)) {
804   case HOST_IPV4:
805     break;
806   case HOST_IPV6:
807     result = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
808     break;
809   case HOST_NAME:
810     result = urldecode_host(host);
811     if(!result)
812       result = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
813     break;
814   case HOST_ERROR:
815     result = CURLUE_OUT_OF_MEMORY;
816     break;
817   case HOST_BAD:
818   default:
819     result = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
820     break;
821   }
822 
823 out:
824   return result;
825 }
826 
Curl_url_set_authority(CURLU * u,const char * authority,unsigned int flags)827 CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
828                                  unsigned int flags)
829 {
830   CURLUcode result;
831   struct dynbuf host;
832 
833   DEBUGASSERT(authority);
834   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
835 
836   result = parse_authority(u, authority, strlen(authority), flags,
837                            &host, !!u->scheme);
838   if(result)
839     Curl_dyn_free(&host);
840   else {
841     free(u->host);
842     u->host = Curl_dyn_ptr(&host);
843   }
844   return result;
845 }
846 
847 /*
848  * "Remove Dot Segments"
849  * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
850  */
851 
/*
 * dedotdotify()
 * @unittest: 1395
 *
 * This function gets a null-terminated path of 'clen' bytes with dot and
 * dotdot sequences passed in and strips them off according to the rules in
 * RFC 3986 section 5.2.4.
 *
 * The function handles a query part ('?' + stuff) appended but it expects
 * that fragments ('#' + stuff) have already been cut off.
 *
 * RETURNS
 *
 * Zero for success and one for out of memory.
 *
 * On success, *outp is either NULL, which means there was nothing to do
 * (the input is shorter than two bytes or contains no dot), or it points to
 * an allocated dedotdotified string.
 */
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
  char *outptr; /* where the next output byte is written */
  const char *endp = &input[clen];
  char *out;

  *outp = NULL;
  /* the path always starts with a slash, and a lone slash has no dot */
  if((clen < 2) || !memchr(input, '.', clen))
    return 0;

  /* the output never gets longer than the input */
  out = malloc(clen + 1);
  if(!out)
    return 1; /* out of memory */

  *out = 0; /* null-terminates, for inputs like "./" */
  outptr = out;

  do {
    /* TRUE as long as this round is considered to have consumed a dot
       segment; set to FALSE when a regular segment should be copied */
    bool dotdot = TRUE;
    if(*input == '.') {
      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
          remove that prefix from the input buffer; otherwise, */

      if(!strncmp("./", input, 2)) {
        input += 2;
        clen -= 2;
      }
      else if(!strncmp("../", input, 3)) {
        input += 3;
        clen -= 3;
      }
      /*  D.  if the input buffer consists only of "." or "..", then remove
          that from the input buffer; otherwise, */

      else if(!strcmp(".", input) || !strcmp("..", input) ||
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
        /* stop here; note that this terminates at the start of the output
           buffer, not at the current write position */
        *out = 0;
        break;
      }
      else
        dotdot = FALSE;
    }
    else if(*input == '/') {
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
          "."  is a complete path segment, then replace that prefix with "/" in
          the input buffer; otherwise, */
      if(!strncmp("/./", input, 3)) {
        input += 2;
        clen -= 2;
      }
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
        /* a trailing "/." becomes a single slash and ends the processing */
        *outptr++ = '/';
        *outptr = 0;
        break;
      }

      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
          where ".." is a complete path segment, then replace that prefix with
          "/" in the input buffer and remove the last segment and its
          preceding "/" (if any) from the output buffer; otherwise, */

      else if(!strncmp("/../", input, 4)) {
        input += 3;
        clen -= 3;
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        *outptr = 0; /* null-terminate where it stops */
      }
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        /* a trailing "/.." leaves a slash behind and ends the processing */
        *outptr++ = '/';
        *outptr = 0; /* null-terminate where it stops */
        break;
      }
      else
        dotdot = FALSE;
    }
    else
      dotdot = FALSE;

    if(!dotdot) {
      /*  E.  move the first path segment in the input buffer to the end of
          the output buffer, including the initial "/" character (if any) and
          any subsequent characters up to, but not including, the next "/"
          character or the end of the input buffer. */

      /* the copy also stops in front of a '?' */
      do {
        *outptr++ = *input++;
        clen--;
      } while(*input && (*input != '/') && (*input != '?'));
      *outptr = 0;
    }

    /* continue until end of path */
  } while(input < endp);

  *outp = out;
  return 0; /* success */
}
977 
parseurl(const char * url,CURLU * u,unsigned int flags)978 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
979 {
980   const char *path;
981   size_t pathlen;
982   char *query = NULL;
983   char *fragment = NULL;
984   char schemebuf[MAX_SCHEME_LEN + 1];
985   size_t schemelen = 0;
986   size_t urllen;
987   CURLUcode result = CURLUE_OK;
988   size_t fraglen = 0;
989   struct dynbuf host;
990 
991   DEBUGASSERT(url);
992 
993   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
994 
995   result = junkscan(url, &urllen, flags);
996   if(result)
997     goto fail;
998 
999   schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1000                                    flags & (CURLU_GUESS_SCHEME|
1001                                             CURLU_DEFAULT_SCHEME));
1002 
1003   /* handle the file: scheme */
1004   if(schemelen && !strcmp(schemebuf, "file")) {
1005     bool uncpath = FALSE;
1006     if(urllen <= 6) {
1007       /* file:/ is not enough to actually be a complete file: URL */
1008       result = CURLUE_BAD_FILE_URL;
1009       goto fail;
1010     }
1011 
1012     /* path has been allocated large enough to hold this */
1013     path = (char *)&url[5];
1014     pathlen = urllen - 5;
1015 
1016     u->scheme = strdup("file");
1017     if(!u->scheme) {
1018       result = CURLUE_OUT_OF_MEMORY;
1019       goto fail;
1020     }
1021 
1022     /* Extra handling URLs with an authority component (i.e. that start with
1023      * "file://")
1024      *
1025      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1026      * RFC 8089, but not the (current) WHAT-WG URL spec.
1027      */
1028     if(path[0] == '/' && path[1] == '/') {
1029       /* swallow the two slashes */
1030       const char *ptr = &path[2];
1031 
1032       /*
1033        * According to RFC 8089, a file: URL can be reliably dereferenced if:
1034        *
1035        *  o it has no/blank hostname, or
1036        *
1037        *  o the hostname matches "localhost" (case-insensitively), or
1038        *
1039        *  o the hostname is a FQDN that resolves to this machine, or
1040        *
1041        *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
1042        *    Appendix E.3).
1043        *
1044        * For brevity, we only consider URLs with empty, "localhost", or
1045        * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1046        *
1047        * Additionally, there is an exception for URLs with a Windows drive
1048        * letter in the authority (which was accidentally omitted from RFC 8089
1049        * Appendix E, but believe me, it was meant to be there. --MK)
1050        */
1051       if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1052         /* the URL includes a host name, it must match "localhost" or
1053            "127.0.0.1" to be valid */
1054         if(checkprefix("localhost/", ptr) ||
1055            checkprefix("127.0.0.1/", ptr)) {
1056           ptr += 9; /* now points to the slash after the host */
1057         }
1058         else {
1059 #if defined(WIN32)
1060           size_t len;
1061 
1062           /* the host name, NetBIOS computer name, can not contain disallowed
1063              chars, and the delimiting slash character must be appended to the
1064              host name */
1065           path = strpbrk(ptr, "/\\:*?\"<>|");
1066           if(!path || *path != '/') {
1067             result = CURLUE_BAD_FILE_URL;
1068             goto fail;
1069           }
1070 
1071           len = path - ptr;
1072           if(len) {
1073             if(Curl_dyn_addn(&host, ptr, len)) {
1074               result = CURLUE_OUT_OF_MEMORY;
1075               goto fail;
1076             }
1077             uncpath = TRUE;
1078           }
1079 
1080           ptr -= 2; /* now points to the // before the host in UNC */
1081 #else
1082           /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1083              none */
1084           result = CURLUE_BAD_FILE_URL;
1085           goto fail;
1086 #endif
1087         }
1088       }
1089 
1090       path = ptr;
1091       pathlen = urllen - (ptr - url);
1092     }
1093 
1094     if(!uncpath)
1095       /* no host for file: URLs by default */
1096       Curl_dyn_reset(&host);
1097 
1098 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1099     /* Don't allow Windows drive letters when not in Windows.
1100      * This catches both "file:/c:" and "file:c:" */
1101     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1102        STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1103       /* File drive letters are only accepted in MSDOS/Windows */
1104       result = CURLUE_BAD_FILE_URL;
1105       goto fail;
1106     }
1107 #else
1108     /* If the path starts with a slash and a drive letter, ditch the slash */
1109     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1110       /* This cannot be done with strcpy, as the memory chunks overlap! */
1111       path++;
1112       pathlen--;
1113     }
1114 #endif
1115 
1116   }
1117   else {
1118     /* clear path */
1119     const char *schemep = NULL;
1120     const char *hostp;
1121     size_t hostlen;
1122 
1123     if(schemelen) {
1124       int i = 0;
1125       const char *p = &url[schemelen + 1];
1126       while((*p == '/') && (i < 4)) {
1127         p++;
1128         i++;
1129       }
1130 
1131       schemep = schemebuf;
1132       if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1133          !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1134         result = CURLUE_UNSUPPORTED_SCHEME;
1135         goto fail;
1136       }
1137 
1138       if((i < 1) || (i > 3)) {
1139         /* less than one or more than three slashes */
1140         result = CURLUE_BAD_SLASHES;
1141         goto fail;
1142       }
1143       hostp = p; /* host name starts here */
1144     }
1145     else {
1146       /* no scheme! */
1147 
1148       if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1149         result = CURLUE_BAD_SCHEME;
1150         goto fail;
1151       }
1152       if(flags & CURLU_DEFAULT_SCHEME)
1153         schemep = DEFAULT_SCHEME;
1154 
1155       /*
1156        * The URL was badly formatted, let's try without scheme specified.
1157        */
1158       hostp = url;
1159     }
1160 
1161     if(schemep) {
1162       u->scheme = strdup(schemep);
1163       if(!u->scheme) {
1164         result = CURLUE_OUT_OF_MEMORY;
1165         goto fail;
1166       }
1167     }
1168 
1169     /* find the end of the host name + port number */
1170     hostlen = strcspn(hostp, "/?#");
1171     path = &hostp[hostlen];
1172 
1173     /* this pathlen also contains the query and the fragment */
1174     pathlen = urllen - (path - url);
1175     if(hostlen) {
1176 
1177       result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1178       if(result)
1179         goto fail;
1180 
1181       if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1182         const char *hostname = Curl_dyn_ptr(&host);
1183         /* legacy curl-style guess based on host name */
1184         if(checkprefix("ftp.", hostname))
1185           schemep = "ftp";
1186         else if(checkprefix("dict.", hostname))
1187           schemep = "dict";
1188         else if(checkprefix("ldap.", hostname))
1189           schemep = "ldap";
1190         else if(checkprefix("imap.", hostname))
1191           schemep = "imap";
1192         else if(checkprefix("smtp.", hostname))
1193           schemep = "smtp";
1194         else if(checkprefix("pop3.", hostname))
1195           schemep = "pop3";
1196         else
1197           schemep = "http";
1198 
1199         u->scheme = strdup(schemep);
1200         if(!u->scheme) {
1201           result = CURLUE_OUT_OF_MEMORY;
1202           goto fail;
1203         }
1204       }
1205     }
1206     else if(flags & CURLU_NO_AUTHORITY) {
1207       /* allowed to be empty. */
1208       if(Curl_dyn_add(&host, "")) {
1209         result = CURLUE_OUT_OF_MEMORY;
1210         goto fail;
1211       }
1212     }
1213     else {
1214       result = CURLUE_NO_HOST;
1215       goto fail;
1216     }
1217   }
1218 
1219   fragment = strchr(path, '#');
1220   if(fragment) {
1221     fraglen = pathlen - (fragment - path);
1222     if(fraglen > 1) {
1223       /* skip the leading '#' in the copy but include the terminating null */
1224       if(flags & CURLU_URLENCODE) {
1225         struct dynbuf enc;
1226         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1227         if(urlencode_str(&enc, fragment + 1, fraglen, TRUE, FALSE)) {
1228           result = CURLUE_OUT_OF_MEMORY;
1229           goto fail;
1230         }
1231         u->fragment = Curl_dyn_ptr(&enc);
1232       }
1233       else {
1234         u->fragment = Curl_memdup(fragment + 1, fraglen);
1235         if(!u->fragment) {
1236           result = CURLUE_OUT_OF_MEMORY;
1237           goto fail;
1238         }
1239       }
1240     }
1241     /* after this, pathlen still contains the query */
1242     pathlen -= fraglen;
1243   }
1244 
1245   DEBUGASSERT(pathlen < urllen);
1246   query = memchr(path, '?', pathlen);
1247   if(query) {
1248     size_t qlen = fragment ? (size_t)(fragment - query) :
1249       pathlen - (query - path);
1250     pathlen -= qlen;
1251     if(qlen > 1) {
1252       if(flags & CURLU_URLENCODE) {
1253         struct dynbuf enc;
1254         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1255         /* skip the leading question mark */
1256         if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1257           result = CURLUE_OUT_OF_MEMORY;
1258           goto fail;
1259         }
1260         u->query = Curl_dyn_ptr(&enc);
1261       }
1262       else {
1263         u->query = Curl_memdup(query + 1, qlen);
1264         if(!u->query) {
1265           result = CURLUE_OUT_OF_MEMORY;
1266           goto fail;
1267         }
1268         u->query[qlen - 1] = 0;
1269       }
1270     }
1271     else {
1272       /* single byte query */
1273       u->query = strdup("");
1274       if(!u->query) {
1275         result = CURLUE_OUT_OF_MEMORY;
1276         goto fail;
1277       }
1278     }
1279   }
1280 
1281   if(pathlen && (flags & CURLU_URLENCODE)) {
1282     struct dynbuf enc;
1283     Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1284     if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1285       result = CURLUE_OUT_OF_MEMORY;
1286       goto fail;
1287     }
1288     pathlen = Curl_dyn_len(&enc);
1289     path = u->path = Curl_dyn_ptr(&enc);
1290   }
1291 
1292   if(pathlen <= 1) {
1293     /* there is no path left or just the slash, unset */
1294     path = NULL;
1295   }
1296   else {
1297     if(!u->path) {
1298       u->path = Curl_memdup(path, pathlen + 1);
1299       if(!u->path) {
1300         result = CURLUE_OUT_OF_MEMORY;
1301         goto fail;
1302       }
1303       u->path[pathlen] = 0;
1304       path = u->path;
1305     }
1306     else if(flags & CURLU_URLENCODE)
1307       /* it might have encoded more than just the path so cut it */
1308       u->path[pathlen] = 0;
1309 
1310     if(!(flags & CURLU_PATH_AS_IS)) {
1311       /* remove ../ and ./ sequences according to RFC3986 */
1312       char *dedot;
1313       int err = dedotdotify((char *)path, pathlen, &dedot);
1314       if(err) {
1315         result = CURLUE_OUT_OF_MEMORY;
1316         goto fail;
1317       }
1318       if(dedot) {
1319         free(u->path);
1320         u->path = dedot;
1321       }
1322     }
1323   }
1324 
1325   u->host = Curl_dyn_ptr(&host);
1326 
1327   return result;
1328 fail:
1329   Curl_dyn_free(&host);
1330   free_urlhandle(u);
1331   return result;
1332 }
1333 
1334 /*
1335  * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1336  */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1337 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1338                                       unsigned int flags)
1339 {
1340   CURLUcode result;
1341   CURLU tmpurl;
1342   memset(&tmpurl, 0, sizeof(tmpurl));
1343   result = parseurl(url, &tmpurl, flags);
1344   if(!result) {
1345     free_urlhandle(u);
1346     *u = tmpurl;
1347   }
1348   return result;
1349 }
1350 
1351 /*
1352  */
curl_url(void)1353 CURLU *curl_url(void)
1354 {
1355   return calloc(sizeof(struct Curl_URL), 1);
1356 }
1357 
/*
 * curl_url_cleanup() releases every part stored in the handle and then the
 * handle itself. Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1365 
/*
 * DUP() duplicates the string member 'name' from the struct 'src' points to
 * into the struct 'dest' points to. A NULL member is left alone. If the
 * duplication fails, control jumps to a label called 'fail' that the
 * enclosing function must provide. The pointer arguments are parenthesized
 * so that expressions such as '&obj' can be passed.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1374 
curl_url_dup(const CURLU * in)1375 CURLU *curl_url_dup(const CURLU *in)
1376 {
1377   struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1378   if(u) {
1379     DUP(u, in, scheme);
1380     DUP(u, in, user);
1381     DUP(u, in, password);
1382     DUP(u, in, options);
1383     DUP(u, in, host);
1384     DUP(u, in, port);
1385     DUP(u, in, path);
1386     DUP(u, in, query);
1387     DUP(u, in, fragment);
1388     DUP(u, in, zoneid);
1389     u->portnum = in->portnum;
1390   }
1391   return u;
1392 fail:
1393   curl_url_cleanup(u);
1394   return NULL;
1395 }
1396 
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1397 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1398                        char **part, unsigned int flags)
1399 {
1400   const char *ptr;
1401   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1402   char portbuf[7];
1403   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1404   bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1405   bool punycode = FALSE;
1406   bool depunyfy = FALSE;
1407   bool plusdecode = FALSE;
1408   (void)flags;
1409   if(!u)
1410     return CURLUE_BAD_HANDLE;
1411   if(!part)
1412     return CURLUE_BAD_PARTPOINTER;
1413   *part = NULL;
1414 
1415   switch(what) {
1416   case CURLUPART_SCHEME:
1417     ptr = u->scheme;
1418     ifmissing = CURLUE_NO_SCHEME;
1419     urldecode = FALSE; /* never for schemes */
1420     break;
1421   case CURLUPART_USER:
1422     ptr = u->user;
1423     ifmissing = CURLUE_NO_USER;
1424     break;
1425   case CURLUPART_PASSWORD:
1426     ptr = u->password;
1427     ifmissing = CURLUE_NO_PASSWORD;
1428     break;
1429   case CURLUPART_OPTIONS:
1430     ptr = u->options;
1431     ifmissing = CURLUE_NO_OPTIONS;
1432     break;
1433   case CURLUPART_HOST:
1434     ptr = u->host;
1435     ifmissing = CURLUE_NO_HOST;
1436     punycode = (flags & CURLU_PUNYCODE)?1:0;
1437     depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1438     break;
1439   case CURLUPART_ZONEID:
1440     ptr = u->zoneid;
1441     ifmissing = CURLUE_NO_ZONEID;
1442     break;
1443   case CURLUPART_PORT:
1444     ptr = u->port;
1445     ifmissing = CURLUE_NO_PORT;
1446     urldecode = FALSE; /* never for port */
1447     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1448       /* there's no stored port number, but asked to deliver
1449          a default one for the scheme */
1450       const struct Curl_handler *h =
1451         Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1452       if(h) {
1453         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1454         ptr = portbuf;
1455       }
1456     }
1457     else if(ptr && u->scheme) {
1458       /* there is a stored port number, but ask to inhibit if
1459          it matches the default one for the scheme */
1460       const struct Curl_handler *h =
1461         Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1462       if(h && (h->defport == u->portnum) &&
1463          (flags & CURLU_NO_DEFAULT_PORT))
1464         ptr = NULL;
1465     }
1466     break;
1467   case CURLUPART_PATH:
1468     ptr = u->path;
1469     if(!ptr)
1470       ptr = "/";
1471     break;
1472   case CURLUPART_QUERY:
1473     ptr = u->query;
1474     ifmissing = CURLUE_NO_QUERY;
1475     plusdecode = urldecode;
1476     break;
1477   case CURLUPART_FRAGMENT:
1478     ptr = u->fragment;
1479     ifmissing = CURLUE_NO_FRAGMENT;
1480     break;
1481   case CURLUPART_URL: {
1482     char *url;
1483     char *scheme;
1484     char *options = u->options;
1485     char *port = u->port;
1486     char *allochost = NULL;
1487     punycode = (flags & CURLU_PUNYCODE)?1:0;
1488     depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1489     if(u->scheme && strcasecompare("file", u->scheme)) {
1490       url = aprintf("file://%s%s%s",
1491                     u->path,
1492                     u->fragment? "#": "",
1493                     u->fragment? u->fragment : "");
1494     }
1495     else if(!u->host)
1496       return CURLUE_NO_HOST;
1497     else {
1498       const struct Curl_handler *h = NULL;
1499       if(u->scheme)
1500         scheme = u->scheme;
1501       else if(flags & CURLU_DEFAULT_SCHEME)
1502         scheme = (char *) DEFAULT_SCHEME;
1503       else
1504         return CURLUE_NO_SCHEME;
1505 
1506       h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1507       if(!port && (flags & CURLU_DEFAULT_PORT)) {
1508         /* there's no stored port number, but asked to deliver
1509            a default one for the scheme */
1510         if(h) {
1511           msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1512           port = portbuf;
1513         }
1514       }
1515       else if(port) {
1516         /* there is a stored port number, but asked to inhibit if it matches
1517            the default one for the scheme */
1518         if(h && (h->defport == u->portnum) &&
1519            (flags & CURLU_NO_DEFAULT_PORT))
1520           port = NULL;
1521       }
1522 
1523       if(h && !(h->flags & PROTOPT_URLOPTIONS))
1524         options = NULL;
1525 
1526       if(u->host[0] == '[') {
1527         if(u->zoneid) {
1528           /* make it '[ host %25 zoneid ]' */
1529           struct dynbuf enc;
1530           size_t hostlen = strlen(u->host);
1531           Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1532           if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1533                            u->zoneid))
1534             return CURLUE_OUT_OF_MEMORY;
1535           allochost = Curl_dyn_ptr(&enc);
1536         }
1537       }
1538       else if(urlencode) {
1539         allochost = curl_easy_escape(NULL, u->host, 0);
1540         if(!allochost)
1541           return CURLUE_OUT_OF_MEMORY;
1542       }
1543       else if(punycode) {
1544         if(!Curl_is_ASCII_name(u->host)) {
1545 #ifndef USE_IDN
1546           return CURLUE_LACKS_IDN;
1547 #else
1548           CURLcode result = Curl_idn_decode(u->host, &allochost);
1549           if(result)
1550             return (result == CURLE_OUT_OF_MEMORY) ?
1551               CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1552 #endif
1553         }
1554       }
1555       else if(depunyfy) {
1556         if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1557 #ifndef USE_IDN
1558           return CURLUE_LACKS_IDN;
1559 #else
1560           CURLcode result = Curl_idn_encode(u->host, &allochost);
1561           if(result)
1562             /* this is the most likely error */
1563             return (result == CURLE_OUT_OF_MEMORY) ?
1564               CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1565 #endif
1566         }
1567       }
1568 
1569       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1570                     scheme,
1571                     u->user ? u->user : "",
1572                     u->password ? ":": "",
1573                     u->password ? u->password : "",
1574                     options ? ";" : "",
1575                     options ? options : "",
1576                     (u->user || u->password || options) ? "@": "",
1577                     allochost ? allochost : u->host,
1578                     port ? ":": "",
1579                     port ? port : "",
1580                     u->path ? u->path : "/",
1581                     (u->query && u->query[0]) ? "?": "",
1582                     (u->query && u->query[0]) ? u->query : "",
1583                     u->fragment? "#": "",
1584                     u->fragment? u->fragment : "");
1585       free(allochost);
1586     }
1587     if(!url)
1588       return CURLUE_OUT_OF_MEMORY;
1589     *part = url;
1590     return CURLUE_OK;
1591   }
1592   default:
1593     ptr = NULL;
1594     break;
1595   }
1596   if(ptr) {
1597     size_t partlen = strlen(ptr);
1598     size_t i = 0;
1599     *part = Curl_memdup(ptr, partlen + 1);
1600     if(!*part)
1601       return CURLUE_OUT_OF_MEMORY;
1602     if(plusdecode) {
1603       /* convert + to space */
1604       char *plus = *part;
1605       for(i = 0; i < partlen; ++plus, i++) {
1606         if(*plus == '+')
1607           *plus = ' ';
1608       }
1609     }
1610     if(urldecode) {
1611       char *decoded;
1612       size_t dlen;
1613       /* this unconditional rejection of control bytes is documented
1614          API behavior */
1615       CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1616       free(*part);
1617       if(res) {
1618         *part = NULL;
1619         return CURLUE_URLDECODE;
1620       }
1621       *part = decoded;
1622       partlen = dlen;
1623     }
1624     if(urlencode) {
1625       struct dynbuf enc;
1626       Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1627       if(urlencode_str(&enc, *part, partlen, TRUE,
1628                        what == CURLUPART_QUERY))
1629         return CURLUE_OUT_OF_MEMORY;
1630       free(*part);
1631       *part = Curl_dyn_ptr(&enc);
1632     }
1633     else if(punycode) {
1634       if(!Curl_is_ASCII_name(u->host)) {
1635 #ifndef USE_IDN
1636         return CURLUE_LACKS_IDN;
1637 #else
1638         char *allochost;
1639         CURLcode result = Curl_idn_decode(*part, &allochost);
1640         if(result)
1641           return (result == CURLE_OUT_OF_MEMORY) ?
1642             CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1643         free(*part);
1644         *part = allochost;
1645 #endif
1646       }
1647     }
1648     else if(depunyfy) {
1649       if(Curl_is_ASCII_name(u->host)  && !strncmp("xn--", u->host, 4)) {
1650 #ifndef USE_IDN
1651         return CURLUE_LACKS_IDN;
1652 #else
1653         char *allochost;
1654         CURLcode result = Curl_idn_encode(*part, &allochost);
1655         if(result)
1656           return (result == CURLE_OUT_OF_MEMORY) ?
1657             CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1658         free(*part);
1659         *part = allochost;
1660 #endif
1661       }
1662     }
1663 
1664     return CURLUE_OK;
1665   }
1666   else
1667     return ifmissing;
1668 }
1669 
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1670 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1671                        const char *part, unsigned int flags)
1672 {
1673   char **storep = NULL;
1674   long port = 0;
1675   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1676   bool plusencode = FALSE;
1677   bool urlskipslash = FALSE;
1678   bool leadingslash = FALSE;
1679   bool appendquery = FALSE;
1680   bool equalsencode = FALSE;
1681   size_t nalloc;
1682 
1683   if(!u)
1684     return CURLUE_BAD_HANDLE;
1685   if(!part) {
1686     /* setting a part to NULL clears it */
1687     switch(what) {
1688     case CURLUPART_URL:
1689       break;
1690     case CURLUPART_SCHEME:
1691       storep = &u->scheme;
1692       break;
1693     case CURLUPART_USER:
1694       storep = &u->user;
1695       break;
1696     case CURLUPART_PASSWORD:
1697       storep = &u->password;
1698       break;
1699     case CURLUPART_OPTIONS:
1700       storep = &u->options;
1701       break;
1702     case CURLUPART_HOST:
1703       storep = &u->host;
1704       break;
1705     case CURLUPART_ZONEID:
1706       storep = &u->zoneid;
1707       break;
1708     case CURLUPART_PORT:
1709       u->portnum = 0;
1710       storep = &u->port;
1711       break;
1712     case CURLUPART_PATH:
1713       storep = &u->path;
1714       break;
1715     case CURLUPART_QUERY:
1716       storep = &u->query;
1717       break;
1718     case CURLUPART_FRAGMENT:
1719       storep = &u->fragment;
1720       break;
1721     default:
1722       return CURLUE_UNKNOWN_PART;
1723     }
1724     if(storep && *storep) {
1725       Curl_safefree(*storep);
1726     }
1727     else if(!storep) {
1728       free_urlhandle(u);
1729       memset(u, 0, sizeof(struct Curl_URL));
1730     }
1731     return CURLUE_OK;
1732   }
1733 
1734   nalloc = strlen(part);
1735   if(nalloc > CURL_MAX_INPUT_LENGTH)
1736     /* excessive input length */
1737     return CURLUE_MALFORMED_INPUT;
1738 
1739   switch(what) {
1740   case CURLUPART_SCHEME: {
1741     size_t plen = strlen(part);
1742     const char *s = part;
1743     if((plen > MAX_SCHEME_LEN) || (plen < 1))
1744       /* too long or too short */
1745       return CURLUE_BAD_SCHEME;
1746     if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1747        /* verify that it is a fine scheme */
1748        !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1749       return CURLUE_UNSUPPORTED_SCHEME;
1750     storep = &u->scheme;
1751     urlencode = FALSE; /* never */
1752     if(ISALPHA(*s)) {
1753       /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1754       while(--plen) {
1755         if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1756           s++; /* fine */
1757         else
1758           return CURLUE_BAD_SCHEME;
1759       }
1760     }
1761     else
1762       return CURLUE_BAD_SCHEME;
1763     break;
1764   }
1765   case CURLUPART_USER:
1766     storep = &u->user;
1767     break;
1768   case CURLUPART_PASSWORD:
1769     storep = &u->password;
1770     break;
1771   case CURLUPART_OPTIONS:
1772     storep = &u->options;
1773     break;
1774   case CURLUPART_HOST:
1775     storep = &u->host;
1776     Curl_safefree(u->zoneid);
1777     break;
1778   case CURLUPART_ZONEID:
1779     storep = &u->zoneid;
1780     break;
1781   case CURLUPART_PORT:
1782   {
1783     char *endp;
1784     urlencode = FALSE; /* never */
1785     port = strtol(part, &endp, 10);  /* Port number must be decimal */
1786     if((port <= 0) || (port > 0xffff))
1787       return CURLUE_BAD_PORT_NUMBER;
1788     if(*endp)
1789       /* weirdly provided number, not good! */
1790       return CURLUE_BAD_PORT_NUMBER;
1791     storep = &u->port;
1792   }
1793   break;
1794   case CURLUPART_PATH:
1795     urlskipslash = TRUE;
1796     leadingslash = TRUE; /* enforce */
1797     storep = &u->path;
1798     break;
1799   case CURLUPART_QUERY:
1800     plusencode = urlencode;
1801     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1802     equalsencode = appendquery;
1803     storep = &u->query;
1804     break;
1805   case CURLUPART_FRAGMENT:
1806     storep = &u->fragment;
1807     break;
1808   case CURLUPART_URL: {
1809     /*
1810      * Allow a new URL to replace the existing (if any) contents.
1811      *
1812      * If the existing contents is enough for a URL, allow a relative URL to
1813      * replace it.
1814      */
1815     CURLUcode result;
1816     char *oldurl;
1817     char *redired_url;
1818 
1819     if(!nalloc)
1820       /* a blank URL is not a valid URL */
1821       return CURLUE_MALFORMED_INPUT;
1822 
1823     /* if the new thing is absolute or the old one is not
1824      * (we could not get an absolute url in 'oldurl'),
1825      * then replace the existing with the new. */
1826     if(Curl_is_absolute_url(part, NULL, 0,
1827                             flags & (CURLU_GUESS_SCHEME|
1828                                      CURLU_DEFAULT_SCHEME))
1829        || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1830       return parseurl_and_replace(part, u, flags);
1831     }
1832 
1833     /* apply the relative part to create a new URL
1834      * and replace the existing one with it. */
1835     redired_url = concat_url(oldurl, part);
1836     free(oldurl);
1837     if(!redired_url)
1838       return CURLUE_OUT_OF_MEMORY;
1839 
1840     result = parseurl_and_replace(redired_url, u, flags);
1841     free(redired_url);
1842     return result;
1843   }
1844   default:
1845     return CURLUE_UNKNOWN_PART;
1846   }
1847   DEBUGASSERT(storep);
1848   {
1849     const char *newp;
1850     struct dynbuf enc;
1851     Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1852 
1853     if(leadingslash && (part[0] != '/')) {
1854       CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1855       if(result)
1856         return CURLUE_OUT_OF_MEMORY;
1857     }
1858     if(urlencode) {
1859       const unsigned char *i;
1860 
1861       for(i = (const unsigned char *)part; *i; i++) {
1862         CURLcode result;
1863         if((*i == ' ') && plusencode) {
1864           result = Curl_dyn_addn(&enc, "+", 1);
1865           if(result)
1866             return CURLUE_OUT_OF_MEMORY;
1867         }
1868         else if(ISUNRESERVED(*i) ||
1869                 ((*i == '/') && urlskipslash) ||
1870                 ((*i == '=') && equalsencode)) {
1871           if((*i == '=') && equalsencode)
1872             /* only skip the first equals sign */
1873             equalsencode = FALSE;
1874           result = Curl_dyn_addn(&enc, i, 1);
1875           if(result)
1876             return CURLUE_OUT_OF_MEMORY;
1877         }
1878         else {
1879           char out[3]={'%'};
1880           out[1] = hexdigits[*i>>4];
1881           out[2] = hexdigits[*i & 0xf];
1882           result = Curl_dyn_addn(&enc, out, 3);
1883           if(result)
1884             return CURLUE_OUT_OF_MEMORY;
1885         }
1886       }
1887     }
1888     else {
1889       char *p;
1890       CURLcode result = Curl_dyn_add(&enc, part);
1891       if(result)
1892         return CURLUE_OUT_OF_MEMORY;
1893       p = Curl_dyn_ptr(&enc);
1894       while(*p) {
1895         /* make sure percent encoded are lower case */
1896         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1897            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1898           p[1] = Curl_raw_tolower(p[1]);
1899           p[2] = Curl_raw_tolower(p[2]);
1900           p += 3;
1901         }
1902         else
1903           p++;
1904       }
1905     }
1906     newp = Curl_dyn_ptr(&enc);
1907 
1908     if(appendquery) {
1909       /* Append the 'newp' string onto the old query. Add a '&' separator if
1910          none is present at the end of the existing query already */
1911 
1912       size_t querylen = u->query ? strlen(u->query) : 0;
1913       bool addamperand = querylen && (u->query[querylen -1] != '&');
1914       if(querylen) {
1915         struct dynbuf qbuf;
1916         Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1917 
1918         if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1919           goto nomem;
1920 
1921         if(addamperand) {
1922           if(Curl_dyn_addn(&qbuf, "&", 1))
1923             goto nomem;
1924         }
1925         if(Curl_dyn_add(&qbuf, newp))
1926           goto nomem;
1927         Curl_dyn_free(&enc);
1928         free(*storep);
1929         *storep = Curl_dyn_ptr(&qbuf);
1930         return CURLUE_OK;
1931 nomem:
1932         Curl_dyn_free(&enc);
1933         return CURLUE_OUT_OF_MEMORY;
1934       }
1935     }
1936 
1937     if(what == CURLUPART_HOST) {
1938       size_t n = strlen(newp);
1939       if(!n && (flags & CURLU_NO_AUTHORITY)) {
1940         /* Skip hostname check, it's allowed to be empty. */
1941       }
1942       else {
1943         if(!n || hostname_check(u, (char *)newp, n)) {
1944           Curl_dyn_free(&enc);
1945           return CURLUE_BAD_HOSTNAME;
1946         }
1947       }
1948     }
1949 
1950     free(*storep);
1951     *storep = (char *)newp;
1952   }
1953   /* set after the string, to make it not assigned if the allocation above
1954      fails */
1955   if(port)
1956     u->portnum = port;
1957   return CURLUE_OK;
1958 }
1959