• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * SPDX-License-Identifier: curl
22  *
23  ***************************************************************************/
24 
25 #include "curl_setup.h"
26 
27 #include "urldata.h"
28 #include "urlapi-int.h"
29 #include "strcase.h"
30 #include "url.h"
31 #include "escape.h"
32 #include "curl_ctype.h"
33 #include "inet_pton.h"
34 #include "inet_ntop.h"
35 #include "strdup.h"
36 #include "idn.h"
37 
38 /* The last 3 #include files should be in this order */
39 #include "curl_printf.h"
40 #include "curl_memory.h"
41 #include "memdebug.h"
42 
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * The argument is parenthesized at every use so that an expression such as
 * 'ptr + 1' expands correctly. Note that the argument is evaluated more than
 * once, so it must be free of side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))

/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash, backslash or NUL.
 * The argument is evaluated more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))

/* scheme is not URL encoded, the longest libcurl supported ones are... */
#define MAX_SCHEME_LEN 40

/*
 * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 */
#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
68 
/*
 * Internal representation of a CURLU handle. Each string member points to
 * an individually allocated, URL-encoded string that free_urlhandle()
 * releases with free(); a member may be NULL.
 */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};

#define DEFAULT_SCHEME "https"

/*
 * Release all strings held by the handle.
 *
 * Only the members are freed, not the struct itself. The member pointers are
 * not cleared, so the caller has to discard or re-initialize the struct
 * before using it again.
 */
static void free_urlhandle(struct Curl_URL *u)
{
  /* free() ignores NULL, so parts that were never set need no check */
  free(u->fragment);
  free(u->query);
  free(u->path);
  free(u->port);
  free(u->zoneid);
  free(u->host);
  free(u->options);
  free(u->password);
  free(u->user);
  free(u->scheme);
}
99 
/*
 * Locate where the host name ends in 'url'.
 *
 * The host name is taken to start right after the first "//", or at the
 * beginning of the string when there is no "//". The returned pointer is the
 * first '/' or '?' found from there on (the latter covers URLs like
 * http://www.url.com?id=2380), or the terminating null byte when neither
 * character is present.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* skip the double slash if there was one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  /* a missing separator counts as being at the end of the string */
  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  /* whichever comes first ends the host name */
  return (qmark <= slash) ? qmark : slash;
}
127 
128 /*
129  * Decide whether a character in a URL must be escaped.
130  */
131 #define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
132 
133 static const char hexdigits[] = "0123456789abcdef";
134 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
135  * spaces in the source URL accordingly.
136  *
137  * URL encoding should be skipped for host names, otherwise IDN resolution
138  * will fail.
139  */
urlencode_str(struct dynbuf * o,const char * url,size_t len,bool relative,bool query)140 static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
141                                size_t len, bool relative,
142                                bool query)
143 {
144   /* we must add this with whitespace-replacing */
145   bool left = !query;
146   const unsigned char *iptr;
147   const unsigned char *host_sep = (const unsigned char *) url;
148 
149   if(!relative)
150     host_sep = (const unsigned char *) find_host_sep(url);
151 
152   for(iptr = (unsigned char *)url;    /* read from here */
153       len; iptr++, len--) {
154 
155     if(iptr < host_sep) {
156       if(Curl_dyn_addn(o, iptr, 1))
157         return CURLUE_OUT_OF_MEMORY;
158       continue;
159     }
160 
161     if(*iptr == ' ') {
162       if(left) {
163         if(Curl_dyn_addn(o, "%20", 3))
164           return CURLUE_OUT_OF_MEMORY;
165       }
166       else {
167         if(Curl_dyn_addn(o, "+", 1))
168           return CURLUE_OUT_OF_MEMORY;
169       }
170       continue;
171     }
172 
173     if(*iptr == '?')
174       left = FALSE;
175 
176     if(urlchar_needs_escaping(*iptr)) {
177       char out[3]={'%'};
178       out[1] = hexdigits[*iptr>>4];
179       out[2] = hexdigits[*iptr & 0xf];
180       if(Curl_dyn_addn(o, out, 3))
181         return CURLUE_OUT_OF_MEMORY;
182     }
183     else {
184       if(Curl_dyn_addn(o, iptr, 1))
185         return CURLUE_OUT_OF_MEMORY;
186     }
187   }
188 
189   return CURLUE_OK;
190 }
191 
192 /*
193  * Returns the length of the scheme if the given URL is absolute (as opposed
194  * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
195  * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
196  *
197  * If 'guess_scheme' is TRUE, it means the URL might be provided without
198  * scheme.
199  */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen,bool guess_scheme)200 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
201                             bool guess_scheme)
202 {
203   int i;
204   DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
205   (void)buflen; /* only used in debug-builds */
206   if(buf)
207     buf[0] = 0; /* always leave a defined value in buf */
208 #ifdef WIN32
209   if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
210     return 0;
211 #endif
212   for(i = 0; i < MAX_SCHEME_LEN; ++i) {
213     char s = url[i];
214     if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
215       /* RFC 3986 3.1 explains:
216         scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
217       */
218     }
219     else {
220       break;
221     }
222   }
223   if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
224     /* If this does not guess scheme, the scheme always ends with the colon so
225        that this also detects data: URLs etc. In guessing mode, data: could
226        be the host name "data" with a specified port number. */
227 
228     /* the length of the scheme is the name part only */
229     size_t len = i;
230     if(buf) {
231       buf[i] = 0;
232       while(i--) {
233         buf[i] = Curl_raw_tolower(url[i]);
234       }
235     }
236     return len;
237   }
238   return 0;
239 }
240 
241 /*
242  * Concatenate a relative URL to a base URL making it absolute.
243  * URL-encodes any spaces.
244  * The returned pointer must be freed by the caller unless NULL
245  * (returns NULL on out of memory).
246  *
247  * Note that this function destroys the 'base' string.
248  */
concat_url(char * base,const char * relurl)249 static char *concat_url(char *base, const char *relurl)
250 {
251   /***
252    TRY to append this new path to the old URL
253    to the right of the host part. Oh crap, this is doomed to cause
254    problems in the future...
255   */
256   struct dynbuf newest;
257   char *protsep;
258   char *pathsep;
259   bool host_changed = FALSE;
260   const char *useurl = relurl;
261 
262   /* protsep points to the start of the host name */
263   protsep = strstr(base, "//");
264   if(!protsep)
265     protsep = base;
266   else
267     protsep += 2; /* pass the slashes */
268 
269   if('/' != relurl[0]) {
270     int level = 0;
271 
272     /* First we need to find out if there's a ?-letter in the URL,
273        and cut it and the right-side of that off */
274     pathsep = strchr(protsep, '?');
275     if(pathsep)
276       *pathsep = 0;
277 
278     /* we have a relative path to append to the last slash if there's one
279        available, or if the new URL is just a query string (starts with a
280        '?')  we append the new one at the end of the entire currently worked
281        out URL */
282     if(useurl[0] != '?') {
283       pathsep = strrchr(protsep, '/');
284       if(pathsep)
285         *pathsep = 0;
286     }
287 
288     /* Check if there's any slash after the host name, and if so, remember
289        that position instead */
290     pathsep = strchr(protsep, '/');
291     if(pathsep)
292       protsep = pathsep + 1;
293     else
294       protsep = NULL;
295 
296     /* now deal with one "./" or any amount of "../" in the newurl
297        and act accordingly */
298 
299     if((useurl[0] == '.') && (useurl[1] == '/'))
300       useurl += 2; /* just skip the "./" */
301 
302     while((useurl[0] == '.') &&
303           (useurl[1] == '.') &&
304           (useurl[2] == '/')) {
305       level++;
306       useurl += 3; /* pass the "../" */
307     }
308 
309     if(protsep) {
310       while(level--) {
311         /* cut off one more level from the right of the original URL */
312         pathsep = strrchr(protsep, '/');
313         if(pathsep)
314           *pathsep = 0;
315         else {
316           *protsep = 0;
317           break;
318         }
319       }
320     }
321   }
322   else {
323     /* We got a new absolute path for this server */
324 
325     if(relurl[1] == '/') {
326       /* the new URL starts with //, just keep the protocol part from the
327          original one */
328       *protsep = 0;
329       useurl = &relurl[2]; /* we keep the slashes from the original, so we
330                               skip the new ones */
331       host_changed = TRUE;
332     }
333     else {
334       /* cut off the original URL from the first slash, or deal with URLs
335          without slash */
336       pathsep = strchr(protsep, '/');
337       if(pathsep) {
338         /* When people use badly formatted URLs, such as
339            "http://www.url.com?dir=/home/daniel" we must not use the first
340            slash, if there's a ?-letter before it! */
341         char *sep = strchr(protsep, '?');
342         if(sep && (sep < pathsep))
343           pathsep = sep;
344         *pathsep = 0;
345       }
346       else {
347         /* There was no slash. Now, since we might be operating on a badly
348            formatted URL, such as "http://www.url.com?id=2380" which doesn't
349            use a slash separator as it is supposed to, we need to check for a
350            ?-letter as well! */
351         pathsep = strchr(protsep, '?');
352         if(pathsep)
353           *pathsep = 0;
354       }
355     }
356   }
357 
358   Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
359 
360   /* copy over the root url part */
361   if(Curl_dyn_add(&newest, base))
362     return NULL;
363 
364   /* check if we need to append a slash */
365   if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
366     ;
367   else {
368     if(Curl_dyn_addn(&newest, "/", 1))
369       return NULL;
370   }
371 
372   /* then append the new piece on the right side */
373   urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
374 
375   return Curl_dyn_ptr(&newest);
376 }
377 
378 /* scan for byte values < 31 or 127 */
junkscan(const char * part,unsigned int flags)379 static bool junkscan(const char *part, unsigned int flags)
380 {
381   if(part) {
382     static const char badbytes[]={
383       /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
384       0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
385       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
386       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
387       0x7f, 0x00 /* null-terminate */
388     };
389     size_t n = strlen(part);
390     size_t nfine = strcspn(part, badbytes);
391     if(nfine != n)
392       /* since we don't know which part is scanned, return a generic error
393          code */
394       return TRUE;
395     if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
396       return TRUE;
397   }
398   return FALSE;
399 }
400 
401 /*
402  * parse_hostname_login()
403  *
404  * Parse the login details (user name, password and options) from the URL and
405  * strip them out of the host name
406  *
407  */
parse_hostname_login(struct Curl_URL * u,struct dynbuf * host,unsigned int flags)408 static CURLUcode parse_hostname_login(struct Curl_URL *u,
409                                       struct dynbuf *host,
410                                       unsigned int flags)
411 {
412   CURLUcode result = CURLUE_OK;
413   CURLcode ccode;
414   char *userp = NULL;
415   char *passwdp = NULL;
416   char *optionsp = NULL;
417   const struct Curl_handler *h = NULL;
418 
419   /* At this point, we assume all the other special cases have been taken
420    * care of, so the host is at most
421    *
422    *   [user[:password][;options]]@]hostname
423    *
424    * We need somewhere to put the embedded details, so do that first.
425    */
426 
427   char *login = Curl_dyn_ptr(host);
428   char *ptr;
429 
430   DEBUGASSERT(login);
431 
432   ptr = strchr(login, '@');
433   if(!ptr)
434     goto out;
435 
436   /* We will now try to extract the
437    * possible login information in a string like:
438    * ftp://user:password@ftp.my.site:8021/README */
439   ptr++;
440 
441   /* if this is a known scheme, get some details */
442   if(u->scheme)
443     h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
444 
445   /* We could use the login information in the URL so extract it. Only parse
446      options if the handler says we should. Note that 'h' might be NULL! */
447   ccode = Curl_parse_login_details(login, ptr - login - 1,
448                                    &userp, &passwdp,
449                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
450                                    &optionsp:NULL);
451   if(ccode) {
452     result = CURLUE_BAD_LOGIN;
453     goto out;
454   }
455 
456   if(userp) {
457     if(flags & CURLU_DISALLOW_USER) {
458       /* Option DISALLOW_USER is set and url contains username. */
459       result = CURLUE_USER_NOT_ALLOWED;
460       goto out;
461     }
462     if(junkscan(userp, flags)) {
463       result = CURLUE_BAD_USER;
464       goto out;
465     }
466     u->user = userp;
467   }
468 
469   if(passwdp) {
470     if(junkscan(passwdp, flags)) {
471       result = CURLUE_BAD_PASSWORD;
472       goto out;
473     }
474     u->password = passwdp;
475   }
476 
477   if(optionsp) {
478     if(junkscan(optionsp, flags)) {
479       result = CURLUE_BAD_LOGIN;
480       goto out;
481     }
482     u->options = optionsp;
483   }
484 
485   /* move the name to the start of the host buffer */
486   if(Curl_dyn_tail(host, strlen(ptr)))
487     return CURLUE_OUT_OF_MEMORY;
488 
489   return CURLUE_OK;
490   out:
491 
492   free(userp);
493   free(passwdp);
494   free(optionsp);
495   u->user = NULL;
496   u->password = NULL;
497   u->options = NULL;
498 
499   return result;
500 }
501 
Curl_parse_port(struct Curl_URL * u,struct dynbuf * host,bool has_scheme)502 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
503                                    bool has_scheme)
504 {
505   char *portptr;
506   char *hostname = Curl_dyn_ptr(host);
507   /*
508    * Find the end of an IPv6 address, either on the ']' ending bracket or
509    * a percent-encoded zone index.
510    */
511   if(hostname[0] == '[') {
512     portptr = strchr(hostname, ']');
513     if(!portptr)
514       return CURLUE_BAD_IPV6;
515     portptr++;
516     /* this is a RFC2732-style specified IP-address */
517     if(*portptr) {
518       if(*portptr != ':')
519         return CURLUE_BAD_PORT_NUMBER;
520     }
521     else
522       portptr = NULL;
523   }
524   else
525     portptr = strchr(hostname, ':');
526 
527   if(portptr) {
528     char *rest;
529     long port;
530     char portbuf[7];
531     size_t keep = portptr - hostname;
532 
533     /* Browser behavior adaptation. If there's a colon with no digits after,
534        just cut off the name there which makes us ignore the colon and just
535        use the default port. Firefox, Chrome and Safari all do that.
536 
537        Don't do it if the URL has no scheme, to make something that looks like
538        a scheme not work!
539     */
540     Curl_dyn_setlen(host, keep);
541     portptr++;
542     if(!*portptr)
543       return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
544 
545     if(!ISDIGIT(*portptr))
546       return CURLUE_BAD_PORT_NUMBER;
547 
548     port = strtol(portptr, &rest, 10);  /* Port number must be decimal */
549 
550     if(port > 0xffff)
551       return CURLUE_BAD_PORT_NUMBER;
552 
553     if(rest[0])
554       return CURLUE_BAD_PORT_NUMBER;
555 
556     *rest = 0;
557     /* generate a new port number string to get rid of leading zeroes etc */
558     msnprintf(portbuf, sizeof(portbuf), "%ld", port);
559     u->portnum = port;
560     u->port = strdup(portbuf);
561     if(!u->port)
562       return CURLUE_OUT_OF_MEMORY;
563   }
564 
565   return CURLUE_OK;
566 }
567 
hostname_check(struct Curl_URL * u,char * hostname,size_t hlen)568 static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
569                                 size_t hlen) /* length of hostname */
570 {
571   size_t len;
572   DEBUGASSERT(hostname);
573 
574   if(!hostname[0])
575     return CURLUE_NO_HOST;
576   else if(hostname[0] == '[') {
577     const char *l = "0123456789abcdefABCDEF:.";
578     if(hlen < 4) /* '[::]' is the shortest possible valid string */
579       return CURLUE_BAD_IPV6;
580     hostname++;
581     hlen -= 2;
582 
583     /* only valid IPv6 letters are ok */
584     len = strspn(hostname, l);
585 
586     if(hlen != len) {
587       hlen = len;
588       if(hostname[len] == '%') {
589         /* this could now be '%[zone id]' */
590         char zoneid[16];
591         int i = 0;
592         char *h = &hostname[len + 1];
593         /* pass '25' if present and is a url encoded percent sign */
594         if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595           h += 2;
596         while(*h && (*h != ']') && (i < 15))
597           zoneid[i++] = *h++;
598         if(!i || (']' != *h))
599           return CURLUE_BAD_IPV6;
600         zoneid[i] = 0;
601         u->zoneid = strdup(zoneid);
602         if(!u->zoneid)
603           return CURLUE_OUT_OF_MEMORY;
604         hostname[len] = ']'; /* insert end bracket */
605         hostname[len + 1] = 0; /* terminate the hostname */
606       }
607       else
608         return CURLUE_BAD_IPV6;
609       /* hostname is fine */
610     }
611 
612     /* Check the IPv6 address. */
613     {
614       char dest[16]; /* fits a binary IPv6 address */
615       char norm[MAX_IPADR_LEN];
616       hostname[hlen] = 0; /* end the address there */
617       if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618         return CURLUE_BAD_IPV6;
619 
620       /* check if it can be done shorter */
621       if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622          (strlen(norm) < hlen)) {
623         strcpy(hostname, norm);
624         hlen = strlen(norm);
625         hostname[hlen + 1] = 0;
626       }
627       hostname[hlen] = ']'; /* restore ending bracket */
628     }
629   }
630   else {
631     /* letters from the second string are not ok */
632     len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
633     if(hlen != len)
634       /* hostname with bad content */
635       return CURLUE_BAD_HOSTNAME;
636   }
637   return CURLUE_OK;
638 }
639 
640 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
641 
642 /*
643  * Handle partial IPv4 numerical addresses and different bases, like
644  * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
645  *
646  * If the given input string is syntactically wrong or any part for example is
647  * too big, this function returns FALSE and doesn't create any output.
648  *
649  * Output the "normalized" version of that input string in plain quad decimal
650  * integers and return TRUE.
651  */
ipv4_normalize(const char * hostname,char * outp,size_t olen)652 static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
653 {
654   bool done = FALSE;
655   int n = 0;
656   const char *c = hostname;
657   unsigned long parts[4] = {0, 0, 0, 0};
658 
659   while(!done) {
660     char *endp;
661     unsigned long l;
662     if((*c < '0') || (*c > '9'))
663       /* most importantly this doesn't allow a leading plus or minus */
664       return FALSE;
665     l = strtoul(c, &endp, 0);
666 
667     /* overflow or nothing parsed at all */
668     if(((l == ULONG_MAX) && (errno == ERANGE)) ||  (endp == c))
669       return FALSE;
670 
671 #if SIZEOF_LONG > 4
672     /* a value larger than 32 bits */
673     if(l > UINT_MAX)
674       return FALSE;
675 #endif
676 
677     parts[n] = l;
678     c = endp;
679 
680     switch (*c) {
681     case '.' :
682       if(n == 3)
683         return FALSE;
684       n++;
685       c++;
686       break;
687 
688     case '\0':
689       done = TRUE;
690       break;
691 
692     default:
693       return FALSE;
694     }
695   }
696 
697   /* this is deemed a valid IPv4 numerical address */
698 
699   switch(n) {
700   case 0: /* a -- 32 bits */
701     msnprintf(outp, olen, "%u.%u.%u.%u",
702               parts[0] >> 24, (parts[0] >> 16) & 0xff,
703               (parts[0] >> 8) & 0xff, parts[0] & 0xff);
704     break;
705   case 1: /* a.b -- 8.24 bits */
706     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
707       return FALSE;
708     msnprintf(outp, olen, "%u.%u.%u.%u",
709               parts[0], (parts[1] >> 16) & 0xff,
710               (parts[1] >> 8) & 0xff, parts[1] & 0xff);
711     break;
712   case 2: /* a.b.c -- 8.8.16 bits */
713     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
714       return FALSE;
715     msnprintf(outp, olen, "%u.%u.%u.%u",
716               parts[0], parts[1], (parts[2] >> 8) & 0xff,
717               parts[2] & 0xff);
718     break;
719   case 3: /* a.b.c.d -- 8.8.8.8 bits */
720     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
721        (parts[3] > 0xff))
722       return FALSE;
723     msnprintf(outp, olen, "%u.%u.%u.%u",
724               parts[0], parts[1], parts[2], parts[3]);
725     break;
726   }
727   return TRUE;
728 }
729 
730 /* if necessary, replace the host content with a URL decoded version */
decode_host(struct dynbuf * host)731 static CURLUcode decode_host(struct dynbuf *host)
732 {
733   char *per = NULL;
734   const char *hostname = Curl_dyn_ptr(host);
735   if(hostname[0] == '[')
736     /* only decode if not an ipv6 numerical */
737     return CURLUE_OK;
738   per = strchr(hostname, '%');
739   if(!per)
740     /* nothing to decode */
741     return CURLUE_OK;
742   else {
743     /* encoded */
744     size_t dlen;
745     char *decoded;
746     CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
747                                      REJECT_CTRL);
748     if(result)
749       return CURLUE_BAD_HOSTNAME;
750     Curl_dyn_reset(host);
751     result = Curl_dyn_addn(host, decoded, dlen);
752     free(decoded);
753     if(result)
754       return CURLUE_OUT_OF_MEMORY;
755   }
756 
757   return CURLUE_OK;
758 }
759 
760 /*
761  * "Remove Dot Segments"
762  * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
763  */
764 
765 /*
766  * dedotdotify()
767  * @unittest: 1395
768  *
769  * This function gets a null-terminated path with dot and dotdot sequences
770  * passed in and strips them off according to the rules in RFC 3986 section
771  * 5.2.4.
772  *
773  * The function handles a query part ('?' + stuff) appended but it expects
774  * that fragments ('#' + stuff) have already been cut off.
775  *
776  * RETURNS
777  *
778  * Zero for success and 'out' set to an allocated dedotdotified string.
779  */
780 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
dedotdotify(const char * input,size_t clen,char ** outp)781 UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
782 {
783   char *outptr;
784   const char *orginput = input;
785   char *queryp;
786   char *out;
787 
788   *outp = NULL;
789   /* the path always starts with a slash, and a slash has not dot */
790   if((clen < 2) || !memchr(input, '.', clen))
791     return 0;
792 
793   out = malloc(clen + 1);
794   if(!out)
795     return 1; /* out of memory */
796 
797   *out = 0; /* null-terminates, for inputs like "./" */
798   outptr = out;
799 
800   /*
801    * To handle query-parts properly, we must find it and remove it during the
802    * dotdot-operation and then append it again at the end to the output
803    * string.
804    */
805   queryp = strchr(input, '?');
806 
807   do {
808     bool dotdot = TRUE;
809     if(*input == '.') {
810       /*  A.  If the input buffer begins with a prefix of "../" or "./", then
811           remove that prefix from the input buffer; otherwise, */
812 
813       if(!strncmp("./", input, 2)) {
814         input += 2;
815         clen -= 2;
816       }
817       else if(!strncmp("../", input, 3)) {
818         input += 3;
819         clen -= 3;
820       }
821       /*  D.  if the input buffer consists only of "." or "..", then remove
822           that from the input buffer; otherwise, */
823 
824       else if(!strcmp(".", input) || !strcmp("..", input) ||
825               !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
826         *out = 0;
827         break;
828       }
829       else
830         dotdot = FALSE;
831     }
832     else if(*input == '/') {
833       /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
834           "."  is a complete path segment, then replace that prefix with "/" in
835           the input buffer; otherwise, */
836       if(!strncmp("/./", input, 3)) {
837         input += 2;
838         clen -= 2;
839       }
840       else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
841         *outptr++ = '/';
842         *outptr = 0;
843         break;
844       }
845 
846       /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
847           where ".." is a complete path segment, then replace that prefix with
848           "/" in the input buffer and remove the last segment and its
849           preceding "/" (if any) from the output buffer; otherwise, */
850 
851       else if(!strncmp("/../", input, 4)) {
852         input += 3;
853         clen -= 3;
854         /* remove the last segment from the output buffer */
855         while(outptr > out) {
856           outptr--;
857           if(*outptr == '/')
858             break;
859         }
860         *outptr = 0; /* null-terminate where it stops */
861       }
862       else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
863         /* remove the last segment from the output buffer */
864         while(outptr > out) {
865           outptr--;
866           if(*outptr == '/')
867             break;
868         }
869         *outptr++ = '/';
870         *outptr = 0; /* null-terminate where it stops */
871         break;
872       }
873       else
874         dotdot = FALSE;
875     }
876     else
877       dotdot = FALSE;
878 
879     if(!dotdot) {
880       /*  E.  move the first path segment in the input buffer to the end of
881           the output buffer, including the initial "/" character (if any) and
882           any subsequent characters up to, but not including, the next "/"
883           character or the end of the input buffer. */
884 
885       do {
886         *outptr++ = *input++;
887         clen--;
888       } while(*input && (*input != '/') && (*input != '?'));
889       *outptr = 0;
890     }
891 
892     /* continue until end of input string OR, if there is a terminating
893        query part, stop there */
894   } while(*input && (!queryp || (input < queryp)));
895 
896   if(queryp) {
897     size_t qlen;
898     /* There was a query part, append that to the output. */
899     size_t oindex = queryp - orginput;
900     qlen = strlen(&orginput[oindex]);
901     memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
902   }
903 
904   *outp = out;
905   return 0; /* success */
906 }
907 
parseurl(const char * url,CURLU * u,unsigned int flags)908 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
909 {
910   const char *path;
911   size_t pathlen;
912   bool uncpath = FALSE;
913   char *query = NULL;
914   char *fragment = NULL;
915   char schemebuf[MAX_SCHEME_LEN + 1];
916   const char *schemep = NULL;
917   size_t schemelen = 0;
918   size_t urllen;
919   CURLUcode result = CURLUE_OK;
920   size_t fraglen = 0;
921   struct dynbuf host;
922 
923   DEBUGASSERT(url);
924 
925   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
926 
927   /*************************************************************
928    * Parse the URL.
929    ************************************************************/
930   /* allocate scratch area */
931   urllen = strlen(url);
932   if(urllen > CURL_MAX_INPUT_LENGTH) {
933     /* excessive input length */
934     result = CURLUE_MALFORMED_INPUT;
935     goto fail;
936   }
937 
938   schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
939                                    flags & (CURLU_GUESS_SCHEME|
940                                             CURLU_DEFAULT_SCHEME));
941 
942   /* handle the file: scheme */
943   if(schemelen && !strcmp(schemebuf, "file")) {
944     if(urllen <= 6) {
945       /* file:/ is not enough to actually be a complete file: URL */
946       result = CURLUE_BAD_FILE_URL;
947       goto fail;
948     }
949 
950     /* path has been allocated large enough to hold this */
951     path = (char *)&url[5];
952 
953     schemep = u->scheme = strdup("file");
954     if(!u->scheme) {
955       result = CURLUE_OUT_OF_MEMORY;
956       goto fail;
957     }
958 
959     /* Extra handling URLs with an authority component (i.e. that start with
960      * "file://")
961      *
962      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
963      * RFC 8089, but not the (current) WHAT-WG URL spec.
964      */
965     if(path[0] == '/' && path[1] == '/') {
966       /* swallow the two slashes */
967       const char *ptr = &path[2];
968 
969       /*
970        * According to RFC 8089, a file: URL can be reliably dereferenced if:
971        *
972        *  o it has no/blank hostname, or
973        *
974        *  o the hostname matches "localhost" (case-insensitively), or
975        *
976        *  o the hostname is a FQDN that resolves to this machine, or
977        *
978        *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
979        *    Appendix E.3).
980        *
981        * For brevity, we only consider URLs with empty, "localhost", or
982        * "127.0.0.1" hostnames as local, otherwise as an UNC String.
983        *
984        * Additionally, there is an exception for URLs with a Windows drive
985        * letter in the authority (which was accidentally omitted from RFC 8089
986        * Appendix E, but believe me, it was meant to be there. --MK)
987        */
988       if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
989         /* the URL includes a host name, it must match "localhost" or
990            "127.0.0.1" to be valid */
991         if(checkprefix("localhost/", ptr) ||
992            checkprefix("127.0.0.1/", ptr)) {
993           ptr += 9; /* now points to the slash after the host */
994         }
995         else {
996 #if defined(WIN32)
997           size_t len;
998 
999           /* the host name, NetBIOS computer name, can not contain disallowed
1000              chars, and the delimiting slash character must be appended to the
1001              host name */
1002           path = strpbrk(ptr, "/\\:*?\"<>|");
1003           if(!path || *path != '/') {
1004             result = CURLUE_BAD_FILE_URL;
1005             goto fail;
1006           }
1007 
1008           len = path - ptr;
1009           if(len) {
1010             if(Curl_dyn_addn(&host, ptr, len)) {
1011               result = CURLUE_OUT_OF_MEMORY;
1012               goto fail;
1013             }
1014             uncpath = TRUE;
1015           }
1016 
1017           ptr -= 2; /* now points to the // before the host in UNC */
1018 #else
1019           /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1020              none */
1021           result = CURLUE_BAD_FILE_URL;
1022           goto fail;
1023 #endif
1024         }
1025       }
1026 
1027       path = ptr;
1028     }
1029 
1030     if(!uncpath)
1031       /* no host for file: URLs by default */
1032       Curl_dyn_reset(&host);
1033 
1034 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1035     /* Don't allow Windows drive letters when not in Windows.
1036      * This catches both "file:/c:" and "file:c:" */
1037     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1038        STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1039       /* File drive letters are only accepted in MSDOS/Windows */
1040       result = CURLUE_BAD_FILE_URL;
1041       goto fail;
1042     }
1043 #else
1044     /* If the path starts with a slash and a drive letter, ditch the slash */
1045     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1046       /* This cannot be done with strcpy, as the memory chunks overlap! */
1047       path++;
1048     }
1049 #endif
1050 
1051   }
1052   else {
1053     /* clear path */
1054     const char *p;
1055     const char *hostp;
1056     size_t len;
1057 
1058     if(schemelen) {
1059       int i = 0;
1060       p = &url[schemelen + 1];
1061       while(p && (*p == '/') && (i < 4)) {
1062         p++;
1063         i++;
1064       }
1065 
1066       schemep = schemebuf;
1067       if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1068          !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1069         result = CURLUE_UNSUPPORTED_SCHEME;
1070         goto fail;
1071       }
1072 
1073       if((i < 1) || (i>3)) {
1074         /* less than one or more than three slashes */
1075         result = CURLUE_BAD_SLASHES;
1076         goto fail;
1077       }
1078       if(junkscan(schemep, flags)) {
1079         result = CURLUE_BAD_SCHEME;
1080         goto fail;
1081       }
1082     }
1083     else {
1084       /* no scheme! */
1085 
1086       if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1087         result = CURLUE_BAD_SCHEME;
1088         goto fail;
1089       }
1090       if(flags & CURLU_DEFAULT_SCHEME)
1091         schemep = DEFAULT_SCHEME;
1092 
1093       /*
1094        * The URL was badly formatted, let's try without scheme specified.
1095        */
1096       p = url;
1097     }
1098     hostp = p; /* host name starts here */
1099 
1100     /* find the end of the host name + port number */
1101     while(*p && !HOSTNAME_END(*p))
1102       p++;
1103 
1104     len = p - hostp;
1105     if(len) {
1106       if(Curl_dyn_addn(&host, hostp, len)) {
1107         result = CURLUE_OUT_OF_MEMORY;
1108         goto fail;
1109       }
1110     }
1111     else {
1112       if(!(flags & CURLU_NO_AUTHORITY)) {
1113         result = CURLUE_NO_HOST;
1114         goto fail;
1115       }
1116     }
1117 
1118     path = (char *)p;
1119 
1120     if(schemep) {
1121       u->scheme = strdup(schemep);
1122       if(!u->scheme) {
1123         result = CURLUE_OUT_OF_MEMORY;
1124         goto fail;
1125       }
1126     }
1127   }
1128 
1129   fragment = strchr(path, '#');
1130   if(fragment) {
1131     fraglen = strlen(fragment);
1132     if(fraglen > 1) {
1133       /* skip the leading '#' in the copy but include the terminating null */
1134       u->fragment = Curl_memdup(fragment + 1, fraglen);
1135       if(!u->fragment) {
1136         result = CURLUE_OUT_OF_MEMORY;
1137         goto fail;
1138       }
1139 
1140       if(junkscan(u->fragment, flags)) {
1141         result = CURLUE_BAD_FRAGMENT;
1142         goto fail;
1143       }
1144     }
1145   }
1146 
1147   query = strchr(path, '?');
1148   if(query && (!fragment || (query < fragment))) {
1149     size_t qlen = strlen(query) - fraglen; /* includes '?' */
1150     pathlen = strlen(path) - qlen - fraglen;
1151     if(qlen > 1) {
1152       if(flags & CURLU_URLENCODE) {
1153         struct dynbuf enc;
1154         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1155         /* skip the leading question mark */
1156         if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1157           result = CURLUE_OUT_OF_MEMORY;
1158           goto fail;
1159         }
1160         u->query = Curl_dyn_ptr(&enc);
1161       }
1162       else {
1163         u->query = Curl_memdup(query + 1, qlen);
1164         if(!u->query) {
1165           result = CURLUE_OUT_OF_MEMORY;
1166           goto fail;
1167         }
1168         u->query[qlen - 1] = 0;
1169       }
1170 
1171       if(junkscan(u->query, flags)) {
1172         result = CURLUE_BAD_QUERY;
1173         goto fail;
1174       }
1175     }
1176     else {
1177       /* single byte query */
1178       u->query = strdup("");
1179       if(!u->query) {
1180         result = CURLUE_OUT_OF_MEMORY;
1181         goto fail;
1182       }
1183     }
1184   }
1185   else
1186     pathlen = strlen(path) - fraglen;
1187 
1188   if(pathlen && (flags & CURLU_URLENCODE)) {
1189     struct dynbuf enc;
1190     Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1191     if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1192       result = CURLUE_OUT_OF_MEMORY;
1193       goto fail;
1194     }
1195     pathlen = Curl_dyn_len(&enc);
1196     path = u->path = Curl_dyn_ptr(&enc);
1197   }
1198 
1199   if(pathlen <= 1) {
1200     /* there is no path left or just the slash, unset */
1201     path = NULL;
1202   }
1203   else {
1204     if(!u->path) {
1205       u->path = Curl_memdup(path, pathlen + 1);
1206       if(!u->path) {
1207         result = CURLUE_OUT_OF_MEMORY;
1208         goto fail;
1209       }
1210       u->path[pathlen] = 0;
1211       path = u->path;
1212     }
1213     else if(flags & CURLU_URLENCODE)
1214       /* it might have encoded more than just the path so cut it */
1215       u->path[pathlen] = 0;
1216 
1217     if(junkscan(u->path, flags)) {
1218       result = CURLUE_BAD_PATH;
1219       goto fail;
1220     }
1221 
1222     if(!(flags & CURLU_PATH_AS_IS)) {
1223       /* remove ../ and ./ sequences according to RFC3986 */
1224       char *dedot;
1225       int err = dedotdotify((char *)path, pathlen, &dedot);
1226       if(err) {
1227         result = CURLUE_OUT_OF_MEMORY;
1228         goto fail;
1229       }
1230       if(dedot) {
1231         free(u->path);
1232         u->path = dedot;
1233       }
1234     }
1235   }
1236 
1237   if(Curl_dyn_len(&host)) {
1238     char normalized_ipv4[sizeof("255.255.255.255") + 1];
1239 
1240     /*
1241      * Parse the login details and strip them out of the host name.
1242      */
1243     result = parse_hostname_login(u, &host, flags);
1244     if(!result)
1245       result = Curl_parse_port(u, &host, schemelen);
1246     if(result)
1247       goto fail;
1248 
1249     if(junkscan(Curl_dyn_ptr(&host), flags)) {
1250       result = CURLUE_BAD_HOSTNAME;
1251       goto fail;
1252     }
1253 
1254     if(ipv4_normalize(Curl_dyn_ptr(&host),
1255                       normalized_ipv4, sizeof(normalized_ipv4))) {
1256       Curl_dyn_reset(&host);
1257       if(Curl_dyn_add(&host, normalized_ipv4)) {
1258         result = CURLUE_OUT_OF_MEMORY;
1259         goto fail;
1260       }
1261     }
1262     else {
1263       result = decode_host(&host);
1264       if(!result)
1265         result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
1266       if(result)
1267         goto fail;
1268     }
1269 
1270     if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1271       const char *hostname = Curl_dyn_ptr(&host);
1272       /* legacy curl-style guess based on host name */
1273       if(checkprefix("ftp.", hostname))
1274         schemep = "ftp";
1275       else if(checkprefix("dict.", hostname))
1276         schemep = "dict";
1277       else if(checkprefix("ldap.", hostname))
1278         schemep = "ldap";
1279       else if(checkprefix("imap.", hostname))
1280         schemep = "imap";
1281       else if(checkprefix("smtp.", hostname))
1282         schemep = "smtp";
1283       else if(checkprefix("pop3.", hostname))
1284         schemep = "pop3";
1285       else
1286         schemep = "http";
1287 
1288       u->scheme = strdup(schemep);
1289       if(!u->scheme) {
1290         result = CURLUE_OUT_OF_MEMORY;
1291         goto fail;
1292       }
1293     }
1294   }
1295   else if(flags & CURLU_NO_AUTHORITY) {
1296     /* allowed to be empty. */
1297     if(Curl_dyn_add(&host, "")) {
1298       result = CURLUE_OUT_OF_MEMORY;
1299       goto fail;
1300     }
1301   }
1302 
1303   u->host = Curl_dyn_ptr(&host);
1304 
1305   return result;
1306   fail:
1307   Curl_dyn_free(&host);
1308   free_urlhandle(u);
1309   return result;
1310 }
1311 
1312 /*
1313  * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1314  */
parseurl_and_replace(const char * url,CURLU * u,unsigned int flags)1315 static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1316                                       unsigned int flags)
1317 {
1318   CURLUcode result;
1319   CURLU tmpurl;
1320   memset(&tmpurl, 0, sizeof(tmpurl));
1321   result = parseurl(url, &tmpurl, flags);
1322   if(!result) {
1323     free_urlhandle(u);
1324     *u = tmpurl;
1325   }
1326   return result;
1327 }
1328 
1329 /*
1330  */
curl_url(void)1331 CURLU *curl_url(void)
1332 {
1333   return calloc(sizeof(struct Curl_URL), 1);
1334 }
1335 
/*
 * curl_url_cleanup() releases the parts stored in the handle and then the
 * handle itself. Passing NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1343 
/*
 * DUP() copies the string member 'name' from the struct that 'src' points to
 * into the same member of the struct that 'dest' points to, using strdup().
 *
 * A NULL member in 'src' is skipped, leaving the 'dest' member untouched.
 * When strdup() fails, control jumps to a label named 'fail' that the
 * function using this macro must provide.
 *
 * The pointer arguments are parenthesized so that expressions such as
 * '&obj' or 'ptr + 1' can be passed safely.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1352 
curl_url_dup(const CURLU * in)1353 CURLU *curl_url_dup(const CURLU *in)
1354 {
1355   struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1356   if(u) {
1357     DUP(u, in, scheme);
1358     DUP(u, in, user);
1359     DUP(u, in, password);
1360     DUP(u, in, options);
1361     DUP(u, in, host);
1362     DUP(u, in, port);
1363     DUP(u, in, path);
1364     DUP(u, in, query);
1365     DUP(u, in, fragment);
1366     u->portnum = in->portnum;
1367   }
1368   return u;
1369   fail:
1370   curl_url_cleanup(u);
1371   return NULL;
1372 }
1373 
curl_url_get(const CURLU * u,CURLUPart what,char ** part,unsigned int flags)1374 CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1375                        char **part, unsigned int flags)
1376 {
1377   const char *ptr;
1378   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1379   char portbuf[7];
1380   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1381   bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1382   bool punycode = FALSE;
1383   bool plusdecode = FALSE;
1384   (void)flags;
1385   if(!u)
1386     return CURLUE_BAD_HANDLE;
1387   if(!part)
1388     return CURLUE_BAD_PARTPOINTER;
1389   *part = NULL;
1390 
1391   switch(what) {
1392   case CURLUPART_SCHEME:
1393     ptr = u->scheme;
1394     ifmissing = CURLUE_NO_SCHEME;
1395     urldecode = FALSE; /* never for schemes */
1396     break;
1397   case CURLUPART_USER:
1398     ptr = u->user;
1399     ifmissing = CURLUE_NO_USER;
1400     break;
1401   case CURLUPART_PASSWORD:
1402     ptr = u->password;
1403     ifmissing = CURLUE_NO_PASSWORD;
1404     break;
1405   case CURLUPART_OPTIONS:
1406     ptr = u->options;
1407     ifmissing = CURLUE_NO_OPTIONS;
1408     break;
1409   case CURLUPART_HOST:
1410     ptr = u->host;
1411     ifmissing = CURLUE_NO_HOST;
1412     punycode = (flags & CURLU_PUNYCODE)?1:0;
1413     break;
1414   case CURLUPART_ZONEID:
1415     ptr = u->zoneid;
1416     ifmissing = CURLUE_NO_ZONEID;
1417     break;
1418   case CURLUPART_PORT:
1419     ptr = u->port;
1420     ifmissing = CURLUE_NO_PORT;
1421     urldecode = FALSE; /* never for port */
1422     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1423       /* there's no stored port number, but asked to deliver
1424          a default one for the scheme */
1425       const struct Curl_handler *h =
1426         Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1427       if(h) {
1428         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1429         ptr = portbuf;
1430       }
1431     }
1432     else if(ptr && u->scheme) {
1433       /* there is a stored port number, but ask to inhibit if
1434          it matches the default one for the scheme */
1435       const struct Curl_handler *h =
1436         Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1437       if(h && (h->defport == u->portnum) &&
1438          (flags & CURLU_NO_DEFAULT_PORT))
1439         ptr = NULL;
1440     }
1441     break;
1442   case CURLUPART_PATH:
1443     ptr = u->path;
1444     if(!ptr)
1445       ptr = "/";
1446     break;
1447   case CURLUPART_QUERY:
1448     ptr = u->query;
1449     ifmissing = CURLUE_NO_QUERY;
1450     plusdecode = urldecode;
1451     break;
1452   case CURLUPART_FRAGMENT:
1453     ptr = u->fragment;
1454     ifmissing = CURLUE_NO_FRAGMENT;
1455     break;
1456   case CURLUPART_URL: {
1457     char *url;
1458     char *scheme;
1459     char *options = u->options;
1460     char *port = u->port;
1461     char *allochost = NULL;
1462     punycode = (flags & CURLU_PUNYCODE)?1:0;
1463     if(u->scheme && strcasecompare("file", u->scheme)) {
1464       url = aprintf("file://%s%s%s",
1465                     u->path,
1466                     u->fragment? "#": "",
1467                     u->fragment? u->fragment : "");
1468     }
1469     else if(!u->host)
1470       return CURLUE_NO_HOST;
1471     else {
1472       const struct Curl_handler *h = NULL;
1473       if(u->scheme)
1474         scheme = u->scheme;
1475       else if(flags & CURLU_DEFAULT_SCHEME)
1476         scheme = (char *) DEFAULT_SCHEME;
1477       else
1478         return CURLUE_NO_SCHEME;
1479 
1480       h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1481       if(!port && (flags & CURLU_DEFAULT_PORT)) {
1482         /* there's no stored port number, but asked to deliver
1483            a default one for the scheme */
1484         if(h) {
1485           msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1486           port = portbuf;
1487         }
1488       }
1489       else if(port) {
1490         /* there is a stored port number, but asked to inhibit if it matches
1491            the default one for the scheme */
1492         if(h && (h->defport == u->portnum) &&
1493            (flags & CURLU_NO_DEFAULT_PORT))
1494           port = NULL;
1495       }
1496 
1497       if(h && !(h->flags & PROTOPT_URLOPTIONS))
1498         options = NULL;
1499 
1500       if(u->host[0] == '[') {
1501         if(u->zoneid) {
1502           /* make it '[ host %25 zoneid ]' */
1503           struct dynbuf enc;
1504           size_t hostlen = strlen(u->host);
1505           Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1506           if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1507                            u->zoneid))
1508             return CURLUE_OUT_OF_MEMORY;
1509           allochost = Curl_dyn_ptr(&enc);
1510         }
1511       }
1512       else if(urlencode) {
1513         allochost = curl_easy_escape(NULL, u->host, 0);
1514         if(!allochost)
1515           return CURLUE_OUT_OF_MEMORY;
1516       }
1517       else if(punycode) {
1518         if(!Curl_is_ASCII_name(u->host)) {
1519 #ifndef USE_IDN
1520           return CURLUE_LACKS_IDN;
1521 #else
1522           allochost = Curl_idn_decode(u->host);
1523           if(!allochost)
1524             return CURLUE_OUT_OF_MEMORY;
1525 #endif
1526         }
1527       }
1528       else {
1529         /* only encode '%' in output host name */
1530         char *host = u->host;
1531         bool percent = FALSE;
1532         /* first, count number of percents present in the name */
1533         while(*host) {
1534           if(*host == '%') {
1535             percent = TRUE;
1536             break;
1537           }
1538           host++;
1539         }
1540         /* if there were percent(s), encode the host name */
1541         if(percent) {
1542           struct dynbuf enc;
1543           CURLcode result;
1544           Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1545           host = u->host;
1546           while(*host) {
1547             if(*host == '%')
1548               result = Curl_dyn_addn(&enc, "%25", 3);
1549             else
1550               result = Curl_dyn_addn(&enc, host, 1);
1551             if(result)
1552               return CURLUE_OUT_OF_MEMORY;
1553             host++;
1554           }
1555           allochost = Curl_dyn_ptr(&enc);
1556         }
1557       }
1558 
1559       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1560                     scheme,
1561                     u->user ? u->user : "",
1562                     u->password ? ":": "",
1563                     u->password ? u->password : "",
1564                     options ? ";" : "",
1565                     options ? options : "",
1566                     (u->user || u->password || options) ? "@": "",
1567                     allochost ? allochost : u->host,
1568                     port ? ":": "",
1569                     port ? port : "",
1570                     (u->path && (u->path[0] != '/')) ? "/": "",
1571                     u->path ? u->path : "/",
1572                     (u->query && u->query[0]) ? "?": "",
1573                     (u->query && u->query[0]) ? u->query : "",
1574                     u->fragment? "#": "",
1575                     u->fragment? u->fragment : "");
1576       free(allochost);
1577     }
1578     if(!url)
1579       return CURLUE_OUT_OF_MEMORY;
1580     *part = url;
1581     return CURLUE_OK;
1582   }
1583   default:
1584     ptr = NULL;
1585     break;
1586   }
1587   if(ptr) {
1588     size_t partlen = strlen(ptr);
1589     size_t i = 0;
1590     *part = Curl_memdup(ptr, partlen + 1);
1591     if(!*part)
1592       return CURLUE_OUT_OF_MEMORY;
1593     if(plusdecode) {
1594       /* convert + to space */
1595       char *plus = *part;
1596       for(i = 0; i < partlen; ++plus, i++) {
1597         if(*plus == '+')
1598           *plus = ' ';
1599       }
1600     }
1601     if(urldecode) {
1602       char *decoded;
1603       size_t dlen;
1604       /* this unconditional rejection of control bytes is documented
1605          API behavior */
1606       CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1607       free(*part);
1608       if(res) {
1609         *part = NULL;
1610         return CURLUE_URLDECODE;
1611       }
1612       *part = decoded;
1613       partlen = dlen;
1614     }
1615     if(urlencode) {
1616       struct dynbuf enc;
1617       Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1618       if(urlencode_str(&enc, *part, partlen, TRUE,
1619                        what == CURLUPART_QUERY))
1620         return CURLUE_OUT_OF_MEMORY;
1621       free(*part);
1622       *part = Curl_dyn_ptr(&enc);
1623     }
1624     else if(punycode) {
1625       if(!Curl_is_ASCII_name(u->host)) {
1626 #ifndef USE_IDN
1627         return CURLUE_LACKS_IDN;
1628 #else
1629         char *allochost = Curl_idn_decode(*part);
1630         if(!allochost)
1631           return CURLUE_OUT_OF_MEMORY;
1632         free(*part);
1633         *part = allochost;
1634 #endif
1635       }
1636     }
1637 
1638     return CURLUE_OK;
1639   }
1640   else
1641     return ifmissing;
1642 }
1643 
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1644 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1645                        const char *part, unsigned int flags)
1646 {
1647   char **storep = NULL;
1648   long port = 0;
1649   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1650   bool plusencode = FALSE;
1651   bool urlskipslash = FALSE;
1652   bool appendquery = FALSE;
1653   bool equalsencode = FALSE;
1654 
1655   if(!u)
1656     return CURLUE_BAD_HANDLE;
1657   if(!part) {
1658     /* setting a part to NULL clears it */
1659     switch(what) {
1660     case CURLUPART_URL:
1661       break;
1662     case CURLUPART_SCHEME:
1663       storep = &u->scheme;
1664       break;
1665     case CURLUPART_USER:
1666       storep = &u->user;
1667       break;
1668     case CURLUPART_PASSWORD:
1669       storep = &u->password;
1670       break;
1671     case CURLUPART_OPTIONS:
1672       storep = &u->options;
1673       break;
1674     case CURLUPART_HOST:
1675       storep = &u->host;
1676       break;
1677     case CURLUPART_ZONEID:
1678       storep = &u->zoneid;
1679       break;
1680     case CURLUPART_PORT:
1681       u->portnum = 0;
1682       storep = &u->port;
1683       break;
1684     case CURLUPART_PATH:
1685       storep = &u->path;
1686       break;
1687     case CURLUPART_QUERY:
1688       storep = &u->query;
1689       break;
1690     case CURLUPART_FRAGMENT:
1691       storep = &u->fragment;
1692       break;
1693     default:
1694       return CURLUE_UNKNOWN_PART;
1695     }
1696     if(storep && *storep) {
1697       Curl_safefree(*storep);
1698     }
1699     else if(!storep) {
1700       free_urlhandle(u);
1701       memset(u, 0, sizeof(struct Curl_URL));
1702     }
1703     return CURLUE_OK;
1704   }
1705 
1706   switch(what) {
1707   case CURLUPART_SCHEME:
1708     if(strlen(part) > MAX_SCHEME_LEN)
1709       /* too long */
1710       return CURLUE_BAD_SCHEME;
1711     if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1712        /* verify that it is a fine scheme */
1713        !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1714       return CURLUE_UNSUPPORTED_SCHEME;
1715     storep = &u->scheme;
1716     urlencode = FALSE; /* never */
1717     break;
1718   case CURLUPART_USER:
1719     storep = &u->user;
1720     break;
1721   case CURLUPART_PASSWORD:
1722     storep = &u->password;
1723     break;
1724   case CURLUPART_OPTIONS:
1725     storep = &u->options;
1726     break;
1727   case CURLUPART_HOST: {
1728     size_t len = strcspn(part, " \r\n");
1729     if(strlen(part) != len)
1730       /* hostname with bad content */
1731       return CURLUE_BAD_HOSTNAME;
1732     storep = &u->host;
1733     Curl_safefree(u->zoneid);
1734     break;
1735   }
1736   case CURLUPART_ZONEID:
1737     storep = &u->zoneid;
1738     break;
1739   case CURLUPART_PORT:
1740   {
1741     char *endp;
1742     urlencode = FALSE; /* never */
1743     port = strtol(part, &endp, 10);  /* Port number must be decimal */
1744     if((port <= 0) || (port > 0xffff))
1745       return CURLUE_BAD_PORT_NUMBER;
1746     if(*endp)
1747       /* weirdly provided number, not good! */
1748       return CURLUE_BAD_PORT_NUMBER;
1749     storep = &u->port;
1750   }
1751   break;
1752   case CURLUPART_PATH:
1753     urlskipslash = TRUE;
1754     storep = &u->path;
1755     break;
1756   case CURLUPART_QUERY:
1757     plusencode = urlencode;
1758     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1759     equalsencode = appendquery;
1760     storep = &u->query;
1761     break;
1762   case CURLUPART_FRAGMENT:
1763     storep = &u->fragment;
1764     break;
1765   case CURLUPART_URL: {
1766     /*
1767      * Allow a new URL to replace the existing (if any) contents.
1768      *
1769      * If the existing contents is enough for a URL, allow a relative URL to
1770      * replace it.
1771      */
1772     CURLUcode result;
1773     char *oldurl;
1774     char *redired_url;
1775 
1776     /* if the new thing is absolute or the old one is not
1777      * (we could not get an absolute url in 'oldurl'),
1778      * then replace the existing with the new. */
1779     if(Curl_is_absolute_url(part, NULL, 0,
1780                             flags & (CURLU_GUESS_SCHEME|
1781                                      CURLU_DEFAULT_SCHEME))
1782        || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1783       return parseurl_and_replace(part, u, flags);
1784     }
1785 
1786     /* apply the relative part to create a new URL
1787      * and replace the existing one with it. */
1788     redired_url = concat_url(oldurl, part);
1789     free(oldurl);
1790     if(!redired_url)
1791       return CURLUE_OUT_OF_MEMORY;
1792 
1793     result = parseurl_and_replace(redired_url, u, flags);
1794     free(redired_url);
1795     return result;
1796   }
1797   default:
1798     return CURLUE_UNKNOWN_PART;
1799   }
1800   DEBUGASSERT(storep);
1801   {
1802     const char *newp = part;
1803     size_t nalloc = strlen(part);
1804 
1805     if(nalloc > CURL_MAX_INPUT_LENGTH)
1806       /* excessive input length */
1807       return CURLUE_MALFORMED_INPUT;
1808 
1809     if(urlencode) {
1810       const unsigned char *i;
1811       struct dynbuf enc;
1812 
1813       Curl_dyn_init(&enc, nalloc * 3 + 1);
1814 
1815       for(i = (const unsigned char *)part; *i; i++) {
1816         CURLcode result;
1817         if((*i == ' ') && plusencode) {
1818           result = Curl_dyn_addn(&enc, "+", 1);
1819           if(result)
1820             return CURLUE_OUT_OF_MEMORY;
1821         }
1822         else if(Curl_isunreserved(*i) ||
1823                 ((*i == '/') && urlskipslash) ||
1824                 ((*i == '=') && equalsencode)) {
1825           if((*i == '=') && equalsencode)
1826             /* only skip the first equals sign */
1827             equalsencode = FALSE;
1828           result = Curl_dyn_addn(&enc, i, 1);
1829           if(result)
1830             return CURLUE_OUT_OF_MEMORY;
1831         }
1832         else {
1833           char out[3]={'%'};
1834           out[1] = hexdigits[*i>>4];
1835           out[2] = hexdigits[*i & 0xf];
1836           result = Curl_dyn_addn(&enc, out, 3);
1837           if(result)
1838             return CURLUE_OUT_OF_MEMORY;
1839         }
1840       }
1841       newp = Curl_dyn_ptr(&enc);
1842     }
1843     else {
1844       char *p;
1845       newp = strdup(part);
1846       if(!newp)
1847         return CURLUE_OUT_OF_MEMORY;
1848       p = (char *)newp;
1849       while(*p) {
1850         /* make sure percent encoded are lower case */
1851         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1852            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1853           p[1] = Curl_raw_tolower(p[1]);
1854           p[2] = Curl_raw_tolower(p[2]);
1855           p += 3;
1856         }
1857         else
1858           p++;
1859       }
1860     }
1861 
1862     if(appendquery) {
1863       /* Append the 'newp' string onto the old query. Add a '&' separator if
1864          none is present at the end of the existing query already */
1865 
1866       size_t querylen = u->query ? strlen(u->query) : 0;
1867       bool addamperand = querylen && (u->query[querylen -1] != '&');
1868       if(querylen) {
1869         struct dynbuf enc;
1870         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1871 
1872         if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
1873           goto nomem;
1874 
1875         if(addamperand) {
1876           if(Curl_dyn_addn(&enc, "&", 1))
1877             goto nomem;
1878         }
1879         if(Curl_dyn_add(&enc, newp))
1880           goto nomem;
1881         free((char *)newp);
1882         free(*storep);
1883         *storep = Curl_dyn_ptr(&enc);
1884         return CURLUE_OK;
1885         nomem:
1886         free((char *)newp);
1887         return CURLUE_OUT_OF_MEMORY;
1888       }
1889     }
1890 
1891     if(what == CURLUPART_HOST) {
1892       size_t n = strlen(newp);
1893       if(!n && (flags & CURLU_NO_AUTHORITY)) {
1894         /* Skip hostname check, it's allowed to be empty. */
1895       }
1896       else {
1897         if(hostname_check(u, (char *)newp, n)) {
1898           free((char *)newp);
1899           return CURLUE_BAD_HOSTNAME;
1900         }
1901       }
1902     }
1903 
1904     free(*storep);
1905     *storep = (char *)newp;
1906   }
1907   /* set after the string, to make it not assigned if the allocation above
1908      fails */
1909   if(port)
1910     u->portnum = port;
1911   return CURLUE_OK;
1912 }
1913