1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22
23 #include "curl_setup.h"
24
25 #include "urldata.h"
26 #include "urlapi-int.h"
27 #include "strcase.h"
28 #include "dotdot.h"
29 #include "url.h"
30 #include "escape.h"
31 #include "curl_ctype.h"
32 #include "inet_pton.h"
33
34 /* The last 3 #include files should be in this order */
35 #include "curl_printf.h"
36 #include "curl_memory.h"
37 #include "memdebug.h"
38
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter followed by
 * a colon. The argument is parenthesized at every use so that any pointer
 * expression (for example 'p + 1' or '&buf[2]') can be passed safely. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
44
/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash or NUL
 *
 * Matches an ASCII letter, then ':' or '|', then '/', '\\' or the
 * terminating zero byte. The tests are chained with '&&' from left to
 * right, so a later character is only inspected when the earlier ones
 * matched. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
52
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every 'char *' member is either NULL or a heap allocation that is
 * released by free_urlhandle(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port; /* decimal string, stored together with 'portnum' */
  char *path;
  char *query;
  char *fragment;

  char *scratch; /* temporary scratch area */
  char *temppath; /* temporary path pointer */
  long portnum; /* the numerical version */
};
70
/* scheme used for scheme-less URLs when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
72
free_urlhandle(struct Curl_URL * u)73 static void free_urlhandle(struct Curl_URL *u)
74 {
75 free(u->scheme);
76 free(u->user);
77 free(u->password);
78 free(u->options);
79 free(u->host);
80 free(u->zoneid);
81 free(u->port);
82 free(u->path);
83 free(u->query);
84 free(u->fragment);
85 free(u->scratch);
86 free(u->temppath);
87 }
88
89 /* move the full contents of one handle onto another and
90 free the original */
mv_urlhandle(struct Curl_URL * from,struct Curl_URL * to)91 static void mv_urlhandle(struct Curl_URL *from,
92 struct Curl_URL *to)
93 {
94 free_urlhandle(to);
95 *to = *from;
96 free(from);
97 }
98
/*
 * Return a pointer to the first character after the host name: whichever of
 * '/' and '?' comes first after the leading "//" (if there is one), as in
 * http://www.url.com?id=2380. When neither is present the returned pointer
 * is the terminating zero byte of the URL.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* the host name begins right after "//", or at the very start */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  /* the earliest of the two separators wins */
  return (qmark <= slash) ? qmark : slash;
}
126
127 /*
128 * Decide in an encoding-independent manner whether a character in an
129 * URL must be escaped. The same criterion must be used in strlen_url()
130 * and strcpy_url().
131 */
urlchar_needs_escaping(int c)132 static bool urlchar_needs_escaping(int c)
133 {
134 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
135 }
136
137 /*
138 * strlen_url() returns the length of the given URL if the spaces within the
139 * URL were properly URL encoded.
140 * URL encoding should be skipped for host names, otherwise IDN resolution
141 * will fail.
142 */
strlen_url(const char * url,bool relative)143 static size_t strlen_url(const char *url, bool relative)
144 {
145 const unsigned char *ptr;
146 size_t newlen = 0;
147 bool left = TRUE; /* left side of the ? */
148 const unsigned char *host_sep = (const unsigned char *) url;
149
150 if(!relative)
151 host_sep = (const unsigned char *) find_host_sep(url);
152
153 for(ptr = (unsigned char *)url; *ptr; ptr++) {
154
155 if(ptr < host_sep) {
156 ++newlen;
157 continue;
158 }
159
160 switch(*ptr) {
161 case '?':
162 left = FALSE;
163 /* FALLTHROUGH */
164 default:
165 if(urlchar_needs_escaping(*ptr))
166 newlen += 2;
167 newlen++;
168 break;
169 case ' ':
170 if(left)
171 newlen += 3;
172 else
173 newlen++;
174 break;
175 }
176 }
177 return newlen;
178 }
179
180 /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
181 * the source URL accordingly.
182 * URL encoding should be skipped for host names, otherwise IDN resolution
183 * will fail.
184 */
strcpy_url(char * output,const char * url,bool relative)185 static void strcpy_url(char *output, const char *url, bool relative)
186 {
187 /* we must add this with whitespace-replacing */
188 bool left = TRUE;
189 const unsigned char *iptr;
190 char *optr = output;
191 const unsigned char *host_sep = (const unsigned char *) url;
192
193 if(!relative)
194 host_sep = (const unsigned char *) find_host_sep(url);
195
196 for(iptr = (unsigned char *)url; /* read from here */
197 *iptr; /* until zero byte */
198 iptr++) {
199
200 if(iptr < host_sep) {
201 *optr++ = *iptr;
202 continue;
203 }
204
205 switch(*iptr) {
206 case '?':
207 left = FALSE;
208 /* FALLTHROUGH */
209 default:
210 if(urlchar_needs_escaping(*iptr)) {
211 msnprintf(optr, 4, "%%%02x", *iptr);
212 optr += 3;
213 }
214 else
215 *optr++=*iptr;
216 break;
217 case ' ':
218 if(left) {
219 *optr++='%'; /* add a '%' */
220 *optr++='2'; /* add a '2' */
221 *optr++='0'; /* add a '0' */
222 }
223 else
224 *optr++='+'; /* add a '+' here */
225 break;
226 }
227 }
228 *optr = 0; /* null-terminate output buffer */
229
230 }
231
232 /*
233 * Returns true if the given URL is absolute (as opposed to relative) within
234 * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
235 * non-NULL.
236 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen)237 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
238 {
239 size_t i;
240 #ifdef WIN32
241 if(STARTS_WITH_DRIVE_PREFIX(url))
242 return FALSE;
243 #endif
244 for(i = 0; i < buflen && url[i]; ++i) {
245 char s = url[i];
246 if((s == ':') && (url[i + 1] == '/')) {
247 if(buf)
248 buf[i] = 0;
249 return TRUE;
250 }
251 /* RFC 3986 3.1 explains:
252 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
253 */
254 else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) {
255 if(buf)
256 buf[i] = (char)TOLOWER(s);
257 }
258 else
259 break;
260 }
261 return FALSE;
262 }
263
/*
 * Concatenate a relative URL to a base URL making it absolute.
 * URL-encodes any spaces.
 * The returned pointer must be freed by the caller unless NULL
 * (returns NULL on out of memory).
 *
 * 'base' itself is never modified: all cutting is done on a private copy
 * which is freed before returning. Three forms of 'relurl' are told apart:
 *
 *  - not starting with '/': appended after the last slash of the base path
 *    (or at the end when it starts with '?'), after removing one leading
 *    "./" and one base path level per leading "../"
 *  - starting with "//": replaces everything after the base's "//"
 *  - starting with a single '/': replaces the base's path
 */
static char *concat_url(const char *base, const char *relurl)
{
  /***
   TRY to append this new path to the old URL
   to the right of the host part. Oh crap, this is doomed to cause
   problems in the future...
  */
  char *newest;
  char *protsep;
  char *pathsep;
  size_t newlen;
  bool host_changed = FALSE;

  const char *useurl = relurl;
  size_t urllen;

  /* we must make our own copy of the URL to play with, as it may
     point to read-only data */
  char *url_clone = strdup(base);

  if(!url_clone)
    return NULL; /* skip out of this NOW */

  /* protsep points to the start of the host name */
  protsep = strstr(url_clone, "//");
  if(!protsep)
    protsep = url_clone;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    int level = 0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or if the new URL is just a query string (starts with a
       '?')  we append the new one at the end of the entire currently worked
       out URL */
    if(useurl[0] != '?') {
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;
    }

    /* Check if there's any slash after the host name, and if so, remember
       that position instead */
    pathsep = strchr(protsep, '/');
    if(pathsep)
      protsep = pathsep + 1;
    else
      protsep = NULL; /* no path left in the base, nothing to cut levels of */

    /* now deal with one "./" or any amount of "../" in the newurl
       and act accordingly */

    if((useurl[0] == '.') && (useurl[1] == '/'))
      useurl += 2; /* just skip the "./" */

    while((useurl[0] == '.') &&
          (useurl[1] == '.') &&
          (useurl[2] == '/')) {
      level++;
      useurl += 3; /* pass the "../" */
    }

    if(protsep) {
      while(level--) {
        /* cut off one more level from the right of the original URL */
        pathsep = strrchr(protsep, '/');
        if(pathsep)
          *pathsep = 0;
        else {
          /* no more levels to remove: empty the path and stop */
          *protsep = 0;
          break;
        }
      }
    }
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.url.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.url.com?id=2380" which doesn't
           use a slash separator as it is supposed to, we need to check for a
           ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  /* If the new part contains a space, this is a mighty stupid redirect
     but we still make an effort to do "right". To the left of a '?'
     letter we replace each space with %20 while it is replaced with '+'
     on the right side of the '?' letter.

     The new part is treated as relative (encoded from its first byte)
     unless it brought a new host name along.
  */
  newlen = strlen_url(useurl, !host_changed);

  urllen = strlen(url_clone);

  newest = malloc(urllen + 1 + /* possible slash */
                  newlen + 1 /* zero byte */);

  if(!newest) {
    free(url_clone); /* don't leak this */
    return NULL;
  }

  /* copy over the root url part */
  memcpy(newest, url_clone, urllen);

  /* check if we need to append a slash */
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    ;
  else
    newest[urllen++]='/';

  /* then append the new piece on the right side */
  strcpy_url(&newest[urllen], useurl, !host_changed);

  free(url_clone);

  return newest;
}
422
423 /*
424 * parse_hostname_login()
425 *
426 * Parse the login details (user name, password and options) from the URL and
427 * strip them out of the host name
428 *
429 */
parse_hostname_login(struct Curl_URL * u,char ** hostname,unsigned int flags)430 static CURLUcode parse_hostname_login(struct Curl_URL *u,
431 char **hostname,
432 unsigned int flags)
433 {
434 CURLUcode result = CURLUE_OK;
435 CURLcode ccode;
436 char *userp = NULL;
437 char *passwdp = NULL;
438 char *optionsp = NULL;
439 const struct Curl_handler *h = NULL;
440
441 /* At this point, we're hoping all the other special cases have
442 * been taken care of, so conn->host.name is at most
443 * [user[:password][;options]]@]hostname
444 *
445 * We need somewhere to put the embedded details, so do that first.
446 */
447
448 char *ptr = strchr(*hostname, '@');
449 char *login = *hostname;
450
451 if(!ptr)
452 goto out;
453
454 /* We will now try to extract the
455 * possible login information in a string like:
456 * ftp://user:password@ftp.my.site:8021/README */
457 *hostname = ++ptr;
458
459 /* if this is a known scheme, get some details */
460 if(u->scheme)
461 h = Curl_builtin_scheme(u->scheme);
462
463 /* We could use the login information in the URL so extract it. Only parse
464 options if the handler says we should. Note that 'h' might be NULL! */
465 ccode = Curl_parse_login_details(login, ptr - login - 1,
466 &userp, &passwdp,
467 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
468 &optionsp:NULL);
469 if(ccode) {
470 result = CURLUE_MALFORMED_INPUT;
471 goto out;
472 }
473
474 if(userp) {
475 if(flags & CURLU_DISALLOW_USER) {
476 /* Option DISALLOW_USER is set and url contains username. */
477 result = CURLUE_USER_NOT_ALLOWED;
478 goto out;
479 }
480
481 u->user = userp;
482 }
483
484 if(passwdp)
485 u->password = passwdp;
486
487 if(optionsp)
488 u->options = optionsp;
489
490 return CURLUE_OK;
491 out:
492
493 free(userp);
494 free(passwdp);
495 free(optionsp);
496
497 return result;
498 }
499
/*
 * Curl_parse_port()
 *
 * Locate an optional ":port" suffix in 'hostname', which may be a bracketed
 * IPv6 address with or without a zone id. When a valid port is found the
 * host name is cut off at the colon, u->portnum gets the number and u->port
 * a newly allocated decimal string for it.
 *
 * A colon with nothing after it is cut off too; that is accepted only when
 * 'has_scheme' is set.
 *
 * Returns CURLUE_OK, CURLUE_MALFORMED_INPUT (bad IPv6 bracket syntax),
 * CURLUE_BAD_PORT_NUMBER or CURLUE_OUT_OF_MEMORY.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
                                   bool has_scheme)
{
  char *portptr = NULL;
  char endbracket;
  int len;

  /*
   * Find the end of an IPv6 address, either on the ']' ending bracket or
   * a percent-encoded zone index.
   *
   * %c stores the first character after the address digits in 'endbracket'
   * and %n stores the number of characters consumed so far in 'len'.
   */
  if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
                 &endbracket, &len)) {
    if(']' == endbracket)
      portptr = &hostname[len];
    else if('%' == endbracket) {
      int zonelen = len;
      /* skip everything up to the closing bracket of the zone id */
      if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
        if(']' != endbracket)
          return CURLUE_MALFORMED_INPUT;
        /* the decrement and the + 1 cancel out: this is the character
           right after the closing bracket */
        portptr = &hostname[--zonelen + len + 1];
      }
      else
        return CURLUE_MALFORMED_INPUT;
    }
    else
      return CURLUE_MALFORMED_INPUT;

    /* this is a RFC2732-style specified IP-address */
    if(portptr && *portptr) {
      if(*portptr != ':')
        return CURLUE_MALFORMED_INPUT;
    }
    else
      portptr = NULL;
  }
  else
    /* not a bracketed address: the first colon starts the port */
    portptr = strchr(hostname, ':');

  if(portptr) {
    char *rest;
    long port;
    char portbuf[7];

    /* Browser behavior adaptation. If there's a colon with no digits after,
       just cut off the name there which makes us ignore the colon and just
       use the default port. Firefox, Chrome and Safari all do that.

       Don't do it if the URL has no scheme, to make something that looks like
       a scheme not work!
    */
    if(!portptr[1]) {
      *portptr = '\0';
      return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
    }

    if(!ISDIGIT(portptr[1]))
      return CURLUE_BAD_PORT_NUMBER;

    port = strtol(portptr + 1, &rest, 10);  /* Port number must be decimal */

    if((port <= 0) || (port > 0xffff))
      /* Single unix standard says port numbers are 16 bits long, but we don't
         treat port zero as OK. */
      return CURLUE_BAD_PORT_NUMBER;

    /* anything following the digits makes the port invalid */
    if(rest[0])
      return CURLUE_BAD_PORT_NUMBER;

    *portptr++ = '\0'; /* cut off the name there */
    *rest = 0;
    /* generate a new port number string to get rid of leading zeroes etc */
    msnprintf(portbuf, sizeof(portbuf), "%ld", port);
    u->portnum = port;
    u->port = strdup(portbuf);
    if(!u->port)
      return CURLUE_OUT_OF_MEMORY;
  }

  return CURLUE_OK;
}
581
582 /* scan for byte values < 31 or 127 */
junkscan(const char * part,unsigned int flags)583 static bool junkscan(const char *part, unsigned int flags)
584 {
585 if(part) {
586 static const char badbytes[]={
587 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
588 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
589 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
590 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
591 0x7f, 0x00 /* null-terminate */
592 };
593 size_t n = strlen(part);
594 size_t nfine = strcspn(part, badbytes);
595 if(nfine != n)
596 /* since we don't know which part is scanned, return a generic error
597 code */
598 return TRUE;
599 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
600 return TRUE;
601 }
602 return FALSE;
603 }
604
/*
 * hostname_check()
 *
 * Verify the host name part of a URL. A name starting with '[' must be a
 * bracketed IPv6 style address made up of hex digits, colons and dots,
 * optionally followed by '%' (or "%25") and a zone id of 1 to 15
 * characters. The zone id is stored in u->zoneid and removed from
 * 'hostname' in place. With ENABLE_IPV6 the address is also run through
 * Curl_inet_pton(). Any other name is accepted unless it contains a space.
 *
 * Returns CURLUE_OK, CURLUE_MALFORMED_INPUT, CURLUE_NO_HOST or
 * CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
{
  size_t len;
  size_t hlen = strlen(hostname);

  if(hostname[0] == '[') {
#ifdef ENABLE_IPV6
    char dest[16]; /* fits a binary IPv6 address */
#endif
    const char *l = "0123456789abcdefABCDEF:.";
    if(hlen < 4) /* '[::]' is the shortest possible valid string */
      return CURLUE_MALFORMED_INPUT;
    /* from here on 'hostname' points past the '[' and 'hlen' is the index
       of the last character of the string, where the ']' must be */
    hostname++;
    hlen -= 2;

    if(hostname[hlen] != ']')
      return CURLUE_MALFORMED_INPUT;

    /* only valid letters are ok */
    len = strspn(hostname, l);
    if(hlen != len) {
      hlen = len;
      if(hostname[len] == '%') {
        /* this could now be '%[zone id]' */
        char zoneid[16];
        int i = 0;
        char *h = &hostname[len + 1];
        /* pass '25' if present and is a url encoded percent sign */
        if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
          h += 2;
        /* copy at most 15 characters, leaving room for the zero */
        while(*h && (*h != ']') && (i < 15))
          zoneid[i++] = *h++;
        /* an empty or too long zone id is rejected */
        if(!i || (']' != *h))
          return CURLUE_MALFORMED_INPUT;
        zoneid[i] = 0;
        u->zoneid = strdup(zoneid);
        if(!u->zoneid)
          return CURLUE_OUT_OF_MEMORY;
        hostname[len] = ']'; /* insert end bracket */
        hostname[len + 1] = 0; /* terminate the hostname */
      }
      else
        return CURLUE_MALFORMED_INPUT;
      /* hostname is fine */
    }
#ifdef ENABLE_IPV6
    hostname[hlen] = 0; /* end the address there */
    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
      return CURLUE_MALFORMED_INPUT;
    hostname[hlen] = ']'; /* restore ending bracket */
#endif
  }
  else {
    /* letters from the second string is not ok */
    len = strcspn(hostname, " ");
    if(hlen != len)
      /* hostname with bad content */
      return CURLUE_MALFORMED_INPUT;
  }
  if(!hostname[0])
    return CURLUE_NO_HOST;
  return CURLUE_OK;
}
668
/* true for the characters that end the host name part of a URL: the start
   of the path ('/'), the query ('?') or the fragment ('#') */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
670
671 /*
672 * Handle partial IPv4 numerical addresses and different bases, like
673 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
674 *
675 * If the given input string is syntactically wrong or any part for example is
676 * too big, this function returns FALSE and doesn't create any output.
677 *
678 * Output the "normalized" version of that input string in plain quad decimal
679 * integers and return TRUE.
680 */
ipv4_normalize(const char * hostname,char * outp,size_t olen)681 static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
682 {
683 bool done = FALSE;
684 int n = 0;
685 const char *c = hostname;
686 unsigned long parts[4] = {0, 0, 0, 0};
687
688 while(!done) {
689 char *endp;
690 unsigned long l;
691 if((*c < '0') || (*c > '9'))
692 /* most importantly this doesn't allow a leading plus or minus */
693 return FALSE;
694 l = strtoul(c, &endp, 0);
695
696 /* overflow or nothing parsed at all */
697 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
698 return FALSE;
699
700 #if SIZEOF_LONG > 4
701 /* a value larger than 32 bits */
702 if(l > UINT_MAX)
703 return FALSE;
704 #endif
705
706 parts[n] = l;
707 c = endp;
708
709 switch (*c) {
710 case '.' :
711 if(n == 3)
712 return FALSE;
713 n++;
714 c++;
715 break;
716
717 case '\0':
718 done = TRUE;
719 break;
720
721 default:
722 return FALSE;
723 }
724 }
725
726 /* this is deemed a valid IPv4 numerical address */
727
728 switch(n) {
729 case 0: /* a -- 32 bits */
730 msnprintf(outp, olen, "%u.%u.%u.%u",
731 parts[0] >> 24, (parts[0] >> 16) & 0xff,
732 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
733 break;
734 case 1: /* a.b -- 8.24 bits */
735 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
736 return FALSE;
737 msnprintf(outp, olen, "%u.%u.%u.%u",
738 parts[0], (parts[1] >> 16) & 0xff,
739 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
740 break;
741 case 2: /* a.b.c -- 8.8.16 bits */
742 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
743 return FALSE;
744 msnprintf(outp, olen, "%u.%u.%u.%u",
745 parts[0], parts[1], (parts[2] >> 8) & 0xff,
746 parts[2] & 0xff);
747 break;
748 case 3: /* a.b.c.d -- 8.8.8.8 bits */
749 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
750 (parts[3] > 0xff))
751 return FALSE;
752 msnprintf(outp, olen, "%u.%u.%u.%u",
753 parts[0], parts[1], parts[2], parts[3]);
754 break;
755 }
756 return TRUE;
757 }
758
seturl(const char * url,CURLU * u,unsigned int flags)759 static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
760 {
761 char *path;
762 bool path_alloced = FALSE;
763 char *hostname;
764 char *query = NULL;
765 char *fragment = NULL;
766 CURLUcode result;
767 bool url_has_scheme = FALSE;
768 char schemebuf[MAX_SCHEME_LEN + 1];
769 const char *schemep = NULL;
770 size_t schemelen = 0;
771 size_t urllen;
772
773 if(!url)
774 return CURLUE_MALFORMED_INPUT;
775
776 /*************************************************************
777 * Parse the URL.
778 ************************************************************/
779 /* allocate scratch area */
780 urllen = strlen(url);
781 if(urllen > CURL_MAX_INPUT_LENGTH)
782 /* excessive input length */
783 return CURLUE_MALFORMED_INPUT;
784
785 path = u->scratch = malloc(urllen * 2 + 2);
786 if(!path)
787 return CURLUE_OUT_OF_MEMORY;
788
789 hostname = &path[urllen + 1];
790 hostname[0] = 0;
791
792 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
793 url_has_scheme = TRUE;
794 schemelen = strlen(schemebuf);
795 }
796
797 /* handle the file: scheme */
798 if(url_has_scheme && strcasecompare(schemebuf, "file")) {
799 /* path has been allocated large enough to hold this */
800 strcpy(path, &url[5]);
801
802 hostname = NULL; /* no host for file: URLs */
803 u->scheme = strdup("file");
804 if(!u->scheme)
805 return CURLUE_OUT_OF_MEMORY;
806
807 /* Extra handling URLs with an authority component (i.e. that start with
808 * "file://")
809 *
810 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
811 * RFC 8089, but not the (current) WHAT-WG URL spec.
812 */
813 if(path[0] == '/' && path[1] == '/') {
814 /* swallow the two slashes */
815 char *ptr = &path[2];
816
817 /*
818 * According to RFC 8089, a file: URL can be reliably dereferenced if:
819 *
820 * o it has no/blank hostname, or
821 *
822 * o the hostname matches "localhost" (case-insensitively), or
823 *
824 * o the hostname is a FQDN that resolves to this machine.
825 *
826 * For brevity, we only consider URLs with empty, "localhost", or
827 * "127.0.0.1" hostnames as local.
828 *
829 * Additionally, there is an exception for URLs with a Windows drive
830 * letter in the authority (which was accidentally omitted from RFC 8089
831 * Appendix E, but believe me, it was meant to be there. --MK)
832 */
833 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
834 /* the URL includes a host name, it must match "localhost" or
835 "127.0.0.1" to be valid */
836 if(!checkprefix("localhost/", ptr) &&
837 !checkprefix("127.0.0.1/", ptr)) {
838 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
839 none */
840 return CURLUE_MALFORMED_INPUT;
841 }
842 ptr += 9; /* now points to the slash after the host */
843 }
844
845 path = ptr;
846 }
847
848 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
849 /* Don't allow Windows drive letters when not in Windows.
850 * This catches both "file:/c:" and "file:c:" */
851 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
852 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
853 /* File drive letters are only accepted in MSDOS/Windows */
854 return CURLUE_MALFORMED_INPUT;
855 }
856 #else
857 /* If the path starts with a slash and a drive letter, ditch the slash */
858 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
859 /* This cannot be done with strcpy, as the memory chunks overlap! */
860 memmove(path, &path[1], strlen(&path[1]) + 1);
861 }
862 #endif
863
864 }
865 else {
866 /* clear path */
867 const char *p;
868 const char *hostp;
869 size_t len;
870 path[0] = 0;
871
872 if(url_has_scheme) {
873 int i = 0;
874 p = &url[schemelen + 1];
875 while(p && (*p == '/') && (i < 4)) {
876 p++;
877 i++;
878 }
879 if((i < 1) || (i>3))
880 /* less than one or more than three slashes */
881 return CURLUE_MALFORMED_INPUT;
882
883 schemep = schemebuf;
884 if(!Curl_builtin_scheme(schemep) &&
885 !(flags & CURLU_NON_SUPPORT_SCHEME))
886 return CURLUE_UNSUPPORTED_SCHEME;
887
888 if(junkscan(schemep, flags))
889 return CURLUE_MALFORMED_INPUT;
890 }
891 else {
892 /* no scheme! */
893
894 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
895 return CURLUE_MALFORMED_INPUT;
896 if(flags & CURLU_DEFAULT_SCHEME)
897 schemep = DEFAULT_SCHEME;
898
899 /*
900 * The URL was badly formatted, let's try without scheme specified.
901 */
902 p = url;
903 }
904 hostp = p; /* host name starts here */
905
906 while(*p && !HOSTNAME_END(*p)) /* find end of host name */
907 p++;
908
909 len = p - hostp;
910 if(len) {
911 memcpy(hostname, hostp, len);
912 hostname[len] = 0;
913 }
914 else {
915 if(!(flags & CURLU_NO_AUTHORITY))
916 return CURLUE_MALFORMED_INPUT;
917 }
918
919 len = strlen(p);
920 memcpy(path, p, len);
921 path[len] = 0;
922
923 if(schemep) {
924 u->scheme = strdup(schemep);
925 if(!u->scheme)
926 return CURLUE_OUT_OF_MEMORY;
927 }
928 }
929
930 if(junkscan(path, flags))
931 return CURLUE_MALFORMED_INPUT;
932
933 if((flags & CURLU_URLENCODE) && path[0]) {
934 /* worst case output length is 3x the original! */
935 char *newp = malloc(strlen(path) * 3);
936 if(!newp)
937 return CURLUE_OUT_OF_MEMORY;
938 path_alloced = TRUE;
939 strcpy_url(newp, path, TRUE); /* consider it relative */
940 u->temppath = path = newp;
941 }
942
943 fragment = strchr(path, '#');
944 if(fragment) {
945 *fragment++ = 0;
946 if(fragment[0]) {
947 u->fragment = strdup(fragment);
948 if(!u->fragment)
949 return CURLUE_OUT_OF_MEMORY;
950 }
951 }
952
953 query = strchr(path, '?');
954 if(query) {
955 *query++ = 0;
956 /* done even if the query part is a blank string */
957 u->query = strdup(query);
958 if(!u->query)
959 return CURLUE_OUT_OF_MEMORY;
960 }
961
962 if(!path[0])
963 /* if there's no path left set, unset */
964 path = NULL;
965 else {
966 if(!(flags & CURLU_PATH_AS_IS)) {
967 /* remove ../ and ./ sequences according to RFC3986 */
968 char *newp = Curl_dedotdotify(path);
969 if(!newp)
970 return CURLUE_OUT_OF_MEMORY;
971
972 if(strcmp(newp, path)) {
973 /* if we got a new version */
974 if(path_alloced)
975 Curl_safefree(u->temppath);
976 u->temppath = path = newp;
977 path_alloced = TRUE;
978 }
979 else
980 free(newp);
981 }
982
983 u->path = path_alloced?path:strdup(path);
984 if(!u->path)
985 return CURLUE_OUT_OF_MEMORY;
986 u->temppath = NULL; /* used now */
987 }
988
989 if(hostname) {
990 char normalized_ipv4[sizeof("255.255.255.255") + 1];
991 /*
992 * Parse the login details and strip them out of the host name.
993 */
994 if(junkscan(hostname, flags))
995 return CURLUE_MALFORMED_INPUT;
996
997 result = parse_hostname_login(u, &hostname, flags);
998 if(result)
999 return result;
1000
1001 result = Curl_parse_port(u, hostname, url_has_scheme);
1002 if(result)
1003 return result;
1004
1005 if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1006 /* Skip hostname check, it's allowed to be empty. */
1007 }
1008 else {
1009 result = hostname_check(u, hostname);
1010 if(result)
1011 return result;
1012 }
1013
1014 if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1015 u->host = strdup(normalized_ipv4);
1016 else
1017 u->host = strdup(hostname);
1018 if(!u->host)
1019 return CURLUE_OUT_OF_MEMORY;
1020
1021 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1022 /* legacy curl-style guess based on host name */
1023 if(checkprefix("ftp.", hostname))
1024 schemep = "ftp";
1025 else if(checkprefix("dict.", hostname))
1026 schemep = "dict";
1027 else if(checkprefix("ldap.", hostname))
1028 schemep = "ldap";
1029 else if(checkprefix("imap.", hostname))
1030 schemep = "imap";
1031 else if(checkprefix("smtp.", hostname))
1032 schemep = "smtp";
1033 else if(checkprefix("pop3.", hostname))
1034 schemep = "pop3";
1035 else
1036 schemep = "http";
1037
1038 u->scheme = strdup(schemep);
1039 if(!u->scheme)
1040 return CURLUE_OUT_OF_MEMORY;
1041 }
1042 }
1043
1044 Curl_safefree(u->scratch);
1045 Curl_safefree(u->temppath);
1046
1047 return CURLUE_OK;
1048 }
1049
1050 /*
1051 * Parse the URL and set the relevant members of the Curl_URL struct.
1052 */
parseurl(const char * url,CURLU * u,unsigned int flags)1053 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1054 {
1055 CURLUcode result = seturl(url, u, flags);
1056 if(result) {
1057 free_urlhandle(u);
1058 memset(u, 0, sizeof(struct Curl_URL));
1059 }
1060 return result;
1061 }
1062
1063 /*
1064 */
curl_url(void)1065 CURLU *curl_url(void)
1066 {
1067 return calloc(sizeof(struct Curl_URL), 1);
1068 }
1069
/*
 * Free a URL handle together with all strings it holds. A NULL handle is
 * silently ignored.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1077
/*
 * Duplicate the string member 'name' of the struct 'src' points to into the
 * struct 'dest' points to. A NULL member is left alone. If the allocation
 * fails, control jumps to the 'fail' label that the using function must
 * provide.
 *
 * The pointer arguments are parenthesized so that any pointer expression can
 * be passed.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1086
/*
 * Create a deep copy of a URL handle: every stored component is duplicated,
 * including the IPv6 zone id, and the numerical port is carried over.
 *
 * Returns the new handle, or NULL if 'in' is NULL or memory runs out. A
 * partially built copy is freed before NULL is returned.
 */
CURLU *curl_url_dup(CURLU *in)
{
  struct Curl_URL *u;

  if(!in)
    /* nothing to copy from */
    return NULL;

  u = calloc(sizeof(struct Curl_URL), 1);
  if(u) {
    DUP(u, in, scheme);
    DUP(u, in, user);
    DUP(u, in, password);
    DUP(u, in, options);
    DUP(u, in, host);
    /* without the zone id, a copy of a scoped IPv6 URL would silently
       turn into a different URL */
    DUP(u, in, zoneid);
    DUP(u, in, port);
    DUP(u, in, path);
    DUP(u, in, query);
    DUP(u, in, fragment);
    u->portnum = in->portnum;
  }
  return u;
  fail:
  curl_url_cleanup(u);
  return NULL;
}
1107
curl_url_get(CURLU * u,CURLUPart what,char ** part,unsigned int flags)1108 CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1109 char **part, unsigned int flags)
1110 {
1111 char *ptr;
1112 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1113 char portbuf[7];
1114 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1115 bool plusdecode = FALSE;
1116 (void)flags;
1117 if(!u)
1118 return CURLUE_BAD_HANDLE;
1119 if(!part)
1120 return CURLUE_BAD_PARTPOINTER;
1121 *part = NULL;
1122
1123 switch(what) {
1124 case CURLUPART_SCHEME:
1125 ptr = u->scheme;
1126 ifmissing = CURLUE_NO_SCHEME;
1127 urldecode = FALSE; /* never for schemes */
1128 break;
1129 case CURLUPART_USER:
1130 ptr = u->user;
1131 ifmissing = CURLUE_NO_USER;
1132 break;
1133 case CURLUPART_PASSWORD:
1134 ptr = u->password;
1135 ifmissing = CURLUE_NO_PASSWORD;
1136 break;
1137 case CURLUPART_OPTIONS:
1138 ptr = u->options;
1139 ifmissing = CURLUE_NO_OPTIONS;
1140 break;
1141 case CURLUPART_HOST:
1142 ptr = u->host;
1143 ifmissing = CURLUE_NO_HOST;
1144 break;
1145 case CURLUPART_ZONEID:
1146 ptr = u->zoneid;
1147 break;
1148 case CURLUPART_PORT:
1149 ptr = u->port;
1150 ifmissing = CURLUE_NO_PORT;
1151 urldecode = FALSE; /* never for port */
1152 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1153 /* there's no stored port number, but asked to deliver
1154 a default one for the scheme */
1155 const struct Curl_handler *h =
1156 Curl_builtin_scheme(u->scheme);
1157 if(h) {
1158 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1159 ptr = portbuf;
1160 }
1161 }
1162 else if(ptr && u->scheme) {
1163 /* there is a stored port number, but ask to inhibit if
1164 it matches the default one for the scheme */
1165 const struct Curl_handler *h =
1166 Curl_builtin_scheme(u->scheme);
1167 if(h && (h->defport == u->portnum) &&
1168 (flags & CURLU_NO_DEFAULT_PORT))
1169 ptr = NULL;
1170 }
1171 break;
1172 case CURLUPART_PATH:
1173 ptr = u->path;
1174 if(!ptr) {
1175 ptr = u->path = strdup("/");
1176 if(!u->path)
1177 return CURLUE_OUT_OF_MEMORY;
1178 }
1179 break;
1180 case CURLUPART_QUERY:
1181 ptr = u->query;
1182 ifmissing = CURLUE_NO_QUERY;
1183 plusdecode = urldecode;
1184 break;
1185 case CURLUPART_FRAGMENT:
1186 ptr = u->fragment;
1187 ifmissing = CURLUE_NO_FRAGMENT;
1188 break;
1189 case CURLUPART_URL: {
1190 char *url;
1191 char *scheme;
1192 char *options = u->options;
1193 char *port = u->port;
1194 char *allochost = NULL;
1195 if(u->scheme && strcasecompare("file", u->scheme)) {
1196 url = aprintf("file://%s%s%s",
1197 u->path,
1198 u->fragment? "#": "",
1199 u->fragment? u->fragment : "");
1200 }
1201 else if(!u->host)
1202 return CURLUE_NO_HOST;
1203 else {
1204 const struct Curl_handler *h = NULL;
1205 if(u->scheme)
1206 scheme = u->scheme;
1207 else if(flags & CURLU_DEFAULT_SCHEME)
1208 scheme = (char *) DEFAULT_SCHEME;
1209 else
1210 return CURLUE_NO_SCHEME;
1211
1212 h = Curl_builtin_scheme(scheme);
1213 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1214 /* there's no stored port number, but asked to deliver
1215 a default one for the scheme */
1216 if(h) {
1217 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1218 port = portbuf;
1219 }
1220 }
1221 else if(port) {
1222 /* there is a stored port number, but asked to inhibit if it matches
1223 the default one for the scheme */
1224 if(h && (h->defport == u->portnum) &&
1225 (flags & CURLU_NO_DEFAULT_PORT))
1226 port = NULL;
1227 }
1228
1229 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1230 options = NULL;
1231
1232 if((u->host[0] == '[') && u->zoneid) {
1233 /* make it '[ host %25 zoneid ]' */
1234 size_t hostlen = strlen(u->host);
1235 size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1236 allochost = malloc(alen);
1237 if(!allochost)
1238 return CURLUE_OUT_OF_MEMORY;
1239 memcpy(allochost, u->host, hostlen - 1);
1240 msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1241 "%%25%s]", u->zoneid);
1242 }
1243
1244 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1245 scheme,
1246 u->user ? u->user : "",
1247 u->password ? ":": "",
1248 u->password ? u->password : "",
1249 options ? ";" : "",
1250 options ? options : "",
1251 (u->user || u->password || options) ? "@": "",
1252 allochost ? allochost : u->host,
1253 port ? ":": "",
1254 port ? port : "",
1255 (u->path && (u->path[0] != '/')) ? "/": "",
1256 u->path ? u->path : "/",
1257 (u->query && u->query[0]) ? "?": "",
1258 (u->query && u->query[0]) ? u->query : "",
1259 u->fragment? "#": "",
1260 u->fragment? u->fragment : "");
1261 free(allochost);
1262 }
1263 if(!url)
1264 return CURLUE_OUT_OF_MEMORY;
1265 *part = url;
1266 return CURLUE_OK;
1267 }
1268 default:
1269 ptr = NULL;
1270 break;
1271 }
1272 if(ptr) {
1273 *part = strdup(ptr);
1274 if(!*part)
1275 return CURLUE_OUT_OF_MEMORY;
1276 if(plusdecode) {
1277 /* convert + to space */
1278 char *plus;
1279 for(plus = *part; *plus; ++plus) {
1280 if(*plus == '+')
1281 *plus = ' ';
1282 }
1283 }
1284 if(urldecode) {
1285 char *decoded;
1286 size_t dlen;
1287 /* this unconditional rejection of control bytes is documented
1288 API behavior */
1289 CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen,
1290 REJECT_CTRL);
1291 free(*part);
1292 if(res) {
1293 *part = NULL;
1294 return CURLUE_URLDECODE;
1295 }
1296 *part = decoded;
1297 }
1298 return CURLUE_OK;
1299 }
1300 else
1301 return ifmissing;
1302 }
1303
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1304 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1305 const char *part, unsigned int flags)
1306 {
1307 char **storep = NULL;
1308 long port = 0;
1309 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1310 bool plusencode = FALSE;
1311 bool urlskipslash = FALSE;
1312 bool appendquery = FALSE;
1313 bool equalsencode = FALSE;
1314
1315 if(!u)
1316 return CURLUE_BAD_HANDLE;
1317 if(!part) {
1318 /* setting a part to NULL clears it */
1319 switch(what) {
1320 case CURLUPART_URL:
1321 break;
1322 case CURLUPART_SCHEME:
1323 storep = &u->scheme;
1324 break;
1325 case CURLUPART_USER:
1326 storep = &u->user;
1327 break;
1328 case CURLUPART_PASSWORD:
1329 storep = &u->password;
1330 break;
1331 case CURLUPART_OPTIONS:
1332 storep = &u->options;
1333 break;
1334 case CURLUPART_HOST:
1335 storep = &u->host;
1336 break;
1337 case CURLUPART_ZONEID:
1338 storep = &u->zoneid;
1339 break;
1340 case CURLUPART_PORT:
1341 u->portnum = 0;
1342 storep = &u->port;
1343 break;
1344 case CURLUPART_PATH:
1345 storep = &u->path;
1346 break;
1347 case CURLUPART_QUERY:
1348 storep = &u->query;
1349 break;
1350 case CURLUPART_FRAGMENT:
1351 storep = &u->fragment;
1352 break;
1353 default:
1354 return CURLUE_UNKNOWN_PART;
1355 }
1356 if(storep && *storep) {
1357 Curl_safefree(*storep);
1358 }
1359 return CURLUE_OK;
1360 }
1361
1362 switch(what) {
1363 case CURLUPART_SCHEME:
1364 if(strlen(part) > MAX_SCHEME_LEN)
1365 /* too long */
1366 return CURLUE_MALFORMED_INPUT;
1367 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1368 /* verify that it is a fine scheme */
1369 !Curl_builtin_scheme(part))
1370 return CURLUE_UNSUPPORTED_SCHEME;
1371 storep = &u->scheme;
1372 urlencode = FALSE; /* never */
1373 break;
1374 case CURLUPART_USER:
1375 storep = &u->user;
1376 break;
1377 case CURLUPART_PASSWORD:
1378 storep = &u->password;
1379 break;
1380 case CURLUPART_OPTIONS:
1381 storep = &u->options;
1382 break;
1383 case CURLUPART_HOST:
1384 storep = &u->host;
1385 Curl_safefree(u->zoneid);
1386 break;
1387 case CURLUPART_ZONEID:
1388 storep = &u->zoneid;
1389 break;
1390 case CURLUPART_PORT:
1391 {
1392 char *endp;
1393 urlencode = FALSE; /* never */
1394 port = strtol(part, &endp, 10); /* Port number must be decimal */
1395 if((port <= 0) || (port > 0xffff))
1396 return CURLUE_BAD_PORT_NUMBER;
1397 if(*endp)
1398 /* weirdly provided number, not good! */
1399 return CURLUE_MALFORMED_INPUT;
1400 storep = &u->port;
1401 }
1402 break;
1403 case CURLUPART_PATH:
1404 urlskipslash = TRUE;
1405 storep = &u->path;
1406 break;
1407 case CURLUPART_QUERY:
1408 plusencode = urlencode;
1409 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1410 equalsencode = appendquery;
1411 storep = &u->query;
1412 break;
1413 case CURLUPART_FRAGMENT:
1414 storep = &u->fragment;
1415 break;
1416 case CURLUPART_URL: {
1417 /*
1418 * Allow a new URL to replace the existing (if any) contents.
1419 *
1420 * If the existing contents is enough for a URL, allow a relative URL to
1421 * replace it.
1422 */
1423 CURLUcode result;
1424 char *oldurl;
1425 char *redired_url;
1426 CURLU *handle2;
1427
1428 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) {
1429 handle2 = curl_url();
1430 if(!handle2)
1431 return CURLUE_OUT_OF_MEMORY;
1432 result = parseurl(part, handle2, flags);
1433 if(!result)
1434 mv_urlhandle(handle2, u);
1435 else
1436 curl_url_cleanup(handle2);
1437 return result;
1438 }
1439 /* extract the full "old" URL to do the redirect on */
1440 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1441 if(result) {
1442 /* couldn't get the old URL, just use the new! */
1443 handle2 = curl_url();
1444 if(!handle2)
1445 return CURLUE_OUT_OF_MEMORY;
1446 result = parseurl(part, handle2, flags);
1447 if(!result)
1448 mv_urlhandle(handle2, u);
1449 else
1450 curl_url_cleanup(handle2);
1451 return result;
1452 }
1453
1454 /* apply the relative part to create a new URL */
1455 redired_url = concat_url(oldurl, part);
1456 free(oldurl);
1457 if(!redired_url)
1458 return CURLUE_OUT_OF_MEMORY;
1459
1460 /* now parse the new URL */
1461 handle2 = curl_url();
1462 if(!handle2) {
1463 free(redired_url);
1464 return CURLUE_OUT_OF_MEMORY;
1465 }
1466 result = parseurl(redired_url, handle2, flags);
1467 free(redired_url);
1468 if(!result)
1469 mv_urlhandle(handle2, u);
1470 else
1471 curl_url_cleanup(handle2);
1472 return result;
1473 }
1474 default:
1475 return CURLUE_UNKNOWN_PART;
1476 }
1477 DEBUGASSERT(storep);
1478 {
1479 const char *newp = part;
1480 size_t nalloc = strlen(part);
1481
1482 if(nalloc > CURL_MAX_INPUT_LENGTH)
1483 /* excessive input length */
1484 return CURLUE_MALFORMED_INPUT;
1485
1486 if(urlencode) {
1487 const unsigned char *i;
1488 char *o;
1489 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1490 if(!enc)
1491 return CURLUE_OUT_OF_MEMORY;
1492 for(i = (const unsigned char *)part, o = enc; *i; i++) {
1493 if((*i == ' ') && plusencode) {
1494 *o = '+';
1495 o++;
1496 }
1497 else if(Curl_isunreserved(*i) ||
1498 ((*i == '/') && urlskipslash) ||
1499 ((*i == '=') && equalsencode)) {
1500 if((*i == '=') && equalsencode)
1501 /* only skip the first equals sign */
1502 equalsencode = FALSE;
1503 *o = *i;
1504 o++;
1505 }
1506 else {
1507 msnprintf(o, 4, "%%%02x", *i);
1508 o += 3;
1509 }
1510 }
1511 *o = 0; /* null-terminate */
1512 newp = enc;
1513 }
1514 else {
1515 char *p;
1516 newp = strdup(part);
1517 if(!newp)
1518 return CURLUE_OUT_OF_MEMORY;
1519 p = (char *)newp;
1520 while(*p) {
1521 /* make sure percent encoded are lower case */
1522 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1523 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1524 p[1] = (char)TOLOWER(p[1]);
1525 p[2] = (char)TOLOWER(p[2]);
1526 p += 3;
1527 }
1528 else
1529 p++;
1530 }
1531 }
1532
1533 if(appendquery) {
1534 /* Append the string onto the old query. Add a '&' separator if none is
1535 present at the end of the exsting query already */
1536 size_t querylen = u->query ? strlen(u->query) : 0;
1537 bool addamperand = querylen && (u->query[querylen -1] != '&');
1538 if(querylen) {
1539 size_t newplen = strlen(newp);
1540 char *p = malloc(querylen + addamperand + newplen + 1);
1541 if(!p) {
1542 free((char *)newp);
1543 return CURLUE_OUT_OF_MEMORY;
1544 }
1545 strcpy(p, u->query); /* original query */
1546 if(addamperand)
1547 p[querylen] = '&'; /* ampersand */
1548 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1549 free((char *)newp);
1550 free(*storep);
1551 *storep = p;
1552 return CURLUE_OK;
1553 }
1554 }
1555
1556 if(what == CURLUPART_HOST) {
1557 if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1558 /* Skip hostname check, it's allowed to be empty. */
1559 }
1560 else {
1561 if(hostname_check(u, (char *)newp)) {
1562 free((char *)newp);
1563 return CURLUE_MALFORMED_INPUT;
1564 }
1565 }
1566 }
1567
1568 free(*storep);
1569 *storep = (char *)newp;
1570 }
1571 /* set after the string, to make it not assigned if the allocation above
1572 fails */
1573 if(port)
1574 u->portnum = port;
1575 return CURLUE_OK;
1576 }
1577