1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24 #include "tool_setup.h"
25
26 #define ENABLE_CURLX_PRINTF
27 /* use our own printf() functions */
28 #include "curlx.h"
29 #include "tool_cfgable.h"
30 #include "tool_doswin.h"
31 #include "tool_urlglob.h"
32 #include "tool_vms.h"
33 #include "dynbuf.h"
34
35 #include "memdebug.h" /* keep this as LAST include */
36
/*
 * GLOBERROR() stores an error message and the URL position it refers to in
 * the glob handle and then evaluates to 'code', which makes it usable as the
 * operand of a return statement. It is built on the comma operator and
 * expects a variable named 'glob' to be in scope where it is used.
 */
#define GLOBERROR(string, column, code) \
  glob->error = string, glob->pos = column, code
39
glob_fixed(struct URLGlob * glob,char * fixed,size_t len)40 static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len)
41 {
42 struct URLPattern *pat = &glob->pattern[glob->size];
43 pat->type = UPTSet;
44 pat->content.Set.size = 1;
45 pat->content.Set.ptr_s = 0;
46 pat->globindex = -1;
47
48 pat->content.Set.elements = malloc(sizeof(char *));
49
50 if(!pat->content.Set.elements)
51 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
52
53 pat->content.Set.elements[0] = malloc(len + 1);
54 if(!pat->content.Set.elements[0])
55 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
56
57 memcpy(pat->content.Set.elements[0], fixed, len);
58 pat->content.Set.elements[0][len] = 0;
59
60 return CURLE_OK;
61 }
62
63 /* multiply
64 *
65 * Multiplies and checks for overflow.
66 */
multiply(curl_off_t * amount,curl_off_t with)67 static int multiply(curl_off_t *amount, curl_off_t with)
68 {
69 curl_off_t sum;
70 DEBUGASSERT(*amount >= 0);
71 DEBUGASSERT(with >= 0);
72 if((with <= 0) || (*amount <= 0)) {
73 sum = 0;
74 }
75 else {
76 #if defined(__GNUC__) && \
77 ((__GNUC__ > 5) || ((__GNUC__ == 5) && (__GNUC_MINOR__ >= 1)))
78 if(__builtin_mul_overflow(*amount, with, &sum))
79 return 1;
80 #else
81 sum = *amount * with;
82 if(sum/with != *amount)
83 return 1; /* didn't fit, bail out */
84 #endif
85 }
86 *amount = sum;
87 return 0;
88 }
89
glob_set(struct URLGlob * glob,char ** patternp,size_t * posp,curl_off_t * amount,int globindex)90 static CURLcode glob_set(struct URLGlob *glob, char **patternp,
91 size_t *posp, curl_off_t *amount,
92 int globindex)
93 {
94 /* processes a set expression with the point behind the opening '{'
95 ','-separated elements are collected until the next closing '}'
96 */
97 struct URLPattern *pat;
98 bool done = FALSE;
99 char *buf = glob->glob_buffer;
100 char *pattern = *patternp;
101 char *opattern = pattern;
102 size_t opos = *posp-1;
103
104 pat = &glob->pattern[glob->size];
105 /* patterns 0,1,2,... correspond to size=1,3,5,... */
106 pat->type = UPTSet;
107 pat->content.Set.size = 0;
108 pat->content.Set.ptr_s = 0;
109 pat->content.Set.elements = NULL;
110 pat->globindex = globindex;
111
112 while(!done) {
113 switch(*pattern) {
114 case '\0': /* URL ended while set was still open */
115 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
116
117 case '{':
118 case '[': /* no nested expressions at this time */
119 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
120
121 case '}': /* set element completed */
122 if(opattern == pattern)
123 return GLOBERROR("empty string within braces", *posp,
124 CURLE_URL_MALFORMAT);
125
126 /* add 1 to size since it'll be incremented below */
127 if(multiply(amount, pat->content.Set.size + 1))
128 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
129
130 FALLTHROUGH();
131 case ',':
132
133 *buf = '\0';
134 if(pat->content.Set.elements) {
135 char **new_arr = realloc(pat->content.Set.elements,
136 (size_t)(pat->content.Set.size + 1) *
137 sizeof(char *));
138 if(!new_arr)
139 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
140
141 pat->content.Set.elements = new_arr;
142 }
143 else
144 pat->content.Set.elements = malloc(sizeof(char *));
145
146 if(!pat->content.Set.elements)
147 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
148
149 pat->content.Set.elements[pat->content.Set.size] =
150 strdup(glob->glob_buffer);
151 if(!pat->content.Set.elements[pat->content.Set.size])
152 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
153 ++pat->content.Set.size;
154
155 if(*pattern == '}') {
156 pattern++; /* pass the closing brace */
157 done = TRUE;
158 continue;
159 }
160
161 buf = glob->glob_buffer;
162 ++pattern;
163 ++(*posp);
164 break;
165
166 case ']': /* illegal closing bracket */
167 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
168
169 case '\\': /* escaped character, skip '\' */
170 if(pattern[1]) {
171 ++pattern;
172 ++(*posp);
173 }
174 FALLTHROUGH();
175 default:
176 *buf++ = *pattern++; /* copy character to set element */
177 ++(*posp);
178 }
179 }
180
181 *patternp = pattern; /* return with the new position */
182 return CURLE_OK;
183 }
184
glob_range(struct URLGlob * glob,char ** patternp,size_t * posp,curl_off_t * amount,int globindex)185 static CURLcode glob_range(struct URLGlob *glob, char **patternp,
186 size_t *posp, curl_off_t *amount,
187 int globindex)
188 {
189 /* processes a range expression with the point behind the opening '['
190 - char range: e.g. "a-z]", "B-Q]"
191 - num range: e.g. "0-9]", "17-2000]"
192 - num range with leading zeros: e.g. "001-999]"
193 expression is checked for well-formedness and collected until the next ']'
194 */
195 struct URLPattern *pat;
196 int rc;
197 char *pattern = *patternp;
198 char *c;
199
200 pat = &glob->pattern[glob->size];
201 pat->globindex = globindex;
202
203 if(ISALPHA(*pattern)) {
204 /* character range detected */
205 char min_c;
206 char max_c;
207 char end_c;
208 unsigned long step = 1;
209
210 pat->type = UPTCharRange;
211
212 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
213
214 if(rc == 3) {
215 if(end_c == ':') {
216 char *endp;
217 errno = 0;
218 step = strtoul(&pattern[4], &endp, 10);
219 if(errno || &pattern[4] == endp || *endp != ']')
220 step = 0;
221 else
222 pattern = endp + 1;
223 }
224 else if(end_c != ']')
225 /* then this is wrong */
226 rc = 0;
227 else
228 /* end_c == ']' */
229 pattern += 4;
230 }
231
232 *posp += (pattern - *patternp);
233
234 if(rc != 3 || !step || step > (unsigned)INT_MAX ||
235 (min_c == max_c && step != 1) ||
236 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
237 (max_c - min_c) > ('z' - 'a'))))
238 /* the pattern is not well-formed */
239 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
240
241 /* if there was a ":[num]" thing, use that as step or else use 1 */
242 pat->content.CharRange.step = (int)step;
243 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
244 pat->content.CharRange.max_c = max_c;
245
246 if(multiply(amount, ((pat->content.CharRange.max_c -
247 pat->content.CharRange.min_c) /
248 pat->content.CharRange.step + 1)))
249 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
250 }
251 else if(ISDIGIT(*pattern)) {
252 /* numeric range detected */
253 unsigned long min_n;
254 unsigned long max_n = 0;
255 unsigned long step_n = 0;
256 char *endp;
257
258 pat->type = UPTNumRange;
259 pat->content.NumRange.padlength = 0;
260
261 if(*pattern == '0') {
262 /* leading zero specified, count them! */
263 c = pattern;
264 while(ISDIGIT(*c)) {
265 c++;
266 ++pat->content.NumRange.padlength; /* padding length is set for all
267 instances of this pattern */
268 }
269 }
270
271 errno = 0;
272 min_n = strtoul(pattern, &endp, 10);
273 if(errno || (endp == pattern))
274 endp = NULL;
275 else {
276 if(*endp != '-')
277 endp = NULL;
278 else {
279 pattern = endp + 1;
280 while(*pattern && ISBLANK(*pattern))
281 pattern++;
282 if(!ISDIGIT(*pattern)) {
283 endp = NULL;
284 goto fail;
285 }
286 errno = 0;
287 max_n = strtoul(pattern, &endp, 10);
288 if(errno)
289 /* overflow */
290 endp = NULL;
291 else if(*endp == ':') {
292 pattern = endp + 1;
293 errno = 0;
294 step_n = strtoul(pattern, &endp, 10);
295 if(errno)
296 /* over/underflow situation */
297 endp = NULL;
298 }
299 else
300 step_n = 1;
301 if(endp && (*endp == ']')) {
302 pattern = endp + 1;
303 }
304 else
305 endp = NULL;
306 }
307 }
308
309 fail:
310 *posp += (pattern - *patternp);
311
312 if(!endp || !step_n ||
313 (min_n == max_n && step_n != 1) ||
314 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
315 /* the pattern is not well-formed */
316 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
317
318 /* typecasting to ints are fine here since we make sure above that we
319 are within 31 bits */
320 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
321 pat->content.NumRange.max_n = max_n;
322 pat->content.NumRange.step = step_n;
323
324 if(multiply(amount, ((pat->content.NumRange.max_n -
325 pat->content.NumRange.min_n) /
326 pat->content.NumRange.step + 1)))
327 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
328 }
329 else
330 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
331
332 *patternp = pattern;
333 return CURLE_OK;
334 }
335
336 #define MAX_IP6LEN 128
337
peek_ipv6(const char * str,size_t * skip)338 static bool peek_ipv6(const char *str, size_t *skip)
339 {
340 /*
341 * Scan for a potential IPv6 literal.
342 * - Valid globs contain a hyphen and <= 1 colon.
343 * - IPv6 literals contain no hyphens and >= 2 colons.
344 */
345 char hostname[MAX_IP6LEN];
346 CURLU *u;
347 char *endbr = strchr(str, ']');
348 size_t hlen;
349 CURLUcode rc;
350 if(!endbr)
351 return FALSE;
352
353 hlen = endbr - str + 1;
354 if(hlen >= MAX_IP6LEN)
355 return FALSE;
356
357 u = curl_url();
358 if(!u)
359 return FALSE;
360
361 memcpy(hostname, str, hlen);
362 hostname[hlen] = 0;
363
364 /* ask to "guess scheme" as then it works without an https:// prefix */
365 rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME);
366
367 curl_url_cleanup(u);
368 if(!rc)
369 *skip = hlen;
370 return rc ? FALSE : TRUE;
371 }
372
glob_parse(struct URLGlob * glob,char * pattern,size_t pos,curl_off_t * amount)373 static CURLcode glob_parse(struct URLGlob *glob, char *pattern,
374 size_t pos, curl_off_t *amount)
375 {
376 /* processes a literal string component of a URL
377 special characters '{' and '[' branch to set/range processing functions
378 */
379 CURLcode res = CURLE_OK;
380 int globindex = 0; /* count "actual" globs */
381
382 *amount = 1;
383
384 while(*pattern && !res) {
385 char *buf = glob->glob_buffer;
386 size_t sublen = 0;
387 while(*pattern && *pattern != '{') {
388 if(*pattern == '[') {
389 /* skip over IPv6 literals and [] */
390 size_t skip = 0;
391 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
392 skip = 2;
393 if(skip) {
394 memcpy(buf, pattern, skip);
395 buf += skip;
396 pattern += skip;
397 sublen += skip;
398 continue;
399 }
400 break;
401 }
402 if(*pattern == '}' || *pattern == ']')
403 return GLOBERROR("unmatched close brace/bracket", pos,
404 CURLE_URL_MALFORMAT);
405
406 /* only allow \ to escape known "special letters" */
407 if(*pattern == '\\' &&
408 (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
409 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
410
411 /* escape character, skip '\' */
412 ++pattern;
413 ++pos;
414 }
415 *buf++ = *pattern++; /* copy character to literal */
416 ++pos;
417 sublen++;
418 }
419 if(sublen) {
420 /* we got a literal string, add it as a single-item list */
421 *buf = '\0';
422 res = glob_fixed(glob, glob->glob_buffer, sublen);
423 }
424 else {
425 switch(*pattern) {
426 case '\0': /* done */
427 break;
428
429 case '{':
430 /* process set pattern */
431 pattern++;
432 pos++;
433 res = glob_set(glob, &pattern, &pos, amount, globindex++);
434 break;
435
436 case '[':
437 /* process range pattern */
438 pattern++;
439 pos++;
440 res = glob_range(glob, &pattern, &pos, amount, globindex++);
441 break;
442 }
443 }
444
445 if(++glob->size >= GLOB_PATTERN_NUM)
446 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
447 }
448 return res;
449 }
450
glob_url(struct URLGlob ** glob,char * url,curl_off_t * urlnum,FILE * error)451 CURLcode glob_url(struct URLGlob **glob, char *url, curl_off_t *urlnum,
452 FILE *error)
453 {
454 /*
455 * We can deal with any-size, just make a buffer with the same length
456 * as the specified URL!
457 */
458 struct URLGlob *glob_expand;
459 curl_off_t amount = 0;
460 char *glob_buffer;
461 CURLcode res;
462
463 *glob = NULL;
464
465 glob_buffer = malloc(strlen(url) + 1);
466 if(!glob_buffer)
467 return CURLE_OUT_OF_MEMORY;
468 glob_buffer[0] = 0;
469
470 glob_expand = calloc(1, sizeof(struct URLGlob));
471 if(!glob_expand) {
472 Curl_safefree(glob_buffer);
473 return CURLE_OUT_OF_MEMORY;
474 }
475 glob_expand->urllen = strlen(url);
476 glob_expand->glob_buffer = glob_buffer;
477
478 res = glob_parse(glob_expand, url, 1, &amount);
479 if(!res)
480 *urlnum = amount;
481 else {
482 if(error && glob_expand->error) {
483 char text[512];
484 const char *t;
485 if(glob_expand->pos) {
486 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
487 glob_expand->error,
488 glob_expand->pos, url, (int)glob_expand->pos - 1, " ");
489 t = text;
490 }
491 else
492 t = glob_expand->error;
493
494 /* send error description to the error-stream */
495 fprintf(error, "curl: (%d) %s\n", res, t);
496 }
497 /* it failed, we cleanup */
498 glob_cleanup(glob_expand);
499 *urlnum = 1;
500 return res;
501 }
502
503 *glob = glob_expand;
504 return CURLE_OK;
505 }
506
glob_cleanup(struct URLGlob * glob)507 void glob_cleanup(struct URLGlob *glob)
508 {
509 size_t i;
510 curl_off_t elem;
511
512 if(!glob)
513 return;
514
515 for(i = 0; i < glob->size; i++) {
516 if((glob->pattern[i].type == UPTSet) &&
517 (glob->pattern[i].content.Set.elements)) {
518 for(elem = glob->pattern[i].content.Set.size - 1;
519 elem >= 0;
520 --elem) {
521 Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
522 }
523 Curl_safefree(glob->pattern[i].content.Set.elements);
524 }
525 }
526 Curl_safefree(glob->glob_buffer);
527 Curl_safefree(glob);
528 }
529
glob_next_url(char ** globbed,struct URLGlob * glob)530 CURLcode glob_next_url(char **globbed, struct URLGlob *glob)
531 {
532 struct URLPattern *pat;
533 size_t i;
534 size_t len;
535 size_t buflen = glob->urllen + 1;
536 char *buf = glob->glob_buffer;
537
538 *globbed = NULL;
539
540 if(!glob->beenhere)
541 glob->beenhere = 1;
542 else {
543 bool carry = TRUE;
544
545 /* implement a counter over the index ranges of all patterns, starting
546 with the rightmost pattern */
547 for(i = 0; carry && (i < glob->size); i++) {
548 carry = FALSE;
549 pat = &glob->pattern[glob->size - 1 - i];
550 switch(pat->type) {
551 case UPTSet:
552 if((pat->content.Set.elements) &&
553 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
554 pat->content.Set.ptr_s = 0;
555 carry = TRUE;
556 }
557 break;
558 case UPTCharRange:
559 pat->content.CharRange.ptr_c =
560 (char)(pat->content.CharRange.step +
561 (int)((unsigned char)pat->content.CharRange.ptr_c));
562 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
563 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
564 carry = TRUE;
565 }
566 break;
567 case UPTNumRange:
568 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
569 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
570 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
571 carry = TRUE;
572 }
573 break;
574 default:
575 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
576 return CURLE_FAILED_INIT;
577 }
578 }
579 if(carry) { /* first pattern ptr has run into overflow, done! */
580 return CURLE_OK;
581 }
582 }
583
584 for(i = 0; i < glob->size; ++i) {
585 pat = &glob->pattern[i];
586 switch(pat->type) {
587 case UPTSet:
588 if(pat->content.Set.elements) {
589 msnprintf(buf, buflen, "%s",
590 pat->content.Set.elements[pat->content.Set.ptr_s]);
591 len = strlen(buf);
592 buf += len;
593 buflen -= len;
594 }
595 break;
596 case UPTCharRange:
597 if(buflen) {
598 *buf++ = pat->content.CharRange.ptr_c;
599 *buf = '\0';
600 buflen--;
601 }
602 break;
603 case UPTNumRange:
604 msnprintf(buf, buflen, "%0*" CURL_FORMAT_CURL_OFF_T,
605 pat->content.NumRange.padlength,
606 pat->content.NumRange.ptr_n);
607 len = strlen(buf);
608 buf += len;
609 buflen -= len;
610 break;
611 default:
612 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
613 return CURLE_FAILED_INIT;
614 }
615 }
616
617 *globbed = strdup(glob->glob_buffer);
618 if(!*globbed)
619 return CURLE_OUT_OF_MEMORY;
620
621 return CURLE_OK;
622 }
623
624 #define MAX_OUTPUT_GLOB_LENGTH (10*1024)
625
glob_match_url(char ** result,char * filename,struct URLGlob * glob)626 CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob)
627 {
628 char numbuf[18];
629 char *appendthis = (char *)"";
630 size_t appendlen = 0;
631 struct curlx_dynbuf dyn;
632
633 *result = NULL;
634
635 /* We cannot use the glob_buffer for storage since the filename may be
636 * longer than the URL we use.
637 */
638 curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH);
639
640 while(*filename) {
641 if(*filename == '#' && ISDIGIT(filename[1])) {
642 char *ptr = filename;
643 unsigned long num = strtoul(&filename[1], &filename, 10);
644 struct URLPattern *pat = NULL;
645
646 if(num && (num < glob->size)) {
647 unsigned long i;
648 num--; /* make it zero based */
649 /* find the correct glob entry */
650 for(i = 0; i<glob->size; i++) {
651 if(glob->pattern[i].globindex == (int)num) {
652 pat = &glob->pattern[i];
653 break;
654 }
655 }
656 }
657
658 if(pat) {
659 switch(pat->type) {
660 case UPTSet:
661 if(pat->content.Set.elements) {
662 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
663 appendlen =
664 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
665 }
666 break;
667 case UPTCharRange:
668 numbuf[0] = pat->content.CharRange.ptr_c;
669 numbuf[1] = 0;
670 appendthis = numbuf;
671 appendlen = 1;
672 break;
673 case UPTNumRange:
674 msnprintf(numbuf, sizeof(numbuf), "%0*" CURL_FORMAT_CURL_OFF_T,
675 pat->content.NumRange.padlength,
676 pat->content.NumRange.ptr_n);
677 appendthis = numbuf;
678 appendlen = strlen(numbuf);
679 break;
680 default:
681 fprintf(tool_stderr, "internal error: invalid pattern type (%d)\n",
682 (int)pat->type);
683 curlx_dyn_free(&dyn);
684 return CURLE_FAILED_INIT;
685 }
686 }
687 else {
688 /* #[num] out of range, use the #[num] in the output */
689 filename = ptr;
690 appendthis = filename++;
691 appendlen = 1;
692 }
693 }
694 else {
695 appendthis = filename++;
696 appendlen = 1;
697 }
698 if(curlx_dyn_addn(&dyn, appendthis, appendlen))
699 return CURLE_OUT_OF_MEMORY;
700 }
701
702 if(curlx_dyn_addn(&dyn, "", 0))
703 return CURLE_OUT_OF_MEMORY;
704
705 #if defined(_WIN32) || defined(MSDOS)
706 {
707 char *sanitized;
708 SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn),
709 (SANITIZE_ALLOW_PATH |
710 SANITIZE_ALLOW_RESERVED));
711 curlx_dyn_free(&dyn);
712 if(sc)
713 return CURLE_URL_MALFORMAT;
714 *result = sanitized;
715 return CURLE_OK;
716 }
717 #else
718 *result = curlx_dyn_ptr(&dyn);
719 return CURLE_OK;
720 #endif /* _WIN32 || MSDOS */
721 }
722