1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22 #include "tool_setup.h"
23
24 #define ENABLE_CURLX_PRINTF
25 /* use our own printf() functions */
26 #include "curlx.h"
27 #include "tool_cfgable.h"
28 #include "tool_doswin.h"
29 #include "tool_urlglob.h"
30 #include "tool_vms.h"
31 #include "dynbuf.h"
32
33 #include "memdebug.h" /* keep this as LAST include */
34
/*
 * Stores an error description and the URL column it refers to in the
 * `glob` object that must be in scope at the point of use, and evaluates to
 * `code`. Meant to be used as: return GLOBERROR("text", column, code);
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
37
glob_fixed(struct URLGlob * glob,char * fixed,size_t len)38 static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len)
39 {
40 struct URLPattern *pat = &glob->pattern[glob->size];
41 pat->type = UPTSet;
42 pat->content.Set.size = 1;
43 pat->content.Set.ptr_s = 0;
44 pat->globindex = -1;
45
46 pat->content.Set.elements = malloc(sizeof(char *));
47
48 if(!pat->content.Set.elements)
49 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
50
51 pat->content.Set.elements[0] = malloc(len + 1);
52 if(!pat->content.Set.elements[0])
53 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
54
55 memcpy(pat->content.Set.elements[0], fixed, len);
56 pat->content.Set.elements[0][len] = 0;
57
58 return CURLE_OK;
59 }
60
/* multiply
 *
 * Multiplies *amount by 'with' and stores the product back in *amount.
 *
 * Returns 0 on success. Returns 1 and leaves *amount untouched if the
 * product does not fit in an unsigned long. Multiplying by zero always
 * succeeds and yields zero.
 */
static int multiply(unsigned long *amount, long with)
{
  const unsigned long before = *amount;
  unsigned long product;

  if(with == 0) {
    *amount = 0;
    return 0;
  }

  /* unsigned arithmetic wraps; dividing back reveals whether it did */
  product = before * with;
  if(product / with != before)
    return 1; /* didn't fit, bail out */

  *amount = product;
  return 0;
}
77
glob_set(struct URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)78 static CURLcode glob_set(struct URLGlob *glob, char **patternp,
79 size_t *posp, unsigned long *amount,
80 int globindex)
81 {
82 /* processes a set expression with the point behind the opening '{'
83 ','-separated elements are collected until the next closing '}'
84 */
85 struct URLPattern *pat;
86 bool done = FALSE;
87 char *buf = glob->glob_buffer;
88 char *pattern = *patternp;
89 char *opattern = pattern;
90 size_t opos = *posp-1;
91
92 pat = &glob->pattern[glob->size];
93 /* patterns 0,1,2,... correspond to size=1,3,5,... */
94 pat->type = UPTSet;
95 pat->content.Set.size = 0;
96 pat->content.Set.ptr_s = 0;
97 pat->content.Set.elements = NULL;
98 pat->globindex = globindex;
99
100 while(!done) {
101 switch (*pattern) {
102 case '\0': /* URL ended while set was still open */
103 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
104
105 case '{':
106 case '[': /* no nested expressions at this time */
107 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
108
109 case '}': /* set element completed */
110 if(opattern == pattern)
111 return GLOBERROR("empty string within braces", *posp,
112 CURLE_URL_MALFORMAT);
113
114 /* add 1 to size since it'll be incremented below */
115 if(multiply(amount, pat->content.Set.size + 1))
116 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
117
118 /* FALLTHROUGH */
119 case ',':
120
121 *buf = '\0';
122 if(pat->content.Set.elements) {
123 char **new_arr = realloc(pat->content.Set.elements,
124 (pat->content.Set.size + 1) * sizeof(char *));
125 if(!new_arr)
126 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
127
128 pat->content.Set.elements = new_arr;
129 }
130 else
131 pat->content.Set.elements = malloc(sizeof(char *));
132
133 if(!pat->content.Set.elements)
134 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
135
136 pat->content.Set.elements[pat->content.Set.size] =
137 strdup(glob->glob_buffer);
138 if(!pat->content.Set.elements[pat->content.Set.size])
139 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
140 ++pat->content.Set.size;
141
142 if(*pattern == '}') {
143 pattern++; /* pass the closing brace */
144 done = TRUE;
145 continue;
146 }
147
148 buf = glob->glob_buffer;
149 ++pattern;
150 ++(*posp);
151 break;
152
153 case ']': /* illegal closing bracket */
154 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
155
156 case '\\': /* escaped character, skip '\' */
157 if(pattern[1]) {
158 ++pattern;
159 ++(*posp);
160 }
161 /* FALLTHROUGH */
162 default:
163 *buf++ = *pattern++; /* copy character to set element */
164 ++(*posp);
165 }
166 }
167
168 *patternp = pattern; /* return with the new position */
169 return CURLE_OK;
170 }
171
glob_range(struct URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)172 static CURLcode glob_range(struct URLGlob *glob, char **patternp,
173 size_t *posp, unsigned long *amount,
174 int globindex)
175 {
176 /* processes a range expression with the point behind the opening '['
177 - char range: e.g. "a-z]", "B-Q]"
178 - num range: e.g. "0-9]", "17-2000]"
179 - num range with leading zeros: e.g. "001-999]"
180 expression is checked for well-formedness and collected until the next ']'
181 */
182 struct URLPattern *pat;
183 int rc;
184 char *pattern = *patternp;
185 char *c;
186
187 pat = &glob->pattern[glob->size];
188 pat->globindex = globindex;
189
190 if(ISALPHA(*pattern)) {
191 /* character range detected */
192 char min_c;
193 char max_c;
194 char end_c;
195 unsigned long step = 1;
196
197 pat->type = UPTCharRange;
198
199 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
200
201 if(rc == 3) {
202 if(end_c == ':') {
203 char *endp;
204 errno = 0;
205 step = strtoul(&pattern[4], &endp, 10);
206 if(errno || &pattern[4] == endp || *endp != ']')
207 step = 0;
208 else
209 pattern = endp + 1;
210 }
211 else if(end_c != ']')
212 /* then this is wrong */
213 rc = 0;
214 else
215 /* end_c == ']' */
216 pattern += 4;
217 }
218
219 *posp += (pattern - *patternp);
220
221 if(rc != 3 || !step || step > (unsigned)INT_MAX ||
222 (min_c == max_c && step != 1) ||
223 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
224 (max_c - min_c) > ('z' - 'a'))))
225 /* the pattern is not well-formed */
226 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
227
228 /* if there was a ":[num]" thing, use that as step or else use 1 */
229 pat->content.CharRange.step = (int)step;
230 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
231 pat->content.CharRange.max_c = max_c;
232
233 if(multiply(amount, ((pat->content.CharRange.max_c -
234 pat->content.CharRange.min_c) /
235 pat->content.CharRange.step + 1)))
236 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
237 }
238 else if(ISDIGIT(*pattern)) {
239 /* numeric range detected */
240 unsigned long min_n;
241 unsigned long max_n = 0;
242 unsigned long step_n = 0;
243 char *endp;
244
245 pat->type = UPTNumRange;
246 pat->content.NumRange.padlength = 0;
247
248 if(*pattern == '0') {
249 /* leading zero specified, count them! */
250 c = pattern;
251 while(ISDIGIT(*c)) {
252 c++;
253 ++pat->content.NumRange.padlength; /* padding length is set for all
254 instances of this pattern */
255 }
256 }
257
258 errno = 0;
259 min_n = strtoul(pattern, &endp, 10);
260 if(errno || (endp == pattern))
261 endp = NULL;
262 else {
263 if(*endp != '-')
264 endp = NULL;
265 else {
266 pattern = endp + 1;
267 while(*pattern && ISBLANK(*pattern))
268 pattern++;
269 if(!ISDIGIT(*pattern)) {
270 endp = NULL;
271 goto fail;
272 }
273 errno = 0;
274 max_n = strtoul(pattern, &endp, 10);
275 if(errno)
276 /* overflow */
277 endp = NULL;
278 else if(*endp == ':') {
279 pattern = endp + 1;
280 errno = 0;
281 step_n = strtoul(pattern, &endp, 10);
282 if(errno)
283 /* over/underflow situation */
284 endp = NULL;
285 }
286 else
287 step_n = 1;
288 if(endp && (*endp == ']')) {
289 pattern = endp + 1;
290 }
291 else
292 endp = NULL;
293 }
294 }
295
296 fail:
297 *posp += (pattern - *patternp);
298
299 if(!endp || !step_n ||
300 (min_n == max_n && step_n != 1) ||
301 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
302 /* the pattern is not well-formed */
303 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
304
305 /* typecasting to ints are fine here since we make sure above that we
306 are within 31 bits */
307 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
308 pat->content.NumRange.max_n = max_n;
309 pat->content.NumRange.step = step_n;
310
311 if(multiply(amount, ((pat->content.NumRange.max_n -
312 pat->content.NumRange.min_n) /
313 pat->content.NumRange.step + 1)))
314 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
315 }
316 else
317 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
318
319 *patternp = pattern;
320 return CURLE_OK;
321 }
322
323 #define MAX_IP6LEN 128
324
peek_ipv6(const char * str,size_t * skip)325 static bool peek_ipv6(const char *str, size_t *skip)
326 {
327 /*
328 * Scan for a potential IPv6 literal.
329 * - Valid globs contain a hyphen and <= 1 colon.
330 * - IPv6 literals contain no hyphens and >= 2 colons.
331 */
332 char hostname[MAX_IP6LEN];
333 CURLU *u;
334 char *endbr = strchr(str, ']');
335 size_t hlen;
336 CURLUcode rc;
337 if(!endbr)
338 return FALSE;
339
340 hlen = endbr - str + 1;
341 if(hlen >= MAX_IP6LEN)
342 return FALSE;
343
344 u = curl_url();
345 if(!u)
346 return FALSE;
347
348 memcpy(hostname, str, hlen);
349 hostname[hlen] = 0;
350
351 /* ask to "guess scheme" as then it works without a https:// prefix */
352 rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME);
353
354 curl_url_cleanup(u);
355 if(!rc)
356 *skip = hlen;
357 return rc ? FALSE : TRUE;
358 }
359
glob_parse(struct URLGlob * glob,char * pattern,size_t pos,unsigned long * amount)360 static CURLcode glob_parse(struct URLGlob *glob, char *pattern,
361 size_t pos, unsigned long *amount)
362 {
363 /* processes a literal string component of a URL
364 special characters '{' and '[' branch to set/range processing functions
365 */
366 CURLcode res = CURLE_OK;
367 int globindex = 0; /* count "actual" globs */
368
369 *amount = 1;
370
371 while(*pattern && !res) {
372 char *buf = glob->glob_buffer;
373 size_t sublen = 0;
374 while(*pattern && *pattern != '{') {
375 if(*pattern == '[') {
376 /* skip over IPv6 literals and [] */
377 size_t skip = 0;
378 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
379 skip = 2;
380 if(skip) {
381 memcpy(buf, pattern, skip);
382 buf += skip;
383 pattern += skip;
384 sublen += skip;
385 continue;
386 }
387 break;
388 }
389 if(*pattern == '}' || *pattern == ']')
390 return GLOBERROR("unmatched close brace/bracket", pos,
391 CURLE_URL_MALFORMAT);
392
393 /* only allow \ to escape known "special letters" */
394 if(*pattern == '\\' &&
395 (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
396 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
397
398 /* escape character, skip '\' */
399 ++pattern;
400 ++pos;
401 }
402 *buf++ = *pattern++; /* copy character to literal */
403 ++pos;
404 sublen++;
405 }
406 if(sublen) {
407 /* we got a literal string, add it as a single-item list */
408 *buf = '\0';
409 res = glob_fixed(glob, glob->glob_buffer, sublen);
410 }
411 else {
412 switch (*pattern) {
413 case '\0': /* done */
414 break;
415
416 case '{':
417 /* process set pattern */
418 pattern++;
419 pos++;
420 res = glob_set(glob, &pattern, &pos, amount, globindex++);
421 break;
422
423 case '[':
424 /* process range pattern */
425 pattern++;
426 pos++;
427 res = glob_range(glob, &pattern, &pos, amount, globindex++);
428 break;
429 }
430 }
431
432 if(++glob->size >= GLOB_PATTERN_NUM)
433 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
434 }
435 return res;
436 }
437
glob_url(struct URLGlob ** glob,char * url,unsigned long * urlnum,FILE * error)438 CURLcode glob_url(struct URLGlob **glob, char *url, unsigned long *urlnum,
439 FILE *error)
440 {
441 /*
442 * We can deal with any-size, just make a buffer with the same length
443 * as the specified URL!
444 */
445 struct URLGlob *glob_expand;
446 unsigned long amount = 0;
447 char *glob_buffer;
448 CURLcode res;
449
450 *glob = NULL;
451
452 glob_buffer = malloc(strlen(url) + 1);
453 if(!glob_buffer)
454 return CURLE_OUT_OF_MEMORY;
455 glob_buffer[0] = 0;
456
457 glob_expand = calloc(1, sizeof(struct URLGlob));
458 if(!glob_expand) {
459 Curl_safefree(glob_buffer);
460 return CURLE_OUT_OF_MEMORY;
461 }
462 glob_expand->urllen = strlen(url);
463 glob_expand->glob_buffer = glob_buffer;
464
465 res = glob_parse(glob_expand, url, 1, &amount);
466 if(!res)
467 *urlnum = amount;
468 else {
469 if(error && glob_expand->error) {
470 char text[512];
471 const char *t;
472 if(glob_expand->pos) {
473 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
474 glob_expand->error,
475 glob_expand->pos, url, (int)glob_expand->pos - 1, " ");
476 t = text;
477 }
478 else
479 t = glob_expand->error;
480
481 /* send error description to the error-stream */
482 fprintf(error, "curl: (%d) %s\n", res, t);
483 }
484 /* it failed, we cleanup */
485 glob_cleanup(glob_expand);
486 *urlnum = 1;
487 return res;
488 }
489
490 *glob = glob_expand;
491 return CURLE_OK;
492 }
493
glob_cleanup(struct URLGlob * glob)494 void glob_cleanup(struct URLGlob *glob)
495 {
496 size_t i;
497 int elem;
498
499 if(!glob)
500 return;
501
502 for(i = 0; i < glob->size; i++) {
503 if((glob->pattern[i].type == UPTSet) &&
504 (glob->pattern[i].content.Set.elements)) {
505 for(elem = glob->pattern[i].content.Set.size - 1;
506 elem >= 0;
507 --elem) {
508 Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
509 }
510 Curl_safefree(glob->pattern[i].content.Set.elements);
511 }
512 }
513 Curl_safefree(glob->glob_buffer);
514 Curl_safefree(glob);
515 }
516
glob_next_url(char ** globbed,struct URLGlob * glob)517 CURLcode glob_next_url(char **globbed, struct URLGlob *glob)
518 {
519 struct URLPattern *pat;
520 size_t i;
521 size_t len;
522 size_t buflen = glob->urllen + 1;
523 char *buf = glob->glob_buffer;
524
525 *globbed = NULL;
526
527 if(!glob->beenhere)
528 glob->beenhere = 1;
529 else {
530 bool carry = TRUE;
531
532 /* implement a counter over the index ranges of all patterns, starting
533 with the rightmost pattern */
534 for(i = 0; carry && (i < glob->size); i++) {
535 carry = FALSE;
536 pat = &glob->pattern[glob->size - 1 - i];
537 switch(pat->type) {
538 case UPTSet:
539 if((pat->content.Set.elements) &&
540 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
541 pat->content.Set.ptr_s = 0;
542 carry = TRUE;
543 }
544 break;
545 case UPTCharRange:
546 pat->content.CharRange.ptr_c =
547 (char)(pat->content.CharRange.step +
548 (int)((unsigned char)pat->content.CharRange.ptr_c));
549 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
550 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
551 carry = TRUE;
552 }
553 break;
554 case UPTNumRange:
555 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
556 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
557 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
558 carry = TRUE;
559 }
560 break;
561 default:
562 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
563 return CURLE_FAILED_INIT;
564 }
565 }
566 if(carry) { /* first pattern ptr has run into overflow, done! */
567 return CURLE_OK;
568 }
569 }
570
571 for(i = 0; i < glob->size; ++i) {
572 pat = &glob->pattern[i];
573 switch(pat->type) {
574 case UPTSet:
575 if(pat->content.Set.elements) {
576 msnprintf(buf, buflen, "%s",
577 pat->content.Set.elements[pat->content.Set.ptr_s]);
578 len = strlen(buf);
579 buf += len;
580 buflen -= len;
581 }
582 break;
583 case UPTCharRange:
584 if(buflen) {
585 *buf++ = pat->content.CharRange.ptr_c;
586 *buf = '\0';
587 buflen--;
588 }
589 break;
590 case UPTNumRange:
591 msnprintf(buf, buflen, "%0*lu",
592 pat->content.NumRange.padlength,
593 pat->content.NumRange.ptr_n);
594 len = strlen(buf);
595 buf += len;
596 buflen -= len;
597 break;
598 default:
599 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
600 return CURLE_FAILED_INIT;
601 }
602 }
603
604 *globbed = strdup(glob->glob_buffer);
605 if(!*globbed)
606 return CURLE_OUT_OF_MEMORY;
607
608 return CURLE_OK;
609 }
610
611 #define MAX_OUTPUT_GLOB_LENGTH (10*1024)
612
glob_match_url(char ** result,char * filename,struct URLGlob * glob)613 CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob)
614 {
615 char numbuf[18];
616 char *appendthis = (char *)"";
617 size_t appendlen = 0;
618 struct curlx_dynbuf dyn;
619
620 *result = NULL;
621
622 /* We cannot use the glob_buffer for storage since the filename may be
623 * longer than the URL we use.
624 */
625 curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH);
626
627 while(*filename) {
628 if(*filename == '#' && ISDIGIT(filename[1])) {
629 char *ptr = filename;
630 unsigned long num = strtoul(&filename[1], &filename, 10);
631 struct URLPattern *pat = NULL;
632
633 if(num && (num < glob->size)) {
634 unsigned long i;
635 num--; /* make it zero based */
636 /* find the correct glob entry */
637 for(i = 0; i<glob->size; i++) {
638 if(glob->pattern[i].globindex == (int)num) {
639 pat = &glob->pattern[i];
640 break;
641 }
642 }
643 }
644
645 if(pat) {
646 switch(pat->type) {
647 case UPTSet:
648 if(pat->content.Set.elements) {
649 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
650 appendlen =
651 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
652 }
653 break;
654 case UPTCharRange:
655 numbuf[0] = pat->content.CharRange.ptr_c;
656 numbuf[1] = 0;
657 appendthis = numbuf;
658 appendlen = 1;
659 break;
660 case UPTNumRange:
661 msnprintf(numbuf, sizeof(numbuf), "%0*lu",
662 pat->content.NumRange.padlength,
663 pat->content.NumRange.ptr_n);
664 appendthis = numbuf;
665 appendlen = strlen(numbuf);
666 break;
667 default:
668 fprintf(stderr, "internal error: invalid pattern type (%d)\n",
669 (int)pat->type);
670 curlx_dyn_free(&dyn);
671 return CURLE_FAILED_INIT;
672 }
673 }
674 else {
675 /* #[num] out of range, use the #[num] in the output */
676 filename = ptr;
677 appendthis = filename++;
678 appendlen = 1;
679 }
680 }
681 else {
682 appendthis = filename++;
683 appendlen = 1;
684 }
685 if(curlx_dyn_addn(&dyn, appendthis, appendlen))
686 return CURLE_OUT_OF_MEMORY;
687 }
688
689 #if defined(MSDOS) || defined(WIN32)
690 {
691 char *sanitized;
692 SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn),
693 (SANITIZE_ALLOW_PATH |
694 SANITIZE_ALLOW_RESERVED));
695 curlx_dyn_free(&dyn);
696 if(sc)
697 return CURLE_URL_MALFORMAT;
698 *result = sanitized;
699 return CURLE_OK;
700 }
701 #else
702 *result = curlx_dyn_ptr(&dyn);
703 return CURLE_OK;
704 #endif /* MSDOS || WIN32 */
705 }
706