1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22 #include "tool_setup.h"
23
24 #define ENABLE_CURLX_PRINTF
25 /* use our own printf() functions */
26 #include "curlx.h"
27 #include "tool_cfgable.h"
28 #include "tool_doswin.h"
29 #include "tool_urlglob.h"
30 #include "tool_vms.h"
31
32 #include "memdebug.h" /* keep this as LAST include */
33
/*
 * GLOBERROR() records an error description and the column it refers to in
 * the URLGlob that the enclosing function calls 'glob', and evaluates to
 * 'code' so that it can be used directly as a return value.
 *
 * The arguments and the whole expansion are parenthesized so that the macro
 * behaves as one expression wherever it is used, not only after 'return'.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
36
glob_fixed(URLGlob * glob,char * fixed,size_t len)37 static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
38 {
39 URLPattern *pat = &glob->pattern[glob->size];
40 pat->type = UPTSet;
41 pat->content.Set.size = 1;
42 pat->content.Set.ptr_s = 0;
43 pat->globindex = -1;
44
45 pat->content.Set.elements = malloc(sizeof(char *));
46
47 if(!pat->content.Set.elements)
48 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
49
50 pat->content.Set.elements[0] = malloc(len + 1);
51 if(!pat->content.Set.elements[0])
52 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
53
54 memcpy(pat->content.Set.elements[0], fixed, len);
55 pat->content.Set.elements[0][len] = 0;
56
57 return CURLE_OK;
58 }
59
/* multiply
 *
 * Scales *amount by 'with'. Returns 0 on success and 1 if the product does
 * not fit in an unsigned long, in which case *amount is left untouched.
 * A zero factor sets *amount to zero.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long product;

  if(!with) {
    *amount = 0;
    return 0;
  }

  /* unsigned arithmetic wraps; dividing back reveals a wrap-around */
  product = *amount * with;
  if(product / with != *amount)
    return 1; /* didn't fit, bail out */

  *amount = product;
  return 0;
}
76
glob_set(URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)77 static CURLcode glob_set(URLGlob *glob, char **patternp,
78 size_t *posp, unsigned long *amount,
79 int globindex)
80 {
81 /* processes a set expression with the point behind the opening '{'
82 ','-separated elements are collected until the next closing '}'
83 */
84 URLPattern *pat;
85 bool done = FALSE;
86 char *buf = glob->glob_buffer;
87 char *pattern = *patternp;
88 char *opattern = pattern;
89 size_t opos = *posp-1;
90
91 pat = &glob->pattern[glob->size];
92 /* patterns 0,1,2,... correspond to size=1,3,5,... */
93 pat->type = UPTSet;
94 pat->content.Set.size = 0;
95 pat->content.Set.ptr_s = 0;
96 pat->content.Set.elements = NULL;
97 pat->globindex = globindex;
98
99 while(!done) {
100 switch (*pattern) {
101 case '\0': /* URL ended while set was still open */
102 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
103
104 case '{':
105 case '[': /* no nested expressions at this time */
106 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
107
108 case '}': /* set element completed */
109 if(opattern == pattern)
110 return GLOBERROR("empty string within braces", *posp,
111 CURLE_URL_MALFORMAT);
112
113 /* add 1 to size since it'll be incremented below */
114 if(multiply(amount, pat->content.Set.size + 1))
115 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
116
117 /* FALLTHROUGH */
118 case ',':
119
120 *buf = '\0';
121 if(pat->content.Set.elements) {
122 char **new_arr = realloc(pat->content.Set.elements,
123 (pat->content.Set.size + 1) * sizeof(char *));
124 if(!new_arr)
125 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
126
127 pat->content.Set.elements = new_arr;
128 }
129 else
130 pat->content.Set.elements = malloc(sizeof(char *));
131
132 if(!pat->content.Set.elements)
133 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
134
135 pat->content.Set.elements[pat->content.Set.size] =
136 strdup(glob->glob_buffer);
137 if(!pat->content.Set.elements[pat->content.Set.size])
138 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
139 ++pat->content.Set.size;
140
141 if(*pattern == '}') {
142 pattern++; /* pass the closing brace */
143 done = TRUE;
144 continue;
145 }
146
147 buf = glob->glob_buffer;
148 ++pattern;
149 ++(*posp);
150 break;
151
152 case ']': /* illegal closing bracket */
153 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
154
155 case '\\': /* escaped character, skip '\' */
156 if(pattern[1]) {
157 ++pattern;
158 ++(*posp);
159 }
160 /* FALLTHROUGH */
161 default:
162 *buf++ = *pattern++; /* copy character to set element */
163 ++(*posp);
164 }
165 }
166
167 *patternp = pattern; /* return with the new position */
168 return CURLE_OK;
169 }
170
glob_range(URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)171 static CURLcode glob_range(URLGlob *glob, char **patternp,
172 size_t *posp, unsigned long *amount,
173 int globindex)
174 {
175 /* processes a range expression with the point behind the opening '['
176 - char range: e.g. "a-z]", "B-Q]"
177 - num range: e.g. "0-9]", "17-2000]"
178 - num range with leading zeros: e.g. "001-999]"
179 expression is checked for well-formedness and collected until the next ']'
180 */
181 URLPattern *pat;
182 int rc;
183 char *pattern = *patternp;
184 char *c;
185
186 pat = &glob->pattern[glob->size];
187 pat->globindex = globindex;
188
189 if(ISALPHA(*pattern)) {
190 /* character range detected */
191 char min_c;
192 char max_c;
193 char end_c;
194 unsigned long step = 1;
195
196 pat->type = UPTCharRange;
197
198 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
199
200 if(rc == 3) {
201 if(end_c == ':') {
202 char *endp;
203 errno = 0;
204 step = strtoul(&pattern[4], &endp, 10);
205 if(errno || &pattern[4] == endp || *endp != ']')
206 step = 0;
207 else
208 pattern = endp + 1;
209 }
210 else if(end_c != ']')
211 /* then this is wrong */
212 rc = 0;
213 else
214 /* end_c == ']' */
215 pattern += 4;
216 }
217
218 *posp += (pattern - *patternp);
219
220 if(rc != 3 || !step || step > (unsigned)INT_MAX ||
221 (min_c == max_c && step != 1) ||
222 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
223 (max_c - min_c) > ('z' - 'a'))))
224 /* the pattern is not well-formed */
225 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
226
227 /* if there was a ":[num]" thing, use that as step or else use 1 */
228 pat->content.CharRange.step = (int)step;
229 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
230 pat->content.CharRange.max_c = max_c;
231
232 if(multiply(amount, ((pat->content.CharRange.max_c -
233 pat->content.CharRange.min_c) /
234 pat->content.CharRange.step + 1)))
235 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
236 }
237 else if(ISDIGIT(*pattern)) {
238 /* numeric range detected */
239 unsigned long min_n;
240 unsigned long max_n = 0;
241 unsigned long step_n = 0;
242 char *endp;
243
244 pat->type = UPTNumRange;
245 pat->content.NumRange.padlength = 0;
246
247 if(*pattern == '0') {
248 /* leading zero specified, count them! */
249 c = pattern;
250 while(ISDIGIT(*c)) {
251 c++;
252 ++pat->content.NumRange.padlength; /* padding length is set for all
253 instances of this pattern */
254 }
255 }
256
257 errno = 0;
258 min_n = strtoul(pattern, &endp, 10);
259 if(errno || (endp == pattern))
260 endp = NULL;
261 else {
262 if(*endp != '-')
263 endp = NULL;
264 else {
265 pattern = endp + 1;
266 while(*pattern && ISBLANK(*pattern))
267 pattern++;
268 if(!ISDIGIT(*pattern)) {
269 endp = NULL;
270 goto fail;
271 }
272 errno = 0;
273 max_n = strtoul(pattern, &endp, 10);
274 if(errno)
275 /* overflow */
276 endp = NULL;
277 else if(*endp == ':') {
278 pattern = endp + 1;
279 errno = 0;
280 step_n = strtoul(pattern, &endp, 10);
281 if(errno)
282 /* over/underflow situation */
283 endp = NULL;
284 }
285 else
286 step_n = 1;
287 if(endp && (*endp == ']')) {
288 pattern = endp + 1;
289 }
290 else
291 endp = NULL;
292 }
293 }
294
295 fail:
296 *posp += (pattern - *patternp);
297
298 if(!endp || !step_n ||
299 (min_n == max_n && step_n != 1) ||
300 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
301 /* the pattern is not well-formed */
302 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
303
304 /* typecasting to ints are fine here since we make sure above that we
305 are within 31 bits */
306 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
307 pat->content.NumRange.max_n = max_n;
308 pat->content.NumRange.step = step_n;
309
310 if(multiply(amount, ((pat->content.NumRange.max_n -
311 pat->content.NumRange.min_n) /
312 pat->content.NumRange.step + 1)))
313 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
314 }
315 else
316 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
317
318 *patternp = pattern;
319 return CURLE_OK;
320 }
321
peek_ipv6(const char * str,size_t * skip)322 static bool peek_ipv6(const char *str, size_t *skip)
323 {
324 /*
325 * Scan for a potential IPv6 literal.
326 * - Valid globs contain a hyphen and <= 1 colon.
327 * - IPv6 literals contain no hyphens and >= 2 colons.
328 */
329 size_t i = 0;
330 size_t colons = 0;
331 if(str[i++] != '[') {
332 return FALSE;
333 }
334 for(;;) {
335 const char c = str[i++];
336 if(ISALNUM(c) || c == '.' || c == '%') {
337 /* ok */
338 }
339 else if(c == ':') {
340 colons++;
341 }
342 else if(c == ']') {
343 *skip = i;
344 return colons >= 2 ? TRUE : FALSE;
345 }
346 else {
347 return FALSE;
348 }
349 }
350 }
351
glob_parse(URLGlob * glob,char * pattern,size_t pos,unsigned long * amount)352 static CURLcode glob_parse(URLGlob *glob, char *pattern,
353 size_t pos, unsigned long *amount)
354 {
355 /* processes a literal string component of a URL
356 special characters '{' and '[' branch to set/range processing functions
357 */
358 CURLcode res = CURLE_OK;
359 int globindex = 0; /* count "actual" globs */
360
361 *amount = 1;
362
363 while(*pattern && !res) {
364 char *buf = glob->glob_buffer;
365 size_t sublen = 0;
366 while(*pattern && *pattern != '{') {
367 if(*pattern == '[') {
368 /* skip over IPv6 literals and [] */
369 size_t skip = 0;
370 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
371 skip = 2;
372 if(skip) {
373 memcpy(buf, pattern, skip);
374 buf += skip;
375 pattern += skip;
376 sublen += skip;
377 continue;
378 }
379 break;
380 }
381 if(*pattern == '}' || *pattern == ']')
382 return GLOBERROR("unmatched close brace/bracket", pos,
383 CURLE_URL_MALFORMAT);
384
385 /* only allow \ to escape known "special letters" */
386 if(*pattern == '\\' &&
387 (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
388 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
389
390 /* escape character, skip '\' */
391 ++pattern;
392 ++pos;
393 }
394 *buf++ = *pattern++; /* copy character to literal */
395 ++pos;
396 sublen++;
397 }
398 if(sublen) {
399 /* we got a literal string, add it as a single-item list */
400 *buf = '\0';
401 res = glob_fixed(glob, glob->glob_buffer, sublen);
402 }
403 else {
404 switch (*pattern) {
405 case '\0': /* done */
406 break;
407
408 case '{':
409 /* process set pattern */
410 pattern++;
411 pos++;
412 res = glob_set(glob, &pattern, &pos, amount, globindex++);
413 break;
414
415 case '[':
416 /* process range pattern */
417 pattern++;
418 pos++;
419 res = glob_range(glob, &pattern, &pos, amount, globindex++);
420 break;
421 }
422 }
423
424 if(++glob->size >= GLOB_PATTERN_NUM)
425 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
426 }
427 return res;
428 }
429
glob_url(URLGlob ** glob,char * url,unsigned long * urlnum,FILE * error)430 CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum,
431 FILE *error)
432 {
433 /*
434 * We can deal with any-size, just make a buffer with the same length
435 * as the specified URL!
436 */
437 URLGlob *glob_expand;
438 unsigned long amount = 0;
439 char *glob_buffer;
440 CURLcode res;
441
442 *glob = NULL;
443
444 glob_buffer = malloc(strlen(url) + 1);
445 if(!glob_buffer)
446 return CURLE_OUT_OF_MEMORY;
447 glob_buffer[0] = 0;
448
449 glob_expand = calloc(1, sizeof(URLGlob));
450 if(!glob_expand) {
451 Curl_safefree(glob_buffer);
452 return CURLE_OUT_OF_MEMORY;
453 }
454 glob_expand->urllen = strlen(url);
455 glob_expand->glob_buffer = glob_buffer;
456
457 res = glob_parse(glob_expand, url, 1, &amount);
458 if(!res)
459 *urlnum = amount;
460 else {
461 if(error && glob_expand->error) {
462 char text[512];
463 const char *t;
464 if(glob_expand->pos) {
465 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
466 glob_expand->error,
467 glob_expand->pos, url, glob_expand->pos - 1, " ");
468 t = text;
469 }
470 else
471 t = glob_expand->error;
472
473 /* send error description to the error-stream */
474 fprintf(error, "curl: (%d) %s\n", res, t);
475 }
476 /* it failed, we cleanup */
477 glob_cleanup(glob_expand);
478 *urlnum = 1;
479 return res;
480 }
481
482 *glob = glob_expand;
483 return CURLE_OK;
484 }
485
/*
 * glob_cleanup() releases everything owned by 'glob': the strings and the
 * element array of every set pattern, the scratch buffer and finally the
 * URLGlob itself. 'glob' must not be used afterwards.
 */
void glob_cleanup(URLGlob *glob)
{
  size_t idx;

  for(idx = 0; idx < glob->size; idx++) {
    URLPattern *pat = &glob->pattern[idx];
    int n;

    /* only sets own heap memory */
    if(pat->type != UPTSet || !pat->content.Set.elements)
      continue;

    /* free the strings from the last one down to the first */
    n = pat->content.Set.size;
    while(n-- > 0) {
      Curl_safefree(pat->content.Set.elements[n]);
    }
    Curl_safefree(pat->content.Set.elements);
  }

  Curl_safefree(glob->glob_buffer);
  Curl_safefree(glob);
}
505
glob_next_url(char ** globbed,URLGlob * glob)506 CURLcode glob_next_url(char **globbed, URLGlob *glob)
507 {
508 URLPattern *pat;
509 size_t i;
510 size_t len;
511 size_t buflen = glob->urllen + 1;
512 char *buf = glob->glob_buffer;
513
514 *globbed = NULL;
515
516 if(!glob->beenhere)
517 glob->beenhere = 1;
518 else {
519 bool carry = TRUE;
520
521 /* implement a counter over the index ranges of all patterns, starting
522 with the rightmost pattern */
523 for(i = 0; carry && (i < glob->size); i++) {
524 carry = FALSE;
525 pat = &glob->pattern[glob->size - 1 - i];
526 switch(pat->type) {
527 case UPTSet:
528 if((pat->content.Set.elements) &&
529 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
530 pat->content.Set.ptr_s = 0;
531 carry = TRUE;
532 }
533 break;
534 case UPTCharRange:
535 pat->content.CharRange.ptr_c =
536 (char)(pat->content.CharRange.step +
537 (int)((unsigned char)pat->content.CharRange.ptr_c));
538 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
539 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
540 carry = TRUE;
541 }
542 break;
543 case UPTNumRange:
544 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
545 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
546 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
547 carry = TRUE;
548 }
549 break;
550 default:
551 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
552 return CURLE_FAILED_INIT;
553 }
554 }
555 if(carry) { /* first pattern ptr has run into overflow, done! */
556 /* TODO: verify if this should actually return CURLE_OK. */
557 return CURLE_OK; /* CURLE_OK to match previous behavior */
558 }
559 }
560
561 for(i = 0; i < glob->size; ++i) {
562 pat = &glob->pattern[i];
563 switch(pat->type) {
564 case UPTSet:
565 if(pat->content.Set.elements) {
566 msnprintf(buf, buflen, "%s",
567 pat->content.Set.elements[pat->content.Set.ptr_s]);
568 len = strlen(buf);
569 buf += len;
570 buflen -= len;
571 }
572 break;
573 case UPTCharRange:
574 if(buflen) {
575 *buf++ = pat->content.CharRange.ptr_c;
576 *buf = '\0';
577 buflen--;
578 }
579 break;
580 case UPTNumRange:
581 msnprintf(buf, buflen, "%0*lu",
582 pat->content.NumRange.padlength,
583 pat->content.NumRange.ptr_n);
584 len = strlen(buf);
585 buf += len;
586 buflen -= len;
587 break;
588 default:
589 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
590 return CURLE_FAILED_INIT;
591 }
592 }
593
594 *globbed = strdup(glob->glob_buffer);
595 if(!*globbed)
596 return CURLE_OUT_OF_MEMORY;
597
598 return CURLE_OK;
599 }
600
glob_match_url(char ** result,char * filename,URLGlob * glob)601 CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
602 {
603 char *target;
604 size_t allocsize;
605 char numbuf[18];
606 char *appendthis = (char *)"";
607 size_t appendlen = 0;
608 size_t stringlen = 0;
609
610 *result = NULL;
611
612 /* We cannot use the glob_buffer for storage here since the filename may
613 * be longer than the URL we use. We allocate a good start size, then
614 * we need to realloc in case of need.
615 */
616 allocsize = strlen(filename) + 1; /* make it at least one byte to store the
617 trailing zero */
618 target = malloc(allocsize);
619 if(!target)
620 return CURLE_OUT_OF_MEMORY;
621
622 while(*filename) {
623 if(*filename == '#' && ISDIGIT(filename[1])) {
624 char *ptr = filename;
625 unsigned long num = strtoul(&filename[1], &filename, 10);
626 URLPattern *pat = NULL;
627
628 if(num < glob->size) {
629 unsigned long i;
630 num--; /* make it zero based */
631 /* find the correct glob entry */
632 for(i = 0; i<glob->size; i++) {
633 if(glob->pattern[i].globindex == (int)num) {
634 pat = &glob->pattern[i];
635 break;
636 }
637 }
638 }
639
640 if(pat) {
641 switch(pat->type) {
642 case UPTSet:
643 if(pat->content.Set.elements) {
644 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
645 appendlen =
646 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
647 }
648 break;
649 case UPTCharRange:
650 numbuf[0] = pat->content.CharRange.ptr_c;
651 numbuf[1] = 0;
652 appendthis = numbuf;
653 appendlen = 1;
654 break;
655 case UPTNumRange:
656 msnprintf(numbuf, sizeof(numbuf), "%0*lu",
657 pat->content.NumRange.padlength,
658 pat->content.NumRange.ptr_n);
659 appendthis = numbuf;
660 appendlen = strlen(numbuf);
661 break;
662 default:
663 fprintf(stderr, "internal error: invalid pattern type (%d)\n",
664 (int)pat->type);
665 Curl_safefree(target);
666 return CURLE_FAILED_INIT;
667 }
668 }
669 else {
670 /* #[num] out of range, use the #[num] in the output */
671 filename = ptr;
672 appendthis = filename++;
673 appendlen = 1;
674 }
675 }
676 else {
677 appendthis = filename++;
678 appendlen = 1;
679 }
680 if(appendlen + stringlen >= allocsize) {
681 char *newstr;
682 /* we append a single byte to allow for the trailing byte to be appended
683 at the end of this function outside the while() loop */
684 allocsize = (appendlen + stringlen) * 2;
685 newstr = realloc(target, allocsize + 1);
686 if(!newstr) {
687 Curl_safefree(target);
688 return CURLE_OUT_OF_MEMORY;
689 }
690 target = newstr;
691 }
692 memcpy(&target[stringlen], appendthis, appendlen);
693 stringlen += appendlen;
694 }
695 target[stringlen]= '\0';
696
697 #if defined(MSDOS) || defined(WIN32)
698 {
699 char *sanitized;
700 SANITIZEcode sc = sanitize_file_name(&sanitized, target,
701 (SANITIZE_ALLOW_PATH |
702 SANITIZE_ALLOW_RESERVED));
703 Curl_safefree(target);
704 if(sc)
705 return CURLE_URL_MALFORMAT;
706 target = sanitized;
707 }
708 #endif /* MSDOS || WIN32 */
709
710 *result = target;
711 return CURLE_OK;
712 }
713