1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24 #include "tool_setup.h"
25
26 #define ENABLE_CURLX_PRINTF
27 /* use our own printf() functions */
28 #include "curlx.h"
29 #include "tool_cfgable.h"
30 #include "tool_doswin.h"
31 #include "tool_urlglob.h"
32 #include "tool_vms.h"
33 #include "dynbuf.h"
34
35 #include "memdebug.h" /* keep this as LAST include */
36
/*
 * GLOBERROR() stores an error message and the URL position it refers to in
 * the 'glob' variable of the calling scope, and evaluates to 'code' so that
 * it can be used directly as the operand of a return statement.
 *
 * The whole expansion and each argument are parenthesized so that the macro
 * stays a single expression no matter what surrounds it or what is passed in.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
39
glob_fixed(struct URLGlob * glob,char * fixed,size_t len)40 static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len)
41 {
42 struct URLPattern *pat = &glob->pattern[glob->size];
43 pat->type = UPTSet;
44 pat->content.Set.size = 1;
45 pat->content.Set.ptr_s = 0;
46 pat->globindex = -1;
47
48 pat->content.Set.elements = malloc(sizeof(char *));
49
50 if(!pat->content.Set.elements)
51 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
52
53 pat->content.Set.elements[0] = malloc(len + 1);
54 if(!pat->content.Set.elements[0])
55 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
56
57 memcpy(pat->content.Set.elements[0], fixed, len);
58 pat->content.Set.elements[0][len] = 0;
59
60 return CURLE_OK;
61 }
62
/* multiply
 *
 * Replaces *amount with *amount * with. Returns 0 on success and 1, leaving
 * *amount untouched, when the product does not fit in an unsigned long.
 * Multiplying by zero stores zero and succeeds.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long factor = (unsigned long)with;
  unsigned long product;

  if(factor == 0) {
    *amount = 0;
    return 0;
  }

  product = *amount * factor;
  if(product / factor != *amount)
    /* the multiplication wrapped around */
    return 1;

  *amount = product;
  return 0;
}
79
glob_set(struct URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)80 static CURLcode glob_set(struct URLGlob *glob, char **patternp,
81 size_t *posp, unsigned long *amount,
82 int globindex)
83 {
84 /* processes a set expression with the point behind the opening '{'
85 ','-separated elements are collected until the next closing '}'
86 */
87 struct URLPattern *pat;
88 bool done = FALSE;
89 char *buf = glob->glob_buffer;
90 char *pattern = *patternp;
91 char *opattern = pattern;
92 size_t opos = *posp-1;
93
94 pat = &glob->pattern[glob->size];
95 /* patterns 0,1,2,... correspond to size=1,3,5,... */
96 pat->type = UPTSet;
97 pat->content.Set.size = 0;
98 pat->content.Set.ptr_s = 0;
99 pat->content.Set.elements = NULL;
100 pat->globindex = globindex;
101
102 while(!done) {
103 switch (*pattern) {
104 case '\0': /* URL ended while set was still open */
105 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
106
107 case '{':
108 case '[': /* no nested expressions at this time */
109 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
110
111 case '}': /* set element completed */
112 if(opattern == pattern)
113 return GLOBERROR("empty string within braces", *posp,
114 CURLE_URL_MALFORMAT);
115
116 /* add 1 to size since it'll be incremented below */
117 if(multiply(amount, pat->content.Set.size + 1))
118 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
119
120 /* FALLTHROUGH */
121 case ',':
122
123 *buf = '\0';
124 if(pat->content.Set.elements) {
125 char **new_arr = realloc(pat->content.Set.elements,
126 (pat->content.Set.size + 1) * sizeof(char *));
127 if(!new_arr)
128 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
129
130 pat->content.Set.elements = new_arr;
131 }
132 else
133 pat->content.Set.elements = malloc(sizeof(char *));
134
135 if(!pat->content.Set.elements)
136 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
137
138 pat->content.Set.elements[pat->content.Set.size] =
139 strdup(glob->glob_buffer);
140 if(!pat->content.Set.elements[pat->content.Set.size])
141 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
142 ++pat->content.Set.size;
143
144 if(*pattern == '}') {
145 pattern++; /* pass the closing brace */
146 done = TRUE;
147 continue;
148 }
149
150 buf = glob->glob_buffer;
151 ++pattern;
152 ++(*posp);
153 break;
154
155 case ']': /* illegal closing bracket */
156 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
157
158 case '\\': /* escaped character, skip '\' */
159 if(pattern[1]) {
160 ++pattern;
161 ++(*posp);
162 }
163 /* FALLTHROUGH */
164 default:
165 *buf++ = *pattern++; /* copy character to set element */
166 ++(*posp);
167 }
168 }
169
170 *patternp = pattern; /* return with the new position */
171 return CURLE_OK;
172 }
173
glob_range(struct URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)174 static CURLcode glob_range(struct URLGlob *glob, char **patternp,
175 size_t *posp, unsigned long *amount,
176 int globindex)
177 {
178 /* processes a range expression with the point behind the opening '['
179 - char range: e.g. "a-z]", "B-Q]"
180 - num range: e.g. "0-9]", "17-2000]"
181 - num range with leading zeros: e.g. "001-999]"
182 expression is checked for well-formedness and collected until the next ']'
183 */
184 struct URLPattern *pat;
185 int rc;
186 char *pattern = *patternp;
187 char *c;
188
189 pat = &glob->pattern[glob->size];
190 pat->globindex = globindex;
191
192 if(ISALPHA(*pattern)) {
193 /* character range detected */
194 char min_c;
195 char max_c;
196 char end_c;
197 unsigned long step = 1;
198
199 pat->type = UPTCharRange;
200
201 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
202
203 if(rc == 3) {
204 if(end_c == ':') {
205 char *endp;
206 errno = 0;
207 step = strtoul(&pattern[4], &endp, 10);
208 if(errno || &pattern[4] == endp || *endp != ']')
209 step = 0;
210 else
211 pattern = endp + 1;
212 }
213 else if(end_c != ']')
214 /* then this is wrong */
215 rc = 0;
216 else
217 /* end_c == ']' */
218 pattern += 4;
219 }
220
221 *posp += (pattern - *patternp);
222
223 if(rc != 3 || !step || step > (unsigned)INT_MAX ||
224 (min_c == max_c && step != 1) ||
225 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
226 (max_c - min_c) > ('z' - 'a'))))
227 /* the pattern is not well-formed */
228 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
229
230 /* if there was a ":[num]" thing, use that as step or else use 1 */
231 pat->content.CharRange.step = (int)step;
232 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
233 pat->content.CharRange.max_c = max_c;
234
235 if(multiply(amount, ((pat->content.CharRange.max_c -
236 pat->content.CharRange.min_c) /
237 pat->content.CharRange.step + 1)))
238 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
239 }
240 else if(ISDIGIT(*pattern)) {
241 /* numeric range detected */
242 unsigned long min_n;
243 unsigned long max_n = 0;
244 unsigned long step_n = 0;
245 char *endp;
246
247 pat->type = UPTNumRange;
248 pat->content.NumRange.padlength = 0;
249
250 if(*pattern == '0') {
251 /* leading zero specified, count them! */
252 c = pattern;
253 while(ISDIGIT(*c)) {
254 c++;
255 ++pat->content.NumRange.padlength; /* padding length is set for all
256 instances of this pattern */
257 }
258 }
259
260 errno = 0;
261 min_n = strtoul(pattern, &endp, 10);
262 if(errno || (endp == pattern))
263 endp = NULL;
264 else {
265 if(*endp != '-')
266 endp = NULL;
267 else {
268 pattern = endp + 1;
269 while(*pattern && ISBLANK(*pattern))
270 pattern++;
271 if(!ISDIGIT(*pattern)) {
272 endp = NULL;
273 goto fail;
274 }
275 errno = 0;
276 max_n = strtoul(pattern, &endp, 10);
277 if(errno)
278 /* overflow */
279 endp = NULL;
280 else if(*endp == ':') {
281 pattern = endp + 1;
282 errno = 0;
283 step_n = strtoul(pattern, &endp, 10);
284 if(errno)
285 /* over/underflow situation */
286 endp = NULL;
287 }
288 else
289 step_n = 1;
290 if(endp && (*endp == ']')) {
291 pattern = endp + 1;
292 }
293 else
294 endp = NULL;
295 }
296 }
297
298 fail:
299 *posp += (pattern - *patternp);
300
301 if(!endp || !step_n ||
302 (min_n == max_n && step_n != 1) ||
303 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
304 /* the pattern is not well-formed */
305 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
306
307 /* typecasting to ints are fine here since we make sure above that we
308 are within 31 bits */
309 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
310 pat->content.NumRange.max_n = max_n;
311 pat->content.NumRange.step = step_n;
312
313 if(multiply(amount, ((pat->content.NumRange.max_n -
314 pat->content.NumRange.min_n) /
315 pat->content.NumRange.step + 1)))
316 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
317 }
318 else
319 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
320
321 *patternp = pattern;
322 return CURLE_OK;
323 }
324
325 #define MAX_IP6LEN 128
326
peek_ipv6(const char * str,size_t * skip)327 static bool peek_ipv6(const char *str, size_t *skip)
328 {
329 /*
330 * Scan for a potential IPv6 literal.
331 * - Valid globs contain a hyphen and <= 1 colon.
332 * - IPv6 literals contain no hyphens and >= 2 colons.
333 */
334 char hostname[MAX_IP6LEN];
335 CURLU *u;
336 char *endbr = strchr(str, ']');
337 size_t hlen;
338 CURLUcode rc;
339 if(!endbr)
340 return FALSE;
341
342 hlen = endbr - str + 1;
343 if(hlen >= MAX_IP6LEN)
344 return FALSE;
345
346 u = curl_url();
347 if(!u)
348 return FALSE;
349
350 memcpy(hostname, str, hlen);
351 hostname[hlen] = 0;
352
353 /* ask to "guess scheme" as then it works without an https:// prefix */
354 rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME);
355
356 curl_url_cleanup(u);
357 if(!rc)
358 *skip = hlen;
359 return rc ? FALSE : TRUE;
360 }
361
glob_parse(struct URLGlob * glob,char * pattern,size_t pos,unsigned long * amount)362 static CURLcode glob_parse(struct URLGlob *glob, char *pattern,
363 size_t pos, unsigned long *amount)
364 {
365 /* processes a literal string component of a URL
366 special characters '{' and '[' branch to set/range processing functions
367 */
368 CURLcode res = CURLE_OK;
369 int globindex = 0; /* count "actual" globs */
370
371 *amount = 1;
372
373 while(*pattern && !res) {
374 char *buf = glob->glob_buffer;
375 size_t sublen = 0;
376 while(*pattern && *pattern != '{') {
377 if(*pattern == '[') {
378 /* skip over IPv6 literals and [] */
379 size_t skip = 0;
380 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
381 skip = 2;
382 if(skip) {
383 memcpy(buf, pattern, skip);
384 buf += skip;
385 pattern += skip;
386 sublen += skip;
387 continue;
388 }
389 break;
390 }
391 if(*pattern == '}' || *pattern == ']')
392 return GLOBERROR("unmatched close brace/bracket", pos,
393 CURLE_URL_MALFORMAT);
394
395 /* only allow \ to escape known "special letters" */
396 if(*pattern == '\\' &&
397 (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
398 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
399
400 /* escape character, skip '\' */
401 ++pattern;
402 ++pos;
403 }
404 *buf++ = *pattern++; /* copy character to literal */
405 ++pos;
406 sublen++;
407 }
408 if(sublen) {
409 /* we got a literal string, add it as a single-item list */
410 *buf = '\0';
411 res = glob_fixed(glob, glob->glob_buffer, sublen);
412 }
413 else {
414 switch (*pattern) {
415 case '\0': /* done */
416 break;
417
418 case '{':
419 /* process set pattern */
420 pattern++;
421 pos++;
422 res = glob_set(glob, &pattern, &pos, amount, globindex++);
423 break;
424
425 case '[':
426 /* process range pattern */
427 pattern++;
428 pos++;
429 res = glob_range(glob, &pattern, &pos, amount, globindex++);
430 break;
431 }
432 }
433
434 if(++glob->size >= GLOB_PATTERN_NUM)
435 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
436 }
437 return res;
438 }
439
glob_url(struct URLGlob ** glob,char * url,unsigned long * urlnum,FILE * error)440 CURLcode glob_url(struct URLGlob **glob, char *url, unsigned long *urlnum,
441 FILE *error)
442 {
443 /*
444 * We can deal with any-size, just make a buffer with the same length
445 * as the specified URL!
446 */
447 struct URLGlob *glob_expand;
448 unsigned long amount = 0;
449 char *glob_buffer;
450 CURLcode res;
451
452 *glob = NULL;
453
454 glob_buffer = malloc(strlen(url) + 1);
455 if(!glob_buffer)
456 return CURLE_OUT_OF_MEMORY;
457 glob_buffer[0] = 0;
458
459 glob_expand = calloc(1, sizeof(struct URLGlob));
460 if(!glob_expand) {
461 Curl_safefree(glob_buffer);
462 return CURLE_OUT_OF_MEMORY;
463 }
464 glob_expand->urllen = strlen(url);
465 glob_expand->glob_buffer = glob_buffer;
466
467 res = glob_parse(glob_expand, url, 1, &amount);
468 if(!res)
469 *urlnum = amount;
470 else {
471 if(error && glob_expand->error) {
472 char text[512];
473 const char *t;
474 if(glob_expand->pos) {
475 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
476 glob_expand->error,
477 glob_expand->pos, url, (int)glob_expand->pos - 1, " ");
478 t = text;
479 }
480 else
481 t = glob_expand->error;
482
483 /* send error description to the error-stream */
484 fprintf(error, "curl: (%d) %s\n", res, t);
485 }
486 /* it failed, we cleanup */
487 glob_cleanup(glob_expand);
488 *urlnum = 1;
489 return res;
490 }
491
492 *glob = glob_expand;
493 return CURLE_OK;
494 }
495
glob_cleanup(struct URLGlob * glob)496 void glob_cleanup(struct URLGlob *glob)
497 {
498 size_t i;
499 int elem;
500
501 if(!glob)
502 return;
503
504 for(i = 0; i < glob->size; i++) {
505 if((glob->pattern[i].type == UPTSet) &&
506 (glob->pattern[i].content.Set.elements)) {
507 for(elem = glob->pattern[i].content.Set.size - 1;
508 elem >= 0;
509 --elem) {
510 Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
511 }
512 Curl_safefree(glob->pattern[i].content.Set.elements);
513 }
514 }
515 Curl_safefree(glob->glob_buffer);
516 Curl_safefree(glob);
517 }
518
glob_next_url(char ** globbed,struct URLGlob * glob)519 CURLcode glob_next_url(char **globbed, struct URLGlob *glob)
520 {
521 struct URLPattern *pat;
522 size_t i;
523 size_t len;
524 size_t buflen = glob->urllen + 1;
525 char *buf = glob->glob_buffer;
526
527 *globbed = NULL;
528
529 if(!glob->beenhere)
530 glob->beenhere = 1;
531 else {
532 bool carry = TRUE;
533
534 /* implement a counter over the index ranges of all patterns, starting
535 with the rightmost pattern */
536 for(i = 0; carry && (i < glob->size); i++) {
537 carry = FALSE;
538 pat = &glob->pattern[glob->size - 1 - i];
539 switch(pat->type) {
540 case UPTSet:
541 if((pat->content.Set.elements) &&
542 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
543 pat->content.Set.ptr_s = 0;
544 carry = TRUE;
545 }
546 break;
547 case UPTCharRange:
548 pat->content.CharRange.ptr_c =
549 (char)(pat->content.CharRange.step +
550 (int)((unsigned char)pat->content.CharRange.ptr_c));
551 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
552 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
553 carry = TRUE;
554 }
555 break;
556 case UPTNumRange:
557 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
558 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
559 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
560 carry = TRUE;
561 }
562 break;
563 default:
564 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
565 return CURLE_FAILED_INIT;
566 }
567 }
568 if(carry) { /* first pattern ptr has run into overflow, done! */
569 return CURLE_OK;
570 }
571 }
572
573 for(i = 0; i < glob->size; ++i) {
574 pat = &glob->pattern[i];
575 switch(pat->type) {
576 case UPTSet:
577 if(pat->content.Set.elements) {
578 msnprintf(buf, buflen, "%s",
579 pat->content.Set.elements[pat->content.Set.ptr_s]);
580 len = strlen(buf);
581 buf += len;
582 buflen -= len;
583 }
584 break;
585 case UPTCharRange:
586 if(buflen) {
587 *buf++ = pat->content.CharRange.ptr_c;
588 *buf = '\0';
589 buflen--;
590 }
591 break;
592 case UPTNumRange:
593 msnprintf(buf, buflen, "%0*lu",
594 pat->content.NumRange.padlength,
595 pat->content.NumRange.ptr_n);
596 len = strlen(buf);
597 buf += len;
598 buflen -= len;
599 break;
600 default:
601 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
602 return CURLE_FAILED_INIT;
603 }
604 }
605
606 *globbed = strdup(glob->glob_buffer);
607 if(!*globbed)
608 return CURLE_OUT_OF_MEMORY;
609
610 return CURLE_OK;
611 }
612
613 #define MAX_OUTPUT_GLOB_LENGTH (10*1024)
614
glob_match_url(char ** result,char * filename,struct URLGlob * glob)615 CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob)
616 {
617 char numbuf[18];
618 char *appendthis = (char *)"";
619 size_t appendlen = 0;
620 struct curlx_dynbuf dyn;
621
622 *result = NULL;
623
624 /* We cannot use the glob_buffer for storage since the filename may be
625 * longer than the URL we use.
626 */
627 curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH);
628
629 while(*filename) {
630 if(*filename == '#' && ISDIGIT(filename[1])) {
631 char *ptr = filename;
632 unsigned long num = strtoul(&filename[1], &filename, 10);
633 struct URLPattern *pat = NULL;
634
635 if(num && (num < glob->size)) {
636 unsigned long i;
637 num--; /* make it zero based */
638 /* find the correct glob entry */
639 for(i = 0; i<glob->size; i++) {
640 if(glob->pattern[i].globindex == (int)num) {
641 pat = &glob->pattern[i];
642 break;
643 }
644 }
645 }
646
647 if(pat) {
648 switch(pat->type) {
649 case UPTSet:
650 if(pat->content.Set.elements) {
651 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
652 appendlen =
653 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
654 }
655 break;
656 case UPTCharRange:
657 numbuf[0] = pat->content.CharRange.ptr_c;
658 numbuf[1] = 0;
659 appendthis = numbuf;
660 appendlen = 1;
661 break;
662 case UPTNumRange:
663 msnprintf(numbuf, sizeof(numbuf), "%0*lu",
664 pat->content.NumRange.padlength,
665 pat->content.NumRange.ptr_n);
666 appendthis = numbuf;
667 appendlen = strlen(numbuf);
668 break;
669 default:
670 fprintf(stderr, "internal error: invalid pattern type (%d)\n",
671 (int)pat->type);
672 curlx_dyn_free(&dyn);
673 return CURLE_FAILED_INIT;
674 }
675 }
676 else {
677 /* #[num] out of range, use the #[num] in the output */
678 filename = ptr;
679 appendthis = filename++;
680 appendlen = 1;
681 }
682 }
683 else {
684 appendthis = filename++;
685 appendlen = 1;
686 }
687 if(curlx_dyn_addn(&dyn, appendthis, appendlen))
688 return CURLE_OUT_OF_MEMORY;
689 }
690
691 if(curlx_dyn_addn(&dyn, "", 0))
692 return CURLE_OUT_OF_MEMORY;
693
694 #if defined(MSDOS) || defined(WIN32)
695 {
696 char *sanitized;
697 SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn),
698 (SANITIZE_ALLOW_PATH |
699 SANITIZE_ALLOW_RESERVED));
700 curlx_dyn_free(&dyn);
701 if(sc)
702 return CURLE_URL_MALFORMAT;
703 *result = sanitized;
704 return CURLE_OK;
705 }
706 #else
707 *result = curlx_dyn_ptr(&dyn);
708 return CURLE_OK;
709 #endif /* MSDOS || WIN32 */
710 }
711