• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) 1998 - 2016, Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.haxx.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  ***************************************************************************/
22 #include "tool_setup.h"
23 
24 #define ENABLE_CURLX_PRINTF
25 /* use our own printf() functions */
26 #include "curlx.h"
27 #include "tool_cfgable.h"
28 #include "tool_doswin.h"
29 #include "tool_urlglob.h"
30 #include "tool_vms.h"
31 
32 #include "memdebug.h" /* keep this as LAST include */
33 
34 #define GLOBERROR(string, column, code) \
35   glob->error = string, glob->pos = column, code
36 
37 void glob_cleanup(URLGlob* glob);
38 
glob_fixed(URLGlob * glob,char * fixed,size_t len)39 static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
40 {
41   URLPattern *pat = &glob->pattern[glob->size];
42   pat->type = UPTSet;
43   pat->content.Set.size = 1;
44   pat->content.Set.ptr_s = 0;
45   pat->globindex = -1;
46 
47   pat->content.Set.elements = malloc(sizeof(char *));
48 
49   if(!pat->content.Set.elements)
50     return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
51 
52   pat->content.Set.elements[0] = malloc(len+1);
53   if(!pat->content.Set.elements[0])
54     return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
55 
56   memcpy(pat->content.Set.elements[0], fixed, len);
57   pat->content.Set.elements[0][len] = 0;
58 
59   return CURLE_OK;
60 }
61 
/* multiply
 *
 * Multiplies *amount by 'with' and checks for overflow.
 *
 * 'with' is converted to unsigned long for the arithmetic, exactly as the
 * plain expression '*amount * with' would do.
 *
 * Returns 0 and stores the product in *amount when it fits. Returns 1 and
 * leaves *amount untouched when the product does not fit in an unsigned
 * long.
 *
 * A zero factor is not an overflow: *amount simply becomes 0. It is handled
 * up front because the overflow test below divides by the factor.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long factor = (unsigned long)with;
  unsigned long sum;

  if(!factor) {
    /* multiplying by zero is useless but it is not an overflow */
    *amount = 0;
    return 0;
  }

  sum = *amount * factor;
  if(sum/factor != *amount)
    return 1; /* didn't fit, bail out */
  *amount = sum;
  return 0;
}
74 
glob_set(URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)75 static CURLcode glob_set(URLGlob *glob, char **patternp,
76                          size_t *posp, unsigned long *amount,
77                          int globindex)
78 {
79   /* processes a set expression with the point behind the opening '{'
80      ','-separated elements are collected until the next closing '}'
81   */
82   URLPattern *pat;
83   bool done = FALSE;
84   char *buf = glob->glob_buffer;
85   char *pattern = *patternp;
86   char *opattern = pattern;
87   size_t opos = *posp-1;
88 
89   pat = &glob->pattern[glob->size];
90   /* patterns 0,1,2,... correspond to size=1,3,5,... */
91   pat->type = UPTSet;
92   pat->content.Set.size = 0;
93   pat->content.Set.ptr_s = 0;
94   pat->content.Set.elements = NULL;
95   pat->globindex = globindex;
96 
97   while(!done) {
98     switch (*pattern) {
99     case '\0':                  /* URL ended while set was still open */
100       return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
101 
102     case '{':
103     case '[':                   /* no nested expressions at this time */
104       return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
105 
106     case '}':                           /* set element completed */
107       if(opattern == pattern)
108         return GLOBERROR("empty string within braces", *posp,
109                          CURLE_URL_MALFORMAT);
110 
111       /* add 1 to size since it'll be incremented below */
112       if(multiply(amount, pat->content.Set.size+1))
113         return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
114 
115       /* fall-through */
116     case ',':
117 
118       *buf = '\0';
119       if(pat->content.Set.elements) {
120         char **new_arr = realloc(pat->content.Set.elements,
121                                  (pat->content.Set.size + 1) * sizeof(char *));
122         if(!new_arr)
123           return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
124 
125         pat->content.Set.elements = new_arr;
126       }
127       else
128         pat->content.Set.elements = malloc(sizeof(char *));
129 
130       if(!pat->content.Set.elements)
131         return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
132 
133       pat->content.Set.elements[pat->content.Set.size] =
134         strdup(glob->glob_buffer);
135       if(!pat->content.Set.elements[pat->content.Set.size])
136         return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
137       ++pat->content.Set.size;
138 
139       if(*pattern == '}') {
140         pattern++; /* pass the closing brace */
141         done = TRUE;
142         continue;
143       }
144 
145       buf = glob->glob_buffer;
146       ++pattern;
147       ++(*posp);
148       break;
149 
150     case ']':                           /* illegal closing bracket */
151       return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
152 
153     case '\\':                          /* escaped character, skip '\' */
154       if(pattern[1]) {
155         ++pattern;
156         ++(*posp);
157       }
158       /* intentional fallthrough */
159     default:
160       *buf++ = *pattern++;              /* copy character to set element */
161       ++(*posp);
162     }
163   }
164 
165   *patternp = pattern; /* return with the new position */
166   return CURLE_OK;
167 }
168 
glob_range(URLGlob * glob,char ** patternp,size_t * posp,unsigned long * amount,int globindex)169 static CURLcode glob_range(URLGlob *glob, char **patternp,
170                            size_t *posp, unsigned long *amount,
171                            int globindex)
172 {
173   /* processes a range expression with the point behind the opening '['
174      - char range: e.g. "a-z]", "B-Q]"
175      - num range: e.g. "0-9]", "17-2000]"
176      - num range with leading zeros: e.g. "001-999]"
177      expression is checked for well-formedness and collected until the next ']'
178   */
179   URLPattern *pat;
180   int rc;
181   char *pattern = *patternp;
182   char *c;
183 
184   pat = &glob->pattern[glob->size];
185   pat->globindex = globindex;
186 
187   if(ISALPHA(*pattern)) {
188     /* character range detected */
189     char min_c;
190     char max_c;
191     char end_c;
192     int step=1;
193 
194     pat->type = UPTCharRange;
195 
196     rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
197 
198     if(rc == 3) {
199       if(end_c == ':') {
200         char *endp;
201         unsigned long lstep;
202         errno = 0;
203         lstep = strtoul(&pattern[4], &endp, 10);
204         if(errno || &pattern[4] == endp || *endp != ']')
205           step = -1;
206         else {
207           pattern = endp+1;
208           step = (int)lstep;
209           if(step > (max_c - min_c))
210             step = -1;
211         }
212       }
213       else if(end_c != ']')
214         /* then this is wrong */
215         rc = 0;
216       else
217         /* end_c == ']' */
218         pattern += 4;
219     }
220 
221     *posp += (pattern - *patternp);
222 
223     if((rc != 3) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a')) ||
224        (step <= 0) )
225       /* the pattern is not well-formed */
226       return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
227 
228     /* if there was a ":[num]" thing, use that as step or else use 1 */
229     pat->content.CharRange.step = step;
230     pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
231     pat->content.CharRange.max_c = max_c;
232 
233     if(multiply(amount, (pat->content.CharRange.max_c -
234                           pat->content.CharRange.min_c) /
235                          pat->content.CharRange.step + 1) )
236       return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
237   }
238   else if(ISDIGIT(*pattern)) {
239     /* numeric range detected */
240     unsigned long min_n;
241     unsigned long max_n = 0;
242     unsigned long step_n = 0;
243     char *endp;
244 
245     pat->type = UPTNumRange;
246     pat->content.NumRange.padlength = 0;
247 
248     if(*pattern == '0') {
249       /* leading zero specified, count them! */
250       c = pattern;
251       while(ISDIGIT(*c)) {
252         c++;
253         ++pat->content.NumRange.padlength; /* padding length is set for all
254                                               instances of this pattern */
255       }
256     }
257 
258     errno = 0;
259     min_n = strtoul(pattern, &endp, 10);
260     if(errno || (endp == pattern))
261       endp=NULL;
262     else {
263       if(*endp != '-')
264         endp = NULL;
265       else {
266         pattern = endp+1;
267         while(*pattern && ISBLANK(*pattern))
268           pattern++;
269         if(!ISDIGIT(*pattern)) {
270           endp = NULL;
271           goto fail;
272         }
273         errno = 0;
274         max_n = strtoul(pattern, &endp, 10);
275         if(errno || (*endp == ':')) {
276           pattern = endp+1;
277           errno = 0;
278           step_n = strtoul(pattern, &endp, 10);
279           if(errno)
280             /* over/underflow situation */
281             endp = NULL;
282         }
283         else
284           step_n = 1;
285         if(endp && (*endp == ']')) {
286           pattern= endp+1;
287         }
288         else
289           endp = NULL;
290       }
291     }
292 
293     fail:
294     *posp += (pattern - *patternp);
295 
296     if(!endp || (min_n > max_n) || (step_n > (max_n - min_n)) || !step_n)
297       /* the pattern is not well-formed */
298       return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
299 
300     /* typecasting to ints are fine here since we make sure above that we
301        are within 31 bits */
302     pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
303     pat->content.NumRange.max_n = max_n;
304     pat->content.NumRange.step = step_n;
305 
306     if(multiply(amount, (pat->content.NumRange.max_n -
307                          pat->content.NumRange.min_n) /
308                         pat->content.NumRange.step + 1) )
309       return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
310   }
311   else
312     return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
313 
314   *patternp = pattern;
315   return CURLE_OK;
316 }
317 
peek_ipv6(const char * str,size_t * skip)318 static bool peek_ipv6(const char *str, size_t *skip)
319 {
320   /*
321    * Scan for a potential IPv6 literal.
322    * - Valid globs contain a hyphen and <= 1 colon.
323    * - IPv6 literals contain no hyphens and >= 2 colons.
324    */
325   size_t i = 0;
326   size_t colons = 0;
327   if(str[i++] != '[') {
328     return FALSE;
329   }
330   for(;;) {
331     const char c = str[i++];
332     if(ISALNUM(c) || c == '.' || c == '%') {
333       /* ok */
334     }
335     else if(c == ':') {
336       colons++;
337     }
338     else if(c == ']') {
339       *skip = i;
340       return colons >= 2 ? TRUE : FALSE;
341     }
342     else {
343       return FALSE;
344     }
345   }
346 }
347 
glob_parse(URLGlob * glob,char * pattern,size_t pos,unsigned long * amount)348 static CURLcode glob_parse(URLGlob *glob, char *pattern,
349                            size_t pos, unsigned long *amount)
350 {
351   /* processes a literal string component of a URL
352      special characters '{' and '[' branch to set/range processing functions
353    */
354   CURLcode res = CURLE_OK;
355   int globindex = 0; /* count "actual" globs */
356 
357   *amount = 1;
358 
359   while(*pattern && !res) {
360     char *buf = glob->glob_buffer;
361     size_t sublen = 0;
362     while(*pattern && *pattern != '{') {
363       if(*pattern == '[') {
364         /* Skip over potential IPv6 literals. */
365         size_t skip;
366         if(peek_ipv6(pattern, &skip)) {
367           memcpy(buf, pattern, skip);
368           buf += skip;
369           pattern += skip;
370           sublen += skip;
371           continue;
372         }
373         break;
374       }
375       if(*pattern == '}' || *pattern == ']')
376         return GLOBERROR("unmatched close brace/bracket", pos,
377                          CURLE_URL_MALFORMAT);
378 
379       /* only allow \ to escape known "special letters" */
380       if(*pattern == '\\' &&
381          (*(pattern+1) == '{' || *(pattern+1) == '[' ||
382           *(pattern+1) == '}' || *(pattern+1) == ']') ) {
383 
384         /* escape character, skip '\' */
385         ++pattern;
386         ++pos;
387       }
388       *buf++ = *pattern++; /* copy character to literal */
389       ++pos;
390       sublen++;
391     }
392     if(sublen) {
393       /* we got a literal string, add it as a single-item list */
394       *buf = '\0';
395       res = glob_fixed(glob, glob->glob_buffer, sublen);
396     }
397     else {
398       switch (*pattern) {
399       case '\0': /* done  */
400         break;
401 
402       case '{':
403         /* process set pattern */
404         pattern++;
405         pos++;
406         res = glob_set(glob, &pattern, &pos, amount, globindex++);
407         break;
408 
409       case '[':
410         /* process range pattern */
411         pattern++;
412         pos++;
413         res = glob_range(glob, &pattern, &pos, amount, globindex++);
414         break;
415       }
416     }
417 
418     if(++glob->size >= GLOB_PATTERN_NUM)
419       return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
420   }
421   return res;
422 }
423 
glob_url(URLGlob ** glob,char * url,unsigned long * urlnum,FILE * error)424 CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum,
425                   FILE *error)
426 {
427   /*
428    * We can deal with any-size, just make a buffer with the same length
429    * as the specified URL!
430    */
431   URLGlob *glob_expand;
432   unsigned long amount = 0;
433   char *glob_buffer;
434   CURLcode res;
435 
436   *glob = NULL;
437 
438   glob_buffer = malloc(strlen(url) + 1);
439   if(!glob_buffer)
440     return CURLE_OUT_OF_MEMORY;
441   glob_buffer[0]=0;
442 
443   glob_expand = calloc(1, sizeof(URLGlob));
444   if(!glob_expand) {
445     Curl_safefree(glob_buffer);
446     return CURLE_OUT_OF_MEMORY;
447   }
448   glob_expand->urllen = strlen(url);
449   glob_expand->glob_buffer = glob_buffer;
450 
451   res = glob_parse(glob_expand, url, 1, &amount);
452   if(!res)
453     *urlnum = amount;
454   else {
455     if(error && glob_expand->error) {
456       char text[128];
457       const char *t;
458       if(glob_expand->pos) {
459         snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error,
460                  glob_expand->pos);
461         t = text;
462       }
463       else
464         t = glob_expand->error;
465 
466       /* send error description to the error-stream */
467       fprintf(error, "curl: (%d) [globbing] %s\n", res, t);
468     }
469     /* it failed, we cleanup */
470     glob_cleanup(glob_expand);
471     *urlnum = 1;
472     return res;
473   }
474 
475   *glob = glob_expand;
476   return CURLE_OK;
477 }
478 
/*
 * glob_cleanup
 *
 * Frees everything owned by 'glob': the strings and list of every set
 * pattern, the work buffer and the glob struct itself. Passing NULL is
 * allowed and does nothing, which lets callers clean up unconditionally
 * even after glob_url() failed and left their pointer set to NULL.
 */
void glob_cleanup(URLGlob* glob)
{
  size_t i;
  int elem;

  if(!glob)
    return;

  for(i = 0; i < glob->size; i++) {
    /* only set patterns own heap memory */
    if((glob->pattern[i].type == UPTSet) &&
       (glob->pattern[i].content.Set.elements)) {
      for(elem = glob->pattern[i].content.Set.size - 1;
          elem >= 0;
          --elem) {
        Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
      }
      Curl_safefree(glob->pattern[i].content.Set.elements);
    }
  }
  Curl_safefree(glob->glob_buffer);
  Curl_safefree(glob);
}
498 
glob_next_url(char ** globbed,URLGlob * glob)499 CURLcode glob_next_url(char **globbed, URLGlob *glob)
500 {
501   URLPattern *pat;
502   size_t i;
503   size_t len;
504   size_t buflen = glob->urllen + 1;
505   char *buf = glob->glob_buffer;
506 
507   *globbed = NULL;
508 
509   if(!glob->beenhere)
510     glob->beenhere = 1;
511   else {
512     bool carry = TRUE;
513 
514     /* implement a counter over the index ranges of all patterns, starting
515        with the rightmost pattern */
516     for(i = 0; carry && (i < glob->size); i++) {
517       carry = FALSE;
518       pat = &glob->pattern[glob->size - 1 - i];
519       switch (pat->type) {
520       case UPTSet:
521         if((pat->content.Set.elements) &&
522            (++pat->content.Set.ptr_s == pat->content.Set.size)) {
523           pat->content.Set.ptr_s = 0;
524           carry = TRUE;
525         }
526         break;
527       case UPTCharRange:
528         pat->content.CharRange.ptr_c =
529           (char)(pat->content.CharRange.step +
530                  (int)((unsigned char)pat->content.CharRange.ptr_c));
531         if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
532           pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
533           carry = TRUE;
534         }
535         break;
536       case UPTNumRange:
537         pat->content.NumRange.ptr_n += pat->content.NumRange.step;
538         if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
539           pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
540           carry = TRUE;
541         }
542         break;
543       default:
544         printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
545         return CURLE_FAILED_INIT;
546       }
547     }
548     if(carry) {         /* first pattern ptr has run into overflow, done! */
549       /* TODO: verify if this should actally return CURLE_OK. */
550       return CURLE_OK; /* CURLE_OK to match previous behavior */
551     }
552   }
553 
554   for(i = 0; i < glob->size; ++i) {
555     pat = &glob->pattern[i];
556     switch(pat->type) {
557     case UPTSet:
558       if(pat->content.Set.elements) {
559         snprintf(buf, buflen, "%s",
560                  pat->content.Set.elements[pat->content.Set.ptr_s]);
561         len = strlen(buf);
562         buf += len;
563         buflen -= len;
564       }
565       break;
566     case UPTCharRange:
567       if(buflen) {
568         *buf++ = pat->content.CharRange.ptr_c;
569         *buf = '\0';
570         buflen--;
571       }
572       break;
573     case UPTNumRange:
574       snprintf(buf, buflen, "%0*ld",
575                pat->content.NumRange.padlength,
576                pat->content.NumRange.ptr_n);
577       len = strlen(buf);
578       buf += len;
579       buflen -= len;
580       break;
581     default:
582       printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
583       return CURLE_FAILED_INIT;
584     }
585   }
586 
587   *globbed = strdup(glob->glob_buffer);
588   if(!*globbed)
589     return CURLE_OUT_OF_MEMORY;
590 
591   return CURLE_OK;
592 }
593 
glob_match_url(char ** result,char * filename,URLGlob * glob)594 CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
595 {
596   char *target;
597   size_t allocsize;
598   char numbuf[18];
599   char *appendthis = NULL;
600   size_t appendlen = 0;
601   size_t stringlen = 0;
602 
603   *result = NULL;
604 
605   /* We cannot use the glob_buffer for storage here since the filename may
606    * be longer than the URL we use. We allocate a good start size, then
607    * we need to realloc in case of need.
608    */
609   allocsize = strlen(filename) + 1; /* make it at least one byte to store the
610                                        trailing zero */
611   target = malloc(allocsize);
612   if(!target)
613     return CURLE_OUT_OF_MEMORY;
614 
615   while(*filename) {
616     if(*filename == '#' && ISDIGIT(filename[1])) {
617       unsigned long i;
618       char *ptr = filename;
619       unsigned long num = strtoul(&filename[1], &filename, 10);
620       URLPattern *pat =NULL;
621 
622       if(num < glob->size) {
623         num--; /* make it zero based */
624         /* find the correct glob entry */
625         for(i=0; i<glob->size; i++) {
626           if(glob->pattern[i].globindex == (int)num) {
627             pat = &glob->pattern[i];
628             break;
629           }
630         }
631       }
632 
633       if(pat) {
634         switch (pat->type) {
635         case UPTSet:
636           if(pat->content.Set.elements) {
637             appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
638             appendlen =
639               strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
640           }
641           break;
642         case UPTCharRange:
643           numbuf[0] = pat->content.CharRange.ptr_c;
644           numbuf[1] = 0;
645           appendthis = numbuf;
646           appendlen = 1;
647           break;
648         case UPTNumRange:
649           snprintf(numbuf, sizeof(numbuf), "%0*d",
650                    pat->content.NumRange.padlength,
651                    pat->content.NumRange.ptr_n);
652           appendthis = numbuf;
653           appendlen = strlen(numbuf);
654           break;
655         default:
656           fprintf(stderr, "internal error: invalid pattern type (%d)\n",
657                   (int)pat->type);
658           Curl_safefree(target);
659           return CURLE_FAILED_INIT;
660         }
661       }
662       else {
663         /* #[num] out of range, use the #[num] in the output */
664         filename = ptr;
665         appendthis = filename++;
666         appendlen = 1;
667       }
668     }
669     else {
670       appendthis = filename++;
671       appendlen = 1;
672     }
673     if(appendlen + stringlen >= allocsize) {
674       char *newstr;
675       /* we append a single byte to allow for the trailing byte to be appended
676          at the end of this function outside the while() loop */
677       allocsize = (appendlen + stringlen) * 2;
678       newstr = realloc(target, allocsize + 1);
679       if(!newstr) {
680         Curl_safefree(target);
681         return CURLE_OUT_OF_MEMORY;
682       }
683       target = newstr;
684     }
685     memcpy(&target[stringlen], appendthis, appendlen);
686     stringlen += appendlen;
687   }
688   target[stringlen]= '\0';
689 
690 #if defined(MSDOS) || defined(WIN32)
691   {
692     char *sanitized;
693     SANITIZEcode sc = sanitize_file_name(&sanitized, target,
694                                          (SANITIZE_ALLOW_PATH |
695                                           SANITIZE_ALLOW_RESERVED));
696     Curl_safefree(target);
697     if(sc)
698       return CURLE_URL_MALFORMAT;
699     target = sanitized;
700   }
701 #endif /* MSDOS || WIN32 */
702 
703   *result = target;
704   return CURLE_OK;
705 }
706