• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Copyright (c) 2013 Daniel James
3 
4     Use, modification and distribution is subject to the Boost Software
5     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
6     http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 
9 #include "glob.hpp"
10 #include <cassert>
11 
12 namespace quickbook
13 {
14     typedef string_iterator glob_iterator;
15 
16     void check_glob_range(glob_iterator&, glob_iterator);
17     void check_glob_escape(glob_iterator&, glob_iterator);
18 
19     bool match_section(
20         glob_iterator& pattern_begin,
21         glob_iterator pattern_end,
22         glob_iterator& filename_begin,
23         glob_iterator& filename_end);
24     bool match_range(
25         glob_iterator& pattern_begin, glob_iterator pattern_end, char x);
26 
27     // Is pattern a glob or a plain file name?
28     // Throws glob_error if pattern is an invalid glob.
check_glob(quickbook::string_view pattern)29     bool check_glob(quickbook::string_view pattern)
30     {
31         bool is_glob = false;
32         bool is_ascii = true;
33 
34         glob_iterator begin = pattern.begin();
35         glob_iterator end = pattern.end();
36 
37         while (begin != end) {
38             if (*begin < 32 || (*begin & 0x80)) is_ascii = false;
39 
40             switch (*begin) {
41             case '\\':
42                 check_glob_escape(begin, end);
43                 break;
44 
45             case '[':
46                 check_glob_range(begin, end);
47                 is_glob = true;
48                 break;
49 
50             case ']':
51                 throw glob_error("uneven square brackets");
52 
53             case '?':
54                 is_glob = true;
55                 ++begin;
56                 break;
57 
58             case '*':
59                 is_glob = true;
60                 ++begin;
61 
62                 if (begin != end && *begin == '*') {
63                     throw glob_error("'**' not supported");
64                 }
65                 break;
66 
67             default:
68                 ++begin;
69             }
70         }
71 
72         if (is_glob && !is_ascii)
73             throw glob_error("invalid character, globs are ascii only");
74 
75         return is_glob;
76     }
77 
check_glob_range(glob_iterator & begin,glob_iterator end)78     void check_glob_range(glob_iterator& begin, glob_iterator end)
79     {
80         assert(begin != end && *begin == '[');
81         ++begin;
82 
83         if (*begin == ']') throw glob_error("empty range");
84 
85         while (begin != end) {
86             switch (*begin) {
87             case '\\':
88                 ++begin;
89 
90                 if (begin == end) {
91                     throw glob_error("trailing escape");
92                 }
93                 else if (*begin == '\\' || *begin == '/') {
94                     throw glob_error("contains escaped slash");
95                 }
96 
97                 ++begin;
98                 break;
99             case '[':
100                 throw glob_error("nested square brackets");
101             case ']':
102                 ++begin;
103                 return;
104             case '/':
105                 throw glob_error("slash in square brackets");
106             default:
107                 ++begin;
108             }
109         }
110 
111         throw glob_error("uneven square brackets");
112     }
113 
check_glob_escape(glob_iterator & begin,glob_iterator end)114     void check_glob_escape(glob_iterator& begin, glob_iterator end)
115     {
116         assert(begin != end && *begin == '\\');
117 
118         ++begin;
119 
120         if (begin == end) {
121             throw glob_error("trailing escape");
122         }
123         else if (*begin == '\\' || *begin == '/') {
124             throw glob_error("contains escaped slash");
125         }
126 
127         ++begin;
128     }
129 
130     // Does filename match pattern?
131     // Might throw glob_error if pattern is an invalid glob,
132     // but should call check_glob first to validate the glob.
glob(quickbook::string_view const & pattern,quickbook::string_view const & filename)133     bool glob(
134         quickbook::string_view const& pattern,
135         quickbook::string_view const& filename)
136     {
137         // If there wasn't this special case then '*' would match an
138         // empty string.
139         if (filename.empty()) return pattern.empty();
140 
141         glob_iterator pattern_it = pattern.begin();
142         glob_iterator pattern_end = pattern.end();
143 
144         glob_iterator filename_it = filename.begin();
145         glob_iterator filename_end = filename.end();
146 
147         if (!match_section(pattern_it, pattern_end, filename_it, filename_end))
148             return false;
149 
150         while (pattern_it != pattern_end) {
151             assert(*pattern_it == '*');
152             ++pattern_it;
153 
154             if (pattern_it == pattern_end) return true;
155 
156             if (*pattern_it == '*') {
157                 throw glob_error("'**' not supported");
158             }
159 
160             for (;;) {
161                 if (filename_it == filename_end) return false;
162                 if (match_section(
163                         pattern_it, pattern_end, filename_it, filename_end))
164                     break;
165                 ++filename_it;
166             }
167         }
168 
169         return filename_it == filename_end;
170     }
171 
match_section(glob_iterator & pattern_begin,glob_iterator pattern_end,glob_iterator & filename_begin,glob_iterator & filename_end)172     bool match_section(
173         glob_iterator& pattern_begin,
174         glob_iterator pattern_end,
175         glob_iterator& filename_begin,
176         glob_iterator& filename_end)
177     {
178         glob_iterator pattern_it = pattern_begin;
179         glob_iterator filename_it = filename_begin;
180 
181         while (pattern_it != pattern_end && *pattern_it != '*') {
182             if (filename_it == filename_end) return false;
183 
184             switch (*pattern_it) {
185             case '*':
186                 assert(false);
187                 throw new glob_error("Internal error");
188             case '[':
189                 if (!match_range(pattern_it, pattern_end, *filename_it))
190                     return false;
191                 ++filename_it;
192                 break;
193             case ']':
194                 throw glob_error("uneven square brackets");
195             case '?':
196                 ++pattern_it;
197                 ++filename_it;
198                 break;
199             case '\\':
200                 ++pattern_it;
201                 if (pattern_it == pattern_end) {
202                     throw glob_error("trailing escape");
203                 }
204                 else if (*pattern_it == '\\' || *pattern_it == '/') {
205                     throw glob_error("contains escaped slash");
206                 }
207                 BOOST_FALLTHROUGH;
208             default:
209                 if (*pattern_it != *filename_it) return false;
210                 ++pattern_it;
211                 ++filename_it;
212             }
213         }
214 
215         if (pattern_it == pattern_end && filename_it != filename_end)
216             return false;
217 
218         pattern_begin = pattern_it;
219         filename_begin = filename_it;
220         return true;
221     }
222 
match_range(glob_iterator & pattern_begin,glob_iterator pattern_end,char x)223     bool match_range(
224         glob_iterator& pattern_begin, glob_iterator pattern_end, char x)
225     {
226         assert(pattern_begin != pattern_end && *pattern_begin == '[');
227         ++pattern_begin;
228         if (pattern_begin == pattern_end) {
229             throw glob_error("uneven square brackets");
230         }
231 
232         bool invert_match = false;
233         bool matched = false;
234 
235         if (*pattern_begin == '^') {
236             invert_match = true;
237             ++pattern_begin;
238             if (pattern_begin == pattern_end) {
239                 throw glob_error("uneven square brackets");
240             }
241         }
242         else if (*pattern_begin == ']') {
243             throw glob_error("empty range");
244         }
245 
246         // Search for a match
247         for (;;) {
248             unsigned char first = *pattern_begin;
249             ++pattern_begin;
250             if (first == ']') break;
251             if (first == '[') {
252                 throw glob_error("nested square brackets");
253             }
254             if (pattern_begin == pattern_end) {
255                 throw glob_error("uneven square brackets");
256             }
257 
258             if (first == '\\') {
259                 first = *pattern_begin;
260                 if (first == '\\' || first == '/') {
261                     throw glob_error("contains escaped slash");
262                 }
263                 ++pattern_begin;
264                 if (pattern_begin == pattern_end) {
265                     throw glob_error("uneven square brackets");
266                 }
267             }
268             else if (first == '/') {
269                 throw glob_error("slash in square brackets");
270             }
271 
272             if (*pattern_begin != '-') {
273                 matched = matched || (first == x);
274             }
275             else {
276                 ++pattern_begin;
277                 if (pattern_begin == pattern_end) {
278                     throw glob_error("uneven square brackets");
279                 }
280 
281                 unsigned char second = *pattern_begin;
282                 ++pattern_begin;
283                 if (second == ']') {
284                     matched = matched || (first == x) || (x == '-');
285                     break;
286                 }
287                 if (pattern_begin == pattern_end) {
288                     throw glob_error("uneven square brackets");
289                 }
290 
291                 if (second == '\\') {
292                     second = *pattern_begin;
293                     if (second == '\\' || second == '/') {
294                         throw glob_error("contains escaped slash");
295                     }
296                     ++pattern_begin;
297                     if (pattern_begin == pattern_end) {
298                         throw glob_error("uneven square brackets");
299                     }
300                 }
301                 else if (second == '/') {
302                     throw glob_error("slash in square brackets");
303                 }
304 
305                 matched = matched || (first <= x && x <= second);
306             }
307         }
308 
309         return invert_match != matched;
310     }
311 
find_glob_char(quickbook::string_view pattern,std::size_t pos)312     std::size_t find_glob_char(quickbook::string_view pattern, std::size_t pos)
313     {
314         // Weird style is because quickbook::string_view's find_first_of
315         // doesn't take a position argument.
316         std::size_t removed = 0;
317 
318         for (;;) {
319             pos = pattern.find_first_of("[]?*\\");
320             if (pos == quickbook::string_view::npos) return pos;
321             if (pattern[pos] != '\\') return pos + removed;
322             pattern.remove_prefix(pos + 2);
323             removed += pos + 2;
324         }
325     }
326 
glob_unescape(quickbook::string_view pattern)327     std::string glob_unescape(quickbook::string_view pattern)
328     {
329         std::string result;
330 
331         for (;;) {
332             std::size_t pos = pattern.find("\\");
333             if (pos == quickbook::string_view::npos) {
334                 result.append(pattern.data(), pattern.size());
335                 break;
336             }
337 
338             result.append(pattern.data(), pos);
339             ++pos;
340             if (pos < pattern.size()) {
341                 result += pattern[pos];
342                 ++pos;
343             }
344             pattern.remove_prefix(pos);
345         }
346 
347         return result;
348     }
349 }
350