1 /*============================================================================= 2 Copyright (c) 2013 Daniel James 3 4 Use, modification and distribution is subject to the Boost Software 5 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 6 http://www.boost.org/LICENSE_1_0.txt) 7 =============================================================================*/ 8 9 #include "glob.hpp" 10 #include <cassert> 11 12 namespace quickbook 13 { 14 typedef string_iterator glob_iterator; 15 16 void check_glob_range(glob_iterator&, glob_iterator); 17 void check_glob_escape(glob_iterator&, glob_iterator); 18 19 bool match_section( 20 glob_iterator& pattern_begin, 21 glob_iterator pattern_end, 22 glob_iterator& filename_begin, 23 glob_iterator& filename_end); 24 bool match_range( 25 glob_iterator& pattern_begin, glob_iterator pattern_end, char x); 26 27 // Is pattern a glob or a plain file name? 28 // Throws glob_error if pattern is an invalid glob. check_glob(quickbook::string_view pattern)29 bool check_glob(quickbook::string_view pattern) 30 { 31 bool is_glob = false; 32 bool is_ascii = true; 33 34 glob_iterator begin = pattern.begin(); 35 glob_iterator end = pattern.end(); 36 37 while (begin != end) { 38 if (*begin < 32 || (*begin & 0x80)) is_ascii = false; 39 40 switch (*begin) { 41 case '\\': 42 check_glob_escape(begin, end); 43 break; 44 45 case '[': 46 check_glob_range(begin, end); 47 is_glob = true; 48 break; 49 50 case ']': 51 throw glob_error("uneven square brackets"); 52 53 case '?': 54 is_glob = true; 55 ++begin; 56 break; 57 58 case '*': 59 is_glob = true; 60 ++begin; 61 62 if (begin != end && *begin == '*') { 63 throw glob_error("'**' not supported"); 64 } 65 break; 66 67 default: 68 ++begin; 69 } 70 } 71 72 if (is_glob && !is_ascii) 73 throw glob_error("invalid character, globs are ascii only"); 74 75 return is_glob; 76 } 77 check_glob_range(glob_iterator & begin,glob_iterator end)78 void check_glob_range(glob_iterator& begin, glob_iterator end) 79 { 80 assert(begin != end && *begin == '['); 81 ++begin; 82 83 if (*begin == ']') throw glob_error("empty range"); 84 85 while (begin != end) { 86 switch (*begin) { 87 case '\\': 88 ++begin; 89 90 if (begin == end) { 91 throw glob_error("trailing escape"); 92 } 93 else if (*begin == '\\' || *begin == '/') { 94 throw glob_error("contains escaped slash"); 95 } 96 97 ++begin; 98 break; 99 case '[': 100 throw glob_error("nested square brackets"); 101 case ']': 102 ++begin; 103 return; 104 case '/': 105 throw glob_error("slash in square brackets"); 106 default: 107 ++begin; 108 } 109 } 110 111 throw glob_error("uneven square brackets"); 112 } 113 check_glob_escape(glob_iterator & begin,glob_iterator end)114 void check_glob_escape(glob_iterator& begin, glob_iterator end) 115 { 116 assert(begin != end && *begin == '\\'); 117 118 ++begin; 119 120 if (begin == end) { 121 throw glob_error("trailing escape"); 122 } 123 else if (*begin == '\\' || *begin == '/') { 124 throw glob_error("contains escaped slash"); 125 } 126 127 ++begin; 128 } 129 130 // Does filename match pattern? 131 // Might throw glob_error if pattern is an invalid glob, 132 // but should call check_glob first to validate the glob. glob(quickbook::string_view const & pattern,quickbook::string_view const & filename)133 bool glob( 134 quickbook::string_view const& pattern, 135 quickbook::string_view const& filename) 136 { 137 // If there wasn't this special case then '*' would match an 138 // empty string. 139 if (filename.empty()) return pattern.empty(); 140 141 glob_iterator pattern_it = pattern.begin(); 142 glob_iterator pattern_end = pattern.end(); 143 144 glob_iterator filename_it = filename.begin(); 145 glob_iterator filename_end = filename.end(); 146 147 if (!match_section(pattern_it, pattern_end, filename_it, filename_end)) 148 return false; 149 150 while (pattern_it != pattern_end) { 151 assert(*pattern_it == '*'); 152 ++pattern_it; 153 154 if (pattern_it == pattern_end) return true; 155 156 if (*pattern_it == '*') { 157 throw glob_error("'**' not supported"); 158 } 159 160 for (;;) { 161 if (filename_it == filename_end) return false; 162 if (match_section( 163 pattern_it, pattern_end, filename_it, filename_end)) 164 break; 165 ++filename_it; 166 } 167 } 168 169 return filename_it == filename_end; 170 } 171 match_section(glob_iterator & pattern_begin,glob_iterator pattern_end,glob_iterator & filename_begin,glob_iterator & filename_end)172 bool match_section( 173 glob_iterator& pattern_begin, 174 glob_iterator pattern_end, 175 glob_iterator& filename_begin, 176 glob_iterator& filename_end) 177 { 178 glob_iterator pattern_it = pattern_begin; 179 glob_iterator filename_it = filename_begin; 180 181 while (pattern_it != pattern_end && *pattern_it != '*') { 182 if (filename_it == filename_end) return false; 183 184 switch (*pattern_it) { 185 case '*': 186 assert(false); 187 throw new glob_error("Internal error"); 188 case '[': 189 if (!match_range(pattern_it, pattern_end, *filename_it)) 190 return false; 191 ++filename_it; 192 break; 193 case ']': 194 throw glob_error("uneven square brackets"); 195 case '?': 196 ++pattern_it; 197 ++filename_it; 198 break; 199 case '\\': 200 ++pattern_it; 201 if (pattern_it == pattern_end) { 202 throw glob_error("trailing escape"); 203 } 204 else if (*pattern_it == '\\' || *pattern_it == '/') { 205 throw glob_error("contains escaped slash"); 206 } 207 BOOST_FALLTHROUGH; 208 default: 209 if (*pattern_it != *filename_it) return false; 210 ++pattern_it; 211 ++filename_it; 212 } 213 } 214 215 if (pattern_it == pattern_end && filename_it != filename_end) 216 return false; 217 218 pattern_begin = pattern_it; 219 filename_begin = filename_it; 220 return true; 221 } 222 match_range(glob_iterator & pattern_begin,glob_iterator pattern_end,char x)223 bool match_range( 224 glob_iterator& pattern_begin, glob_iterator pattern_end, char x) 225 { 226 assert(pattern_begin != pattern_end && *pattern_begin == '['); 227 ++pattern_begin; 228 if (pattern_begin == pattern_end) { 229 throw glob_error("uneven square brackets"); 230 } 231 232 bool invert_match = false; 233 bool matched = false; 234 235 if (*pattern_begin == '^') { 236 invert_match = true; 237 ++pattern_begin; 238 if (pattern_begin == pattern_end) { 239 throw glob_error("uneven square brackets"); 240 } 241 } 242 else if (*pattern_begin == ']') { 243 throw glob_error("empty range"); 244 } 245 246 // Search for a match 247 for (;;) { 248 unsigned char first = *pattern_begin; 249 ++pattern_begin; 250 if (first == ']') break; 251 if (first == '[') { 252 throw glob_error("nested square brackets"); 253 } 254 if (pattern_begin == pattern_end) { 255 throw glob_error("uneven square brackets"); 256 } 257 258 if (first == '\\') { 259 first = *pattern_begin; 260 if (first == '\\' || first == '/') { 261 throw glob_error("contains escaped slash"); 262 } 263 ++pattern_begin; 264 if (pattern_begin == pattern_end) { 265 throw glob_error("uneven square brackets"); 266 } 267 } 268 else if (first == '/') { 269 throw glob_error("slash in square brackets"); 270 } 271 272 if (*pattern_begin != '-') { 273 matched = matched || (first == x); 274 } 275 else { 276 ++pattern_begin; 277 if (pattern_begin == pattern_end) { 278 throw glob_error("uneven square brackets"); 279 } 280 281 unsigned char second = *pattern_begin; 282 ++pattern_begin; 283 if (second == ']') { 284 matched = matched || (first == x) || (x == '-'); 285 break; 286 } 287 if (pattern_begin == pattern_end) { 288 throw glob_error("uneven square brackets"); 289 } 290 291 if (second == '\\') { 292 second = *pattern_begin; 293 if (second == '\\' || second == '/') { 294 throw glob_error("contains escaped slash"); 295 } 296 ++pattern_begin; 297 if (pattern_begin == pattern_end) { 298 throw glob_error("uneven square brackets"); 299 } 300 } 301 else if (second == '/') { 302 throw glob_error("slash in square brackets"); 303 } 304 305 matched = matched || (first <= x && x <= second); 306 } 307 } 308 309 return invert_match != matched; 310 } 311 find_glob_char(quickbook::string_view pattern,std::size_t pos)312 std::size_t find_glob_char(quickbook::string_view pattern, std::size_t pos) 313 { 314 // Weird style is because quickbook::string_view's find_first_of 315 // doesn't take a position argument. 316 std::size_t removed = 0; 317 318 for (;;) { 319 pos = pattern.find_first_of("[]?*\\"); 320 if (pos == quickbook::string_view::npos) return pos; 321 if (pattern[pos] != '\\') return pos + removed; 322 pattern.remove_prefix(pos + 2); 323 removed += pos + 2; 324 } 325 } 326 glob_unescape(quickbook::string_view pattern)327 std::string glob_unescape(quickbook::string_view pattern) 328 { 329 std::string result; 330 331 for (;;) { 332 std::size_t pos = pattern.find("\\"); 333 if (pos == quickbook::string_view::npos) { 334 result.append(pattern.data(), pattern.size()); 335 break; 336 } 337 338 result.append(pattern.data(), pos); 339 ++pos; 340 if (pos < pattern.size()) { 341 result += pattern[pos]; 342 ++pos; 343 } 344 pattern.remove_prefix(pos); 345 } 346 347 return result; 348 } 349 } 350