"""Filename globbing utility."""

import contextlib
import os
import re
import fnmatch
import functools
import itertools
import operator
import stat
import sys


__all__ = ["glob", "iglob", "escape", "translate"]

def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
        include_hidden=False):
    """Collect every path matching the pattern `pathname` into a list.

    Patterns use the simple shell-style wildcards understood by fnmatch.
    In contrast to fnmatch, names that begin with a dot are treated
    specially: by default '*' and '?' do not match them.

    If `include_hidden` is true, the patterns '*', '?', '**' will match
    hidden directories.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    All arguments are forwarded unchanged to iglob(); this function only
    materialises the iterator it returns.
    """
    matches = iglob(pathname, root_dir=root_dir, dir_fd=dir_fd,
                    recursive=recursive, include_hidden=include_hidden)
    return list(matches)

def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
          include_hidden=False):
    """Lazily produce the paths matching the pattern `pathname`.

    Patterns use the simple shell-style wildcards understood by fnmatch.
    In contrast to fnmatch, names that begin with a dot are treated
    specially and are not matched by '*' and '?' patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    Two audit events, "glob.glob" and "glob.glob/2", are raised before any
    matching work starts.
    """
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
    # The empty slice yields an empty value of the same type as pathname
    # (str or bytes), so later joins never mix the two.
    root_dir = pathname[:0] if root_dir is None else os.fspath(root_dir)
    results = _iglob(pathname, root_dir, dir_fd, recursive, False,
                     include_hidden=include_hidden)
    leading_empty_possible = (
        not pathname or (recursive and _isrecursive(pathname[:2]))
    )
    if not leading_empty_possible:
        return results
    try:
        first = next(results)
    except StopIteration:
        return results
    # Drop a leading empty string; anything else is put back in front.
    if first:
        results = itertools.chain((first,), results)
    return results

def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
           include_hidden=False):
    """Generator doing the real work for iglob().

    `pathname` is split into a directory part and a final component.  The
    directory part is expanded first (recursively, when it contains magic
    characters) and the final component is then looked up inside every
    resulting directory by _glob0, _glob1 or _glob2.
    """
    head, tail = os.path.split(pathname)

    if not has_magic(pathname):
        assert not dironly
        if tail:
            found = _lexists(_join(root_dir, pathname), dir_fd)
        else:
            # Patterns ending with a slash should match only directories
            found = _isdir(_join(root_dir, head), dir_fd)
        if found:
            yield pathname
        return

    wants_recursion = recursive and _isrecursive(tail)

    if not head:
        expand = _glob2 if wants_recursion else _glob1
        yield from expand(root_dir, tail, dir_fd, dironly,
                          include_hidden=include_hidden)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, root_dir, dir_fd, recursive, True,
                         include_hidden=include_hidden)
    else:
        parents = [head]

    if not has_magic(tail):
        expand = _glob0
    elif wants_recursion:
        expand = _glob2
    else:
        expand = _glob1

    for parent in parents:
        children = expand(_join(root_dir, parent), tail, dir_fd, dironly,
                          include_hidden=include_hidden)
        for child in children:
            yield os.path.join(parent, child)

# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).

def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the basenames inside `dirname` that match `pattern`.

    Dot-prefixed names are dropped before matching unless `include_hidden`
    is true or the pattern itself starts with a dot.
    """
    candidates = _listdir(dirname, dir_fd, dironly)
    skip_hidden = not include_hidden and not _ishidden(pattern)
    if skip_hidden:
        candidates = [name for name in candidates if not _ishidden(name)]
    return fnmatch.filter(candidates, pattern)

def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
    """Return `[basename]` if that literal entry exists, else an empty list."""
    if basename:
        present = _lexists(_join(dirname, basename), dir_fd)
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        present = _isdir(dirname, dir_fd)
    return [basename] if present else []

_deprecated_function_message = (
    "{name} is deprecated and will be removed in Python {remove}. Use "
    "glob.glob and pass a directory to its root_dir argument instead."
)

def glob0(dirname, pattern):
    """Deprecated public wrapper around _glob0(); emits a deprecation warning."""
    import warnings
    warnings._deprecated("glob.glob0", _deprecated_function_message,
                         remove=(3, 15))
    return _glob0(dirname, pattern, dir_fd=None, dironly=False)

def glob1(dirname, pattern):
    """Deprecated public wrapper around _glob1(); emits a deprecation warning."""
    import warnings
    warnings._deprecated("glob.glob1", _deprecated_function_message,
                         remove=(3, 15))
    return _glob1(dirname, pattern, dir_fd=None, dironly=False)

# This helper function recursively yields relative pathnames inside a literal
# directory.

def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Expand a '**' component rooted at `dirname`.

    An empty value (same type as `pattern`) is yielded first when `dirname`
    is empty or is a directory; every path found by _rlistdir() follows.
    """
    assert _isrecursive(pattern)
    root_matches = not dirname or _isdir(dirname, dir_fd)
    if root_matches:
        yield pattern[:0]
    yield from _rlistdir(dirname, dir_fd, dironly,
                         include_hidden=include_hidden)

# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the names of the entries found by scanning `dirname`.

    When `dironly` is true, only entries whose is_dir() check succeeds are
    yielded.  When `dir_fd` is given, a non-empty `dirname` is opened
    relative to it (and closed again afterwards) and an empty `dirname`
    means `dir_fd` itself; names are passed through os.fsencode() if
    `dirname` is bytes.  Without `dir_fd`, an empty `dirname` scans
    os.curdir.

    Any OSError raised while opening or scanning simply ends the iteration.
    """
    try:
        fd = None
        fsencode = None
        if dir_fd is not None:
            if dirname:
                fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
            else:
                arg = dir_fd
            if isinstance(dirname, bytes):
                fsencode = os.fsencode
        elif dirname:
            arg = dirname
        elif isinstance(dirname, bytes):
            arg = bytes(os.curdir, 'ASCII')
        else:
            arg = os.curdir
        try:
            with os.scandir(arg) as it:
                for entry in it:
                    try:
                        if not dironly or entry.is_dir():
                            if fsencode is not None:
                                yield fsencode(entry.name)
                            else:
                                yield entry.name
                    except OSError:
                        # An entry that cannot be examined is skipped.
                        pass
        finally:
            # Only descriptors opened here are closed; a caller-supplied
            # dir_fd is left untouched.
            if fd is not None:
                os.close(fd)
    except OSError:
        return

def _listdir(dirname, dir_fd, dironly):
    """Return the output of _iterdir() as a list, closing the generator."""
    with contextlib.closing(_iterdir(dirname, dir_fd, dironly)) as it:
        return list(it)

# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Yield every path below `dirname`, relative to `dirname`.

    Each visible name is yielded before the paths beneath it.  Dot-prefixed
    names are neither yielded nor descended into unless `include_hidden` is
    true.
    """
    names = _listdir(dirname, dir_fd, dironly)
    for x in names:
        if include_hidden or not _ishidden(x):
            yield x
            path = _join(dirname, x) if dirname else x
            for y in _rlistdir(path, dir_fd, dironly,
                               include_hidden=include_hidden):
                yield _join(x, y)


def _lexists(pathname, dir_fd):
    """Return True if `pathname` exists (without following a final symlink).

    Same as os.path.lexists(), but with dir_fd.
    """
    if dir_fd is None:
        return os.path.lexists(pathname)
    try:
        os.lstat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    else:
        return True

def _isdir(pathname, dir_fd):
    """Return True if `pathname` is a directory.

    Same as os.path.isdir(), but with dir_fd.
    """
    if dir_fd is None:
        return os.path.isdir(pathname)
    try:
        st = os.stat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    else:
        return stat.S_ISDIR(st.st_mode)

def _join(dirname, basename):
    """Join two path pieces, returning the other one if either is empty."""
    # It is common if dirname or basename is empty
    if not dirname or not basename:
        return dirname or basename
    return os.path.join(dirname, basename)

# Match any one of the wildcard-introducing characters '*', '?' and '['.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')

def has_magic(s):
    """Return True if `s` (str or bytes) contains '*', '?' or '['."""
    if isinstance(s, bytes):
        match = magic_check_bytes.search(s)
    else:
        match = magic_check.search(s)
    return match is not None

def _ishidden(path):
    """Return True if `path` (str or bytes) starts with a dot.

    An empty value is reported as not hidden rather than raising IndexError.
    """
    # Indexing bytes gives an int, so it is compared against b'.'[0]; a str
    # is never compared with a bytes object here.
    return bool(path) and path[0] in ('.', b'.'[0])

def _isrecursive(pattern):
    """Return True if `pattern` is exactly '**' (or b'**' for bytes)."""
    if isinstance(pattern, bytes):
        return pattern == b'**'
    else:
        return pattern == '**'

def escape(pathname):
    """Escape all special characters.

    Each of '*', '?' and '[' outside the drive part is wrapped in square
    brackets.  The result has the same type (str or bytes) as `pathname`.
    """
    # Escaping is done by wrapping any of "*?[" between square brackets.
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, pathname = os.path.splitdrive(pathname)
    if isinstance(pathname, bytes):
        pathname = magic_check_bytes.sub(br'[\1]', pathname)
    else:
        pathname = magic_check.sub(r'[\1]', pathname)
    return drive + pathname


# Pattern parts that _Globber.selector() hands to special_selector().
_special_parts = ('', '.', '..')
# Flags used by _iterdir() when opening a directory relative to dir_fd.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Sentinel for _Globber's `recursive` argument: recursive_selector() does not
# follow symlinks while walking when `recursive` is this object.
_no_recurse_symlinks = object()


def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Translate a pathname with shell wildcards to a regular expression.

    If `recursive` is true, the pattern segment '**' will match any number of
    path segments.

    If `include_hidden` is true, wildcards can match path segments beginning
    with a dot ('.').

    If a sequence of separator characters is given to `seps`, they will be
    used to split the pattern into segments and match path separators. If not
    given, os.path.sep and os.path.altsep (where available) are used.

    The return value is a regular expression string of the form
    '(?s:...)\\Z'.
    """
    if not seps:
        if os.path.altsep:
            seps = (os.path.sep, os.path.altsep)
        else:
            seps = os.path.sep
    escaped_seps = ''.join(map(re.escape, seps))
    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
    not_sep = f'[^{escaped_seps}]'
    # Regex fragments for whole-segment wildcards.  The "last" variants are
    # used for the final pattern segment and carry no trailing separator.
    if include_hidden:
        one_last_segment = f'{not_sep}+'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:.+{any_sep})?'
        any_last_segments = '.*'
    else:
        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:{one_segment})*'
        any_last_segments = f'{any_segments}(?:{one_last_segment})?'

    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == '*':
            results.append(one_segment if idx < last_part_idx else one_last_segment)
        elif recursive and part == '**':
            if idx < last_part_idx:
                # Of a run of consecutive '**' segments only the last one
                # contributes to the expression.
                if parts[idx + 1] != '**':
                    results.append(any_segments)
            else:
                results.append(any_last_segments)
        else:
            if part:
                if not include_hidden and part[0] in '*?':
                    # A leading wildcard must not match a leading dot.
                    results.append(r'(?!\.)')
                results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
            if idx < last_part_idx:
                results.append(any_sep)
    res = ''.join(results)
    return fr'(?s:{res})\Z'


@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Compile given glob pattern to a re.Pattern object (observing case
    sensitivity).

    The value returned is the bound `match` method of the compiled pattern.
    The pattern is always translated with include_hidden=True, and results
    are memoised by functools.lru_cache.
    """
    flags = re.NOFLAG if case_sensitive else re.IGNORECASE
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    return re.compile(regex, flags=flags).match


class _Globber:
    """Class providing shell-style pattern matching and globbing.

    The low-level methods defined here call methods on the path objects
    they receive (lstat(), joinpath(), iterdir(), with_segments());
    _StringGlobber overrides them to work on plain strings.
    """

    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
        """Store the matching options.

        `sep` is the separator used to join and compile pattern parts.
        `case_sensitive` selects the regex flags used by compile().  When
        `case_pedantic` is true, selector() routes literal parts through
        wildcard_selector() instead of literal_selector().  `recursive`
        enables '**' handling; the _no_recurse_symlinks sentinel also
        enables it, but without following symlinks during the walk.
        """
        self.sep = sep
        self.case_sensitive = case_sensitive
        self.case_pedantic = case_pedantic
        self.recursive = recursive

    # Low-level methods

    # self.lstat(path) calls path.lstat(); self.add_slash(path) calls
    # path.joinpath('').
    lstat = operator.methodcaller('lstat')
    add_slash = operator.methodcaller('joinpath', '')

    @staticmethod
    def scandir(path):
        """Emulates os.scandir(), which returns an object that can be used as
        a context manager. This method is called by walk() and glob().
        """
        return contextlib.nullcontext(path.iterdir())

    @staticmethod
    def concat_path(path, text):
        """Appends text to the given path.
        """
        return path.with_segments(path._raw_path + text)

    @staticmethod
    def parse_entry(entry):
        """Returns the path of an entry yielded from scandir().
        """
        return entry

    # High-level methods

    def compile(self, pat):
        """Return a match function for `pat` using this globber's options."""
        return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)

    def selector(self, parts):
        """Returns a function that selects from a given path, walking and
        filtering according to the glob-style pattern parts in *parts*.

        Parts are consumed from the end of the list, which is mutated.
        """
        if not parts:
            return self.select_exists
        part = parts.pop()
        if self.recursive and part == '**':
            selector = self.recursive_selector
        elif part in _special_parts:
            selector = self.special_selector
        elif not self.case_pedantic and magic_check.search(part) is None:
            selector = self.literal_selector
        else:
            selector = self.wildcard_selector
        return selector(part, parts)

    def special_selector(self, part, parts):
        """Returns a function that selects special children of the given path.
        """
        select_next = self.selector(parts)

        def select_special(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            return select_next(path, exists)
        return select_special

    def literal_selector(self, part, parts):
        """Returns a function that selects a literal descendant of a path.
        """

        # Optimization: consume and join any subsequent literal parts here,
        # rather than leaving them for the next selector. This reduces the
        # number of string concatenation operations and calls to add_slash().
        while parts and magic_check.search(parts[-1]) is None:
            part += self.sep + parts.pop()

        select_next = self.selector(parts)

        def select_literal(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            # The literal path was built without touching the filesystem, so
            # it is always passed on as not yet known to exist.
            return select_next(path, exists=False)
        return select_literal

    def wildcard_selector(self, part, parts):
        """Returns a function that selects direct children of a given path,
        filtering by pattern.
        """

        # A bare '*' accepts every entry name, so no regex is needed.
        match = None if part == '*' else self.compile(part)
        # When more parts follow, only directories can lead to a match.
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path, exists=False):
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                pass
            else:
                for entry in entries:
                    if match is None or match(entry.name):
                        if dir_only:
                            try:
                                if not entry.is_dir():
                                    continue
                            except OSError:
                                continue
                        entry_path = self.parse_entry(entry)
                        if dir_only:
                            yield from select_next(entry_path, exists=True)
                        else:
                            yield entry_path
        return select_wildcard

    def recursive_selector(self, part, parts):
        """Returns a function that selects a given path and all its children,
        recursively, filtering by pattern.
        """
        # Optimization: consume following '**' parts, which have no effect.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: consume and join any following non-special parts here,
        # rather than leaving them for the next selector. They're used to
        # build a regular expression, which we use to filter the results of
        # the recursive walk. As a result, non-special pattern segments
        # following a '**' wildcard don't require additional filesystem access
        # to expand.
        follow_symlinks = self.recursive is not _no_recurse_symlinks
        if follow_symlinks:
            while parts and parts[-1] not in _special_parts:
                part += self.sep + parts.pop()

        # With nothing joined onto '**', every walked path is accepted.
        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path, exists=False):
            path = self.add_slash(path)
            # Matching starts at this offset, i.e. after the starting path.
            match_pos = len(str(path))
            if match is None or match(str(path), match_pos):
                yield from select_next(path, exists)
            # Iterative walk: directories still to be scanned are kept on an
            # explicit stack.
            stack = [path]
            while stack:
                yield from select_recursive_step(stack, match_pos)

        def select_recursive_step(stack, match_pos):
            path = stack.pop()
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                pass
            else:
                for entry in entries:
                    is_dir = False
                    try:
                        if entry.is_dir(follow_symlinks=follow_symlinks):
                            is_dir = True
                    except OSError:
                        pass

                    if is_dir or not dir_only:
                        entry_path = self.parse_entry(entry)
                        if match is None or match(str(entry_path), match_pos):
                            if dir_only:
                                yield from select_next(entry_path, exists=True)
                            else:
                                # Optimization: directly yield the path if this is
                                # last pattern part.
                                yield entry_path
                        if is_dir:
                            stack.append(entry_path)

        return select_recursive

    def select_exists(self, path, exists=False):
        """Yields the given path, if it exists.
        """
        if exists:
            # Optimization: this path is already known to exist, e.g. because
            # it was returned from os.scandir(), so we skip calling lstat().
            yield path
        else:
            try:
                self.lstat(path)
                yield path
            except OSError:
                pass


class _StringGlobber(_Globber):
    """_Globber variant whose low-level methods operate on path strings."""

    lstat = staticmethod(os.lstat)
    scandir = staticmethod(os.scandir)
    # Entries come from os.scandir(); their `path` attribute is used.
    parse_entry = operator.attrgetter('path')
    # Appending text to a string path is plain concatenation.
    concat_path = operator.add

    if os.name == 'nt':
        @staticmethod
        def add_slash(pathname):
            """Append a backslash unless the part after the root is empty or
            already ends with a slash or backslash."""
            tail = os.path.splitroot(pathname)[2]
            if not tail or tail[-1] in '\\/':
                return pathname
            return f'{pathname}\\'
    else:
        @staticmethod
        def add_slash(pathname):
            """Append a slash unless `pathname` is empty or already ends
            with one."""
            if not pathname or pathname[-1] == '/':
                return pathname
            return f'{pathname}/'