"""Filename globbing utility."""

import os
import re
import fnmatch

__all__ = ["glob", "iglob", "escape"]


def glob(pathname, *, recursive=False):
    """Return a list of the paths that match *pathname*.

    *pathname* may use shell-style wildcards in the manner of fnmatch.
    Unlike fnmatch, names beginning with a dot are treated specially:
    the '*' and '?' patterns do not match them.

    When *recursive* is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    matches = iglob(pathname, recursive=recursive)
    return list(matches)


def iglob(pathname, *, recursive=False):
    """Return an iterator over the paths that match *pathname*.

    *pathname* may use shell-style wildcards in the manner of fnmatch.
    Unlike fnmatch, names beginning with a dot are treated specially:
    the '*' and '?' patterns do not match them.

    When *recursive* is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    matches = _iglob(pathname, recursive, False)
    if not recursive or not _isrecursive(pathname):
        return matches
    # A bare '**' makes the recursive helper emit an empty string first;
    # consume it here so callers never see it.
    leading = next(matches)
    assert not leading
    return matches


def _iglob(pathname, recursive, dironly):
    """Yield matches for *pathname*, expanding one path component per level.

    *dironly* is passed down to the directory-listing helpers; it is set to
    true when this generator is expanding the directory part of a longer
    pattern.
    """
    head, tail = os.path.split(pathname)

    if not has_magic(pathname):
        # Literal path: nothing to expand, only existence to check.
        assert not dironly
        # Patterns ending with a slash (empty tail) should match only
        # directories.
        exists = os.path.lexists(pathname) if tail else os.path.isdir(head)
        if exists:
            yield pathname
        return

    wants_recursion = recursive and _isrecursive(tail)

    if not head:
        # Single-component pattern, resolved against the current directory.
        expand = _glob2 if wants_recursion else _glob1
        yield from expand(head, tail, dironly)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, recursive, True)
    else:
        parents = [head]

    if not has_magic(tail):
        lister = _glob0
    elif wants_recursion:
        lister = _glob2
    else:
        lister = _glob1

    for parent in parents:
        for name in lister(parent, tail, dironly):
            yield os.path.join(parent, name)


# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames.  _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).

def _glob1(dirname, pattern, dironly):
    """Return the entries of *dirname* whose names match *pattern*."""
    entries = list(_iterdir(dirname, dironly))
    if _ishidden(pattern):
        candidates = entries
    else:
        # Dot-files only take part when the pattern itself starts with a dot.
        candidates = (entry for entry in entries if not _ishidden(entry))
    return fnmatch.filter(candidates, pattern)


def _glob0(dirname, basename, dironly):
    """Return ``[basename]`` if the literal entry exists, else ``[]``."""
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        found = os.path.isdir(dirname)
    return [basename] if found else []


# Following functions are not public but can be used by third-party code.

def glob0(dirname, pattern):
    """Call `_glob0` with *dironly* set to false."""
    return _glob0(dirname, pattern, dironly=False)


def glob1(dirname, pattern):
    """Call `_glob1` with *dironly* set to false."""
    return _glob1(dirname, pattern, dironly=False)


# This helper function recursively yields relative pathnames inside a literal
# directory.

def _glob2(dirname, pattern, dironly):
    """Yield an empty string, then every relative path below *dirname*."""
    assert _isrecursive(pattern)
    # Slicing keeps the empty value the same type (str or bytes) as the pattern.
    empty = pattern[:0]
    yield empty
    for relative in _rlistdir(dirname, dironly):
        yield relative


# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
def _iterdir(dirname, dironly):
    """Yield the names of the entries found directly inside *dirname*.

    An empty *dirname* stands for the current directory (``os.curdir``,
    converted to bytes when *dirname* is bytes).  When *dironly* is true,
    only entries for which ``entry.is_dir()`` is true are yielded.

    Any ``OSError`` raised while opening or scanning the directory, or while
    testing an entry, is swallowed: the affected entry (or the whole
    directory) simply contributes nothing.
    """
    if not dirname:
        if isinstance(dirname, bytes):
            dirname = bytes(os.curdir, 'ASCII')
        else:
            dirname = os.curdir
    try:
        with os.scandir(dirname) as it:
            for entry in it:
                try:
                    if not dironly or entry.is_dir():
                        yield entry.name
                except OSError:
                    # Best effort: skip an entry that cannot be inspected.
                    pass
    except OSError:
        return


# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly):
    """Yield paths, relative to *dirname*, of the non-hidden entries below it.

    Each visible entry is yielded and then descended into.  Descending into
    something that cannot be scanned yields nothing, because `_iterdir`
    swallows the resulting ``OSError``.
    """
    names = list(_iterdir(dirname, dironly))
    for x in names:
        if not _ishidden(x):
            yield x
            # Keep the path relative when dirname is empty.
            path = os.path.join(dirname, x) if dirname else x
            for y in _rlistdir(path, dironly):
                yield os.path.join(x, y)


magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if *s* (str or bytes) contains any of ``*``, ``?``, ``[``."""
    if isinstance(s, bytes):
        match = magic_check_bytes.search(s)
    else:
        match = magic_check.search(s)
    return match is not None


def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is reported as not hidden instead of raising
    ``IndexError``.
    """
    # Indexing bytes gives an int, so compare against b'.'[0]; this avoids
    # comparing str with bytes.
    return bool(path) and path[0] in ('.', b'.'[0])


def _isrecursive(pattern):
    """Return True if *pattern* is exactly ``'**'`` (or ``b'**'``)."""
    if isinstance(pattern, bytes):
        return pattern == b'**'
    else:
        return pattern == '**'


def escape(pathname):
    """Escape all special characters.

    Returns *pathname* (str or bytes) with every ``*``, ``?`` and ``[``
    outside the drive part wrapped in square brackets.
    """
    # Escaping is done by wrapping any of "*?[" between square brackets.
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, pathname = os.path.splitdrive(pathname)
    if isinstance(pathname, bytes):
        pathname = magic_check_bytes.sub(br'[\1]', pathname)
    else:
        pathname = magic_check.sub(r'[\1]', pathname)
    return drive + pathname