"""Filename globbing utility."""

import os
import re
import fnmatch
import sys

__all__ = ["glob", "iglob", "escape"]


def glob(pathname, *, recursive=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    # Same matching as iglob(); the lazy iterator is simply drained.
    return [*iglob(pathname, recursive=recursive)]


def iglob(pathname, *, recursive=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    sys.audit("glob.glob", pathname, recursive)
    matches = _iglob(pathname, recursive, False)
    if recursive and _isrecursive(pathname):
        # For a bare '**' the recursive helper emits an empty string first;
        # consume it so callers never see it.
        leading = next(matches)
        assert not leading
    return matches


def _iglob(pathname, recursive, dironly):
    """Yield the paths matching *pathname*.

    *recursive* enables the special handling of a '**' component and
    *dironly* is forwarded to the directory helpers so that they only
    report directories.
    """
    head, tail = os.path.split(pathname)

    # Literal path: nothing to expand, only an existence check.
    if not has_magic(pathname):
        assert not dironly
        if tail:
            present = os.path.lexists(pathname)
        else:
            # Patterns ending with a slash should match only directories
            present = os.path.isdir(head)
        if present:
            yield pathname
        return

    # Pattern without a directory part: glob relative to the empty dirname.
    if not head:
        finder = _glob2 if recursive and _isrecursive(tail) else _glob1
        yield from finder(head, tail, dironly)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, recursive, True)
    else:
        parents = [head]

    # Choose how the last component is resolved inside each parent directory.
    if not has_magic(tail):
        finder = _glob0
    elif recursive and _isrecursive(tail):
        finder = _glob2
    else:
        finder = _glob1

    for parent in parents:
        for name in finder(parent, tail, dironly):
            yield os.path.join(parent, name)


# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames.  _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).

def _glob1(dirname, pattern, dironly):
    """Return the names inside *dirname* that match the fnmatch *pattern*."""
    listing = [*_iterdir(dirname, dironly)]
    if _ishidden(pattern):
        # The pattern itself starts with a dot, so hidden names stay eligible.
        candidates = listing
    else:
        candidates = [entry for entry in listing if not _ishidden(entry)]
    return fnmatch.filter(candidates, pattern)


def _glob0(dirname, basename, dironly):
    """Return ``[basename]`` if the literal entry exists, else ``[]``."""
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        found = os.path.isdir(dirname)
    return [basename] if found else []


# Following functions are not public but can be used by third-party code.

def glob0(dirname, pattern):
    """Call ``_glob0`` with *dironly* set to False."""
    return _glob0(dirname, pattern, False)


def glob1(dirname, pattern):
    """Call ``_glob1`` with *dironly* set to False."""
    return _glob1(dirname, pattern, False)


# This helper function recursively yields relative pathnames inside a literal
# directory.

def _glob2(dirname, pattern, dironly):
    """Yield an empty path, then every relative path found below *dirname*."""
    assert _isrecursive(pattern)
    # An empty slice of the pattern gives '' or b'' to match the pattern type.
    yield pattern[:0]
    yield from _rlistdir(dirname, dironly)


# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
def _iterdir(dirname, dironly):
    """Yield the names of the entries found directly inside *dirname*.

    An empty *dirname* is replaced by ``os.curdir`` (as bytes when *dirname*
    is bytes).  When *dironly* is true, only entries for which
    ``entry.is_dir()`` is true are yielded.  An ``OSError`` raised while
    opening or scanning the directory ends the iteration silently, and an
    ``OSError`` raised while handling a single entry skips that entry.
    """
    if not dirname:
        # Keep the str/bytes flavour of the argument for os.scandir().
        if isinstance(dirname, bytes):
            dirname = bytes(os.curdir, 'ASCII')
        else:
            dirname = os.curdir
    try:
        with os.scandir(dirname) as it:
            for entry in it:
                try:
                    if not dironly or entry.is_dir():
                        yield entry.name
                except OSError:
                    # Best effort: ignore an entry that cannot be inspected.
                    pass
    except OSError:
        # Missing, unreadable or non-directory path: nothing to yield.
        return


# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly):
    """Yield paths, relative to *dirname*, of the non-hidden entries below it.

    Each non-hidden name is yielded first and then recursed into; hidden
    names (see ``_ishidden``) are neither yielded nor descended into.
    """
    names = list(_iterdir(dirname, dironly))
    for x in names:
        if not _ishidden(x):
            yield x
            # With an empty dirname the name itself is the path to descend into.
            path = os.path.join(dirname, x) if dirname else x
            for y in _rlistdir(path, dironly):
                yield os.path.join(x, y)


# Matches any single glob metacharacter: '*', '?' or '['.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None


def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is reported as not hidden instead of raising
    ``IndexError``.
    """
    if not path:
        return False
    # Indexing bytes gives an int, hence the comparison against b'.'[0].
    return path[0] in ('.', b'.'[0])


def _isrecursive(pattern):
    """Return True if *pattern* is exactly '**' (or b'**' for bytes)."""
    if isinstance(pattern, bytes):
        return pattern == b'**'
    return pattern == '**'


def escape(pathname):
    """Escape all special characters.
    """
    # Escaping is done by wrapping any of "*?[" between square brackets.
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, pathname = os.path.splitdrive(pathname)
    if isinstance(pathname, bytes):
        pathname = magic_check_bytes.sub(br'[\1]', pathname)
    else:
        pathname = magic_check.sub(r'[\1]', pathname)
    return drive + pathname