1"""Filename matching with shell patterns. 2 3fnmatch(FILENAME, PATTERN) matches according to the local convention. 4fnmatchcase(FILENAME, PATTERN) always takes case in account. 5 6The functions operate by translating the pattern into a regular 7expression. They cache the compiled regular expressions for speed. 8 9The function translate(PATTERN) returns a regular expression 10corresponding to PATTERN. (It does not compile it.) 11""" 12import os 13import posixpath 14import re 15import functools 16 17__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] 18 19# Build a thread-safe incrementing counter to help create unique regexp group 20# names across calls. 21from itertools import count 22_nextgroupnum = count().__next__ 23del count 24 25def fnmatch(name, pat): 26 """Test whether FILENAME matches PATTERN. 27 28 Patterns are Unix shell style: 29 30 * matches everything 31 ? matches any single character 32 [seq] matches any character in seq 33 [!seq] matches any char not in seq 34 35 An initial period in FILENAME is not special. 36 Both FILENAME and PATTERN are first case-normalized 37 if the operating system requires it. 38 If you don't want this, use fnmatchcase(FILENAME, PATTERN). 39 """ 40 name = os.path.normcase(name) 41 pat = os.path.normcase(pat) 42 return fnmatchcase(name, pat) 43 44@functools.lru_cache(maxsize=256, typed=True) 45def _compile_pattern(pat): 46 if isinstance(pat, bytes): 47 pat_str = str(pat, 'ISO-8859-1') 48 res_str = translate(pat_str) 49 res = bytes(res_str, 'ISO-8859-1') 50 else: 51 res = translate(pat) 52 return re.compile(res).match 53 54def filter(names, pat): 55 """Construct a list from those elements of the iterable NAMES that match PAT.""" 56 result = [] 57 pat = os.path.normcase(pat) 58 match = _compile_pattern(pat) 59 if os.path is posixpath: 60 # normcase on posix is NOP. Optimize it away from the loop. 61 for name in names: 62 if match(name): 63 result.append(name) 64 else: 65 for name in names: 66 if match(os.path.normcase(name)): 67 result.append(name) 68 return result 69 70def fnmatchcase(name, pat): 71 """Test whether FILENAME matches PATTERN, including case. 72 73 This is a version of fnmatch() which doesn't case-normalize 74 its arguments. 75 """ 76 match = _compile_pattern(pat) 77 return match(name) is not None 78 79 80def translate(pat): 81 """Translate a shell PATTERN to a regular expression. 82 83 There is no way to quote meta-characters. 84 """ 85 86 STAR = object() 87 res = [] 88 add = res.append 89 i, n = 0, len(pat) 90 while i < n: 91 c = pat[i] 92 i = i+1 93 if c == '*': 94 # compress consecutive `*` into one 95 if (not res) or res[-1] is not STAR: 96 add(STAR) 97 elif c == '?': 98 add('.') 99 elif c == '[': 100 j = i 101 if j < n and pat[j] == '!': 102 j = j+1 103 if j < n and pat[j] == ']': 104 j = j+1 105 while j < n and pat[j] != ']': 106 j = j+1 107 if j >= n: 108 add('\\[') 109 else: 110 stuff = pat[i:j] 111 if '--' not in stuff: 112 stuff = stuff.replace('\\', r'\\') 113 else: 114 chunks = [] 115 k = i+2 if pat[i] == '!' else i+1 116 while True: 117 k = pat.find('-', k, j) 118 if k < 0: 119 break 120 chunks.append(pat[i:k]) 121 i = k+1 122 k = k+3 123 chunks.append(pat[i:j]) 124 # Escape backslashes and hyphens for set difference (--). 125 # Hyphens that create ranges shouldn't be escaped. 126 stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') 127 for s in chunks) 128 # Escape set operations (&&, ~~ and ||). 129 stuff = re.sub(r'([&~|])', r'\\\1', stuff) 130 i = j+1 131 if stuff[0] == '!': 132 stuff = '^' + stuff[1:] 133 elif stuff[0] in ('^', '['): 134 stuff = '\\' + stuff 135 add(f'[{stuff}]') 136 else: 137 add(re.escape(c)) 138 assert i == n 139 140 # Deal with STARs. 141 inp = res 142 res = [] 143 add = res.append 144 i, n = 0, len(inp) 145 # Fixed pieces at the start? 146 while i < n and inp[i] is not STAR: 147 add(inp[i]) 148 i += 1 149 # Now deal with STAR fixed STAR fixed ... 150 # For an interior `STAR fixed` pairing, we want to do a minimal 151 # .*? match followed by `fixed`, with no possibility of backtracking. 152 # We can't spell that directly, but can trick it into working by matching 153 # .*?fixed 154 # in a lookahead assertion, save the matched part in a group, then 155 # consume that group via a backreference. If the overall match fails, 156 # the lookahead assertion won't try alternatives. So the translation is: 157 # (?=(?P<name>.*?fixed))(?P=name) 158 # Group names are created as needed: g0, g1, g2, ... 159 # The numbers are obtained from _nextgroupnum() to ensure they're unique 160 # across calls and across threads. This is because people rely on the 161 # undocumented ability to join multiple translate() results together via 162 # "|" to build large regexps matching "one of many" shell patterns. 163 while i < n: 164 assert inp[i] is STAR 165 i += 1 166 if i == n: 167 add(".*") 168 break 169 assert inp[i] is not STAR 170 fixed = [] 171 while i < n and inp[i] is not STAR: 172 fixed.append(inp[i]) 173 i += 1 174 fixed = "".join(fixed) 175 if i == n: 176 add(".*") 177 add(fixed) 178 else: 179 groupnum = _nextgroupnum() 180 add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})") 181 assert i == n 182 res = "".join(res) 183 return fr'(?s:{res})\Z' 184