1"""Filename matching with shell patterns. 2 3fnmatch(FILENAME, PATTERN) matches according to the local convention. 4fnmatchcase(FILENAME, PATTERN) always takes case in account. 5 6The functions operate by translating the pattern into a regular 7expression. They cache the compiled regular expressions for speed. 8 9The function translate(PATTERN) returns a regular expression 10corresponding to PATTERN. (It does not compile it.) 11""" 12import os 13import posixpath 14import re 15import functools 16 17__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] 18 19def fnmatch(name, pat): 20 """Test whether FILENAME matches PATTERN. 21 22 Patterns are Unix shell style: 23 24 * matches everything 25 ? matches any single character 26 [seq] matches any character in seq 27 [!seq] matches any char not in seq 28 29 An initial period in FILENAME is not special. 30 Both FILENAME and PATTERN are first case-normalized 31 if the operating system requires it. 32 If you don't want this, use fnmatchcase(FILENAME, PATTERN). 33 """ 34 name = os.path.normcase(name) 35 pat = os.path.normcase(pat) 36 return fnmatchcase(name, pat) 37 38@functools.lru_cache(maxsize=32768, typed=True) 39def _compile_pattern(pat): 40 if isinstance(pat, bytes): 41 pat_str = str(pat, 'ISO-8859-1') 42 res_str = translate(pat_str) 43 res = bytes(res_str, 'ISO-8859-1') 44 else: 45 res = translate(pat) 46 return re.compile(res).match 47 48def filter(names, pat): 49 """Construct a list from those elements of the iterable NAMES that match PAT.""" 50 result = [] 51 pat = os.path.normcase(pat) 52 match = _compile_pattern(pat) 53 if os.path is posixpath: 54 # normcase on posix is NOP. Optimize it away from the loop. 55 for name in names: 56 if match(name): 57 result.append(name) 58 else: 59 for name in names: 60 if match(os.path.normcase(name)): 61 result.append(name) 62 return result 63 64def fnmatchcase(name, pat): 65 """Test whether FILENAME matches PATTERN, including case. 66 67 This is a version of fnmatch() which doesn't case-normalize 68 its arguments. 69 """ 70 match = _compile_pattern(pat) 71 return match(name) is not None 72 73 74def translate(pat): 75 """Translate a shell PATTERN to a regular expression. 76 77 There is no way to quote meta-characters. 78 """ 79 80 STAR = object() 81 parts = _translate(pat, STAR, '.') 82 return _join_translated_parts(parts, STAR) 83 84 85def _translate(pat, STAR, QUESTION_MARK): 86 res = [] 87 add = res.append 88 i, n = 0, len(pat) 89 while i < n: 90 c = pat[i] 91 i = i+1 92 if c == '*': 93 # compress consecutive `*` into one 94 if (not res) or res[-1] is not STAR: 95 add(STAR) 96 elif c == '?': 97 add(QUESTION_MARK) 98 elif c == '[': 99 j = i 100 if j < n and pat[j] == '!': 101 j = j+1 102 if j < n and pat[j] == ']': 103 j = j+1 104 while j < n and pat[j] != ']': 105 j = j+1 106 if j >= n: 107 add('\\[') 108 else: 109 stuff = pat[i:j] 110 if '-' not in stuff: 111 stuff = stuff.replace('\\', r'\\') 112 else: 113 chunks = [] 114 k = i+2 if pat[i] == '!' else i+1 115 while True: 116 k = pat.find('-', k, j) 117 if k < 0: 118 break 119 chunks.append(pat[i:k]) 120 i = k+1 121 k = k+3 122 chunk = pat[i:j] 123 if chunk: 124 chunks.append(chunk) 125 else: 126 chunks[-1] += '-' 127 # Remove empty ranges -- invalid in RE. 128 for k in range(len(chunks)-1, 0, -1): 129 if chunks[k-1][-1] > chunks[k][0]: 130 chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] 131 del chunks[k] 132 # Escape backslashes and hyphens for set difference (--). 133 # Hyphens that create ranges shouldn't be escaped. 134 stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') 135 for s in chunks) 136 # Escape set operations (&&, ~~ and ||). 137 stuff = re.sub(r'([&~|])', r'\\\1', stuff) 138 i = j+1 139 if not stuff: 140 # Empty range: never match. 141 add('(?!)') 142 elif stuff == '!': 143 # Negated empty range: match any character. 144 add('.') 145 else: 146 if stuff[0] == '!': 147 stuff = '^' + stuff[1:] 148 elif stuff[0] in ('^', '['): 149 stuff = '\\' + stuff 150 add(f'[{stuff}]') 151 else: 152 add(re.escape(c)) 153 assert i == n 154 return res 155 156 157def _join_translated_parts(inp, STAR): 158 # Deal with STARs. 159 res = [] 160 add = res.append 161 i, n = 0, len(inp) 162 # Fixed pieces at the start? 163 while i < n and inp[i] is not STAR: 164 add(inp[i]) 165 i += 1 166 # Now deal with STAR fixed STAR fixed ... 167 # For an interior `STAR fixed` pairing, we want to do a minimal 168 # .*? match followed by `fixed`, with no possibility of backtracking. 169 # Atomic groups ("(?>...)") allow us to spell that directly. 170 # Note: people rely on the undocumented ability to join multiple 171 # translate() results together via "|" to build large regexps matching 172 # "one of many" shell patterns. 173 while i < n: 174 assert inp[i] is STAR 175 i += 1 176 if i == n: 177 add(".*") 178 break 179 assert inp[i] is not STAR 180 fixed = [] 181 while i < n and inp[i] is not STAR: 182 fixed.append(inp[i]) 183 i += 1 184 fixed = "".join(fixed) 185 if i == n: 186 add(".*") 187 add(fixed) 188 else: 189 add(f"(?>.*?{fixed})") 190 assert i == n 191 res = "".join(res) 192 return fr'(?s:{res})\Z' 193