"""distutils.filelist

Provides the FileList class, used for poking about the filesystem
and building lists of files.
"""

import os
import re
import fnmatch
import functools
from distutils.util import convert_path
from distutils.errors import DistutilsTemplateError, DistutilsInternalError
from distutils import log


class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until set_allfiles() or findall() has
        been called (include_pattern() calls findall() on demand)
    """

    def __init__(self, warn=None, debug_print=None):
        """Create an empty file list.

        'warn' and 'debug_print' are ignored; they are accepted only for
        backwards compatibility with older callers.
        """
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate filenames."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Set the candidate list to every file found under 'dir'."""
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add a single filename to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename).

        Not a strict lexical sort: each name is split with os.path.split()
        and the resulting (head, tail) pairs are what get compared.
        """
        split_names = sorted(map(os.path.split, self.files))
        self.files = [os.path.join(*parts) for parts in split_names]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent duplicates from 'self.files' in place.

        Assumes the list has been sorted, so equal names are neighbours.
        """
        # Walk backwards so deleting an entry never shifts the indices
        # that are still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split one template line into its action and arguments.

        Return a tuple (action, patterns, dir, dir_pattern).  'action' is
        always set; which of the other three are set (the rest are None)
        depends on the action:

          include, exclude, global-include, global-exclude -> patterns
          recursive-include, recursive-exclude             -> dir, patterns
          graft, prune                                     -> dir_pattern

        Paths and patterns are passed through convert_path().

        Raise DistutilsTemplateError if the line is blank, names an unknown
        action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # A blank line used to escape as a bare IndexError from
            # words[0]; report it as the template error it is.
            raise DistutilsTemplateError(
                "empty template line: expected an action")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(w) for w in words[1:]]
        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            dir = convert_path(words[1])
            patterns = [convert_path(w) for w in words[2:]]
        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                      "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])
        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Apply one template line to this file list.

        The line is parsed with _parse_template_line() and the matching
        include/exclude operation is run for each pattern.  A warning is
        logged for every pattern that matched nothing.

        Raise DistutilsTemplateError if the line is malformed.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("no previously-included directories found "
                          "matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?'  match non-special characters, where "special" is platform-
        dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found, False otherwise.
        """
        # XXX docstring lying about what the special chars are?
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = True
        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate backwards so deletions do not disturb pending indices.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = True
        return files_found


# ----------------------------------------------------------------------
# Utility functions

class _UniqueDirs(set):
    """Exclude previously-seen directories from os.walk() results.

    Directories are identified by (st_dev, st_ino), so a directory that is
    reached again through a symbolic link is recognised and not descended
    into a second time.  This prevents runaway recursion on symlink loops
    when walking with followlinks=True.
    """

    def __call__(self, walk_item):
        """Return True if 'walk_item' is a directory not seen before.

        'walk_item' is one (base, dirs, files) triple from os.walk().  For
        an already-seen directory, 'dirs' is emptied in place so that
        os.walk() does not traverse any further below it.
        """
        base, dirs, files = walk_item
        stat = os.stat(base)
        candidate = stat.st_dev, stat.st_ino
        found = candidate in self
        if found:
            del dirs[:]
        self.add(candidate)
        return not found

    @classmethod
    def filter(cls, items):
        """Lazily filter os.walk() triples down to unique directories."""
        return filter(cls(), items)


def _find_all_simple(path):
    """
    Find all files under 'path'

    Symbolic links are followed; each real directory is visited only once,
    so link cycles cannot cause unbounded recursion.  Returns a lazy
    iterable of paths for which os.path.isfile() is true.
    """
    all_unique = _UniqueDirs.filter(os.walk(path, followlinks=True))
    results = (
        os.path.join(base, file)
        for base, dirs, files in all_unique
        for file in files
    )
    return filter(os.path.isfile, results)


def findall(dir=os.curdir):
    """
    Find all files under 'dir' and return the list of full filenames.
    Unless dir is '.', return full filenames with dir prepended.
    """
    files = _find_all_simple(dir)
    if dir == os.curdir:
        # Strip the leading './' so names are relative to the current dir.
        make_rel = functools.partial(os.path.relpath, start=dir)
        files = map(make_rel, files)
    return list(files)


def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' does not match "special characters" (which are
    platform-specific).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters (currently: just os.sep).
    sep = os.sep
    if os.sep == '\\':
        # we're using a regex to manipulate a regex, so we need
        # to escape the backslash twice
        sep = r'\\\\'
    escaped = r'\1[^%s]' % sep
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
    return pattern_re


def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    If 'prefix' is given, the regex matches names that start with 'prefix'
    followed by os.sep and end with 'pattern'; otherwise, if 'anchor' is
    true, the match is anchored at the start of the name.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        else:
            return pattern

    # ditch start and end characters: translating a lone '_' reveals
    # whatever wrapper text glob_to_re() puts around the regex body.
    start, _, end = glob_to_re('_').partition('_')

    if pattern:
        pattern_re = glob_to_re(pattern)
        assert pattern_re.startswith(start) and pattern_re.endswith(end)
    else:
        pattern_re = ''

    if prefix is not None:
        prefix_re = glob_to_re(prefix)
        assert prefix_re.startswith(start) and prefix_re.endswith(end)
        prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
        sep = os.sep
        if os.sep == '\\':
            sep = r'\\'
        pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
        pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
    else:                               # no prefix -- respect anchor flag
        if anchor:
            pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])

    return re.compile(pattern_re)