• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Filename globbing utility."""
2
3import contextlib
4import os
5import re
6import fnmatch
7import functools
8import itertools
9import operator
10import stat
11import sys
12
13
14__all__ = ["glob", "iglob", "escape", "translate"]
15
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
        include_hidden=False):
    """Collect every path matching a pathname pattern into a list.

    Patterns use the shell-style wildcards understood by fnmatch.
    Unlike fnmatch, names that start with a dot are treated specially:
    by default '*' and '?' do not match them.

    When `include_hidden` is true, the patterns '*', '?' and '**' also
    match hidden directories.

    When `recursive` is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    matches = iglob(
        pathname,
        root_dir=root_dir,
        dir_fd=dir_fd,
        recursive=recursive,
        include_hidden=include_hidden,
    )
    return list(matches)
33
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
          include_hidden=False):
    """Lazily yield the paths that match a pathname pattern.

    Patterns use the shell-style wildcards understood by fnmatch, except
    that names beginning with a dot are not matched by '*' and '?'
    unless `include_hidden` is true.

    When `recursive` is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
    # With no root given, use an empty value of the same type as pathname.
    root_dir = pathname[:0] if root_dir is None else os.fspath(root_dir)
    matches = _iglob(pathname, root_dir, dir_fd, recursive, False,
                     include_hidden=include_hidden)
    if not pathname or (recursive and _isrecursive(pathname[:2])):
        # The first result may be an empty string; drop it when it is.
        nothing = object()
        first = next(matches, nothing)
        if first is not nothing and first:
            matches = itertools.chain((first,), matches)
    return matches
62
def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
           include_hidden=False):
    """Yield the paths matching *pathname*, looked up under *root_dir*.

    The pattern is split into a directory part and a final component.
    The directory part is expanded first (recursively, when it contains
    wildcards) and the final component is then matched inside each
    resulting directory.  *dironly* is forwarded to the per-directory
    helpers; the recursive call for the directory part passes True.
    """
    dirname, basename = os.path.split(pathname)
    if not has_magic(pathname):
        # No wildcards at all: a single existence check decides the result.
        assert not dironly
        if basename:
            if _lexists(_join(root_dir, pathname), dir_fd):
                yield pathname
        else:
            # Patterns ending with a slash should match only directories
            if _isdir(_join(root_dir, dirname), dir_fd):
                yield pathname
        return
    if not dirname:
        # Only a final component: match it directly inside root_dir.
        if recursive and _isrecursive(basename):
            yield from _glob2(root_dir, basename, dir_fd, dironly,
                             include_hidden=include_hidden)
        else:
            yield from _glob1(root_dir, basename, dir_fd, dironly,
                              include_hidden=include_hidden)
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        dirs = _iglob(dirname, root_dir, dir_fd, recursive, True,
                      include_hidden=include_hidden)
    else:
        dirs = [dirname]
    # Pick the helper that fits the final component: recursive '**',
    # an ordinary wildcard pattern, or a literal name.
    if has_magic(basename):
        if recursive and _isrecursive(basename):
            glob_in_dir = _glob2
        else:
            glob_in_dir = _glob1
    else:
        glob_in_dir = _glob0
    for dirname in dirs:
        for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly,
                               include_hidden=include_hidden):
            # The yielded path is built from dirname, without root_dir.
            yield os.path.join(dirname, name)
103
104# These 2 helper functions non-recursively glob inside a literal directory.
105# They return a list of basenames.  _glob1 accepts a pattern while _glob0
106# takes a literal basename (so it only has to check for its existence).
107
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the names in *dirname* that match the wildcard *pattern*.

    Hidden names (leading dot) are dropped before matching unless
    *include_hidden* is true or the pattern itself starts with a dot.
    """
    entries = _listdir(dirname, dir_fd, dironly)
    if include_hidden or _ishidden(pattern):
        candidates = entries
    else:
        candidates = (entry for entry in entries if not _ishidden(entry))
    return fnmatch.filter(candidates, pattern)
113
def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
    """Return ``[basename]`` if the literal name exists in *dirname*, else ``[]``."""
    if basename:
        found = _lexists(_join(dirname, basename), dir_fd)
    else:
        # `os.path.split()` gives an empty basename for paths that end with
        # a directory separator.  'q*x/' should match only directories.
        found = _isdir(dirname, dir_fd)
    return [basename] if found else []
124
# Message template handed to warnings._deprecated() by glob0() and glob1().
_deprecated_function_message = (
    "{name} is deprecated and will be removed in Python {remove}. Use "
    "glob.glob and pass a directory to its root_dir argument instead."
)
129
def glob0(dirname, pattern):
    """Deprecated wrapper around _glob0(); emits a deprecation warning."""
    import warnings
    warnings._deprecated(
        "glob.glob0",
        _deprecated_function_message,
        remove=(3, 15),
    )
    return _glob0(dirname, pattern, dir_fd=None, dironly=False)
134
def glob1(dirname, pattern):
    """Deprecated wrapper around _glob1(); emits a deprecation warning."""
    import warnings
    warnings._deprecated(
        "glob.glob1",
        _deprecated_function_message,
        remove=(3, 15),
    )
    return _glob1(dirname, pattern, dir_fd=None, dironly=False)
139
140# This helper function recursively yields relative pathnames inside a literal
141# directory.
142
def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Yield an empty name for *dirname* itself, then everything below it."""
    assert _isrecursive(pattern)
    base_is_usable = not dirname or _isdir(dirname, dir_fd)
    if base_is_usable:
        # An empty value of the pattern's type stands for the directory itself.
        yield pattern[:0]
    descendants = _rlistdir(dirname, dir_fd, dironly,
                            include_hidden=include_hidden)
    yield from descendants
149
150# If dironly is false, yields all file names inside a directory.
151# If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the entry names found in *dirname* using os.scandir().

    When *dir_fd* is given, *dirname* is opened relative to it (or, if
    *dirname* is empty, *dir_fd* itself is scanned).  Any OSError raised
    while opening or scanning ends the iteration silently.
    """
    try:
        fd = None
        fsencode = None
        if dir_fd is not None:
            if dirname:
                # Open the directory relative to dir_fd; closed in `finally`.
                fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
            else:
                arg = dir_fd
            if isinstance(dirname, bytes):
                # Scanning goes through a descriptor here, so names are
                # encoded to bytes explicitly to match the input type.
                fsencode = os.fsencode
        elif dirname:
            arg = dirname
        elif isinstance(dirname, bytes):
            arg = bytes(os.curdir, 'ASCII')
        else:
            arg = os.curdir
        try:
            with os.scandir(arg) as it:
                for entry in it:
                    try:
                        if not dironly or entry.is_dir():
                            if fsencode is not None:
                                yield fsencode(entry.name)
                            else:
                                yield entry.name
                    except OSError:
                        # Skip this entry and carry on with the next one.
                        pass
        finally:
            if fd is not None:
                os.close(fd)
    except OSError:
        return
185
def _listdir(dirname, dir_fd, dironly):
    """Return the names produced by _iterdir() as a list, then close it."""
    names_iter = _iterdir(dirname, dir_fd, dironly)
    try:
        return list(names_iter)
    finally:
        # Always close the generator so its cleanup code runs promptly.
        names_iter.close()
189
190# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Yield every name below *dirname*, as paths relative to it."""
    for entry in _listdir(dirname, dir_fd, dironly):
        if not include_hidden and _ishidden(entry):
            continue
        yield entry
        child = _join(dirname, entry) if dirname else entry
        for nested in _rlistdir(child, dir_fd, dironly,
                                include_hidden=include_hidden):
            yield _join(entry, nested)
200
201
def _lexists(pathname, dir_fd):
    """Behave like os.path.lexists(), with optional *dir_fd* support."""
    if dir_fd is not None:
        try:
            os.lstat(pathname, dir_fd=dir_fd)
        except (OSError, ValueError):
            return False
        return True
    return os.path.lexists(pathname)
212
def _isdir(pathname, dir_fd):
    """Behave like os.path.isdir(), with optional *dir_fd* support."""
    if dir_fd is not None:
        try:
            info = os.stat(pathname, dir_fd=dir_fd)
        except (OSError, ValueError):
            return False
        return stat.S_ISDIR(info.st_mode)
    return os.path.isdir(pathname)
223
def _join(dirname, basename):
    """Join two path pieces, returning the non-empty one if the other is empty."""
    if dirname and basename:
        return os.path.join(dirname, basename)
    # An empty dirname or basename is the common case.
    return dirname or basename
229
# Patterns that capture one glob metacharacter ('*', '?' or '['); there is
# one for str input and one for bytes input.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')
232
def has_magic(s):
    """Return True if *s* (str or bytes) contains a glob metacharacter."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
239
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot."""
    first = path[0]
    # Indexing bytes gives an int, so compare against both forms of '.'.
    return first == '.' or first == b'.'[0]
242
def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard '**'."""
    double_star = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == double_star
248
def escape(pathname):
    """Escape all special characters.
    """
    # Each of "*?[" is escaped by wrapping it in square brackets.  The drive
    # part is left alone: metacharacters have no effect there and shouldn't
    # be escaped.
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        return drive + magic_check_bytes.sub(br'[\1]', rest)
    return drive + magic_check.sub(r'[\1]', rest)
260
261
# Pattern parts that _Globber.selector() routes to special_selector().
_special_parts = ('', '.', '..')
# Flags for os.open() in _iterdir(); O_DIRECTORY is added where available.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Sentinel for _Globber's `recursive` option, compared by identity; when it
# is used, recursive_selector() passes follow_symlinks=False to is_dir().
_no_recurse_symlinks = object()
265
266
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Translate a pathname with shell wildcards to a regular expression.

    If `recursive` is true, the pattern segment '**' will match any number of
    path segments.

    If `include_hidden` is true, wildcards can match path segments beginning
    with a dot ('.').

    If a sequence of separator characters is given to `seps`, they will be
    used to split the pattern into segments and match path separators. If not
    given, os.path.sep and os.path.altsep (where available) are used.
    """
    if not seps:
        if os.path.altsep:
            seps = (os.path.sep, os.path.altsep)
        else:
            seps = os.path.sep
    escaped_seps = ''.join(map(re.escape, seps))
    # any_sep matches a single separator; not_sep matches a single
    # non-separator character.
    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
    not_sep = f'[^{escaped_seps}]'
    # Regex fragments used for whole-segment wildcards ('*' and '**').
    if include_hidden:
        one_last_segment = f'{not_sep}+'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:.+{any_sep})?'
        any_last_segments = '.*'
    else:
        # The first character of a segment may be neither a separator
        # nor a dot.
        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:{one_segment})*'
        any_last_segments = f'{any_segments}(?:{one_last_segment})?'

    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == '*':
            results.append(one_segment if idx < last_part_idx else one_last_segment)
        elif recursive and part == '**':
            if idx < last_part_idx:
                # For a run of consecutive '**' parts, only the last one
                # emits a fragment.
                if parts[idx + 1] != '**':
                    results.append(any_segments)
            else:
                results.append(any_last_segments)
        else:
            if part:
                if not include_hidden and part[0] in '*?':
                    # A leading wildcard must not match a leading dot.
                    results.append(r'(?!\.)')
                results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
            if idx < last_part_idx:
                results.append(any_sep)
    res = ''.join(results)
    return fr'(?s:{res})\Z'
320
321
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Return the ``match`` method of the regex compiled from glob
    pattern *pat*, honouring the requested case sensitivity."""
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    if case_sensitive:
        flags = re.NOFLAG
    else:
        flags = re.IGNORECASE
    compiled = re.compile(regex, flags=flags)
    return compiled.match
329
330
class _Globber:
    """Class providing shell-style pattern matching and globbing.

    The low-level methods below call `lstat()`, `joinpath()`, `iterdir()`
    and `with_segments()` on the path objects they are given; subclasses
    may override them to work with another path representation.
    """

    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
        # sep: separator used to join pattern parts and to build regexes.
        # case_sensitive: selects the regex flags in compile().
        # case_pedantic: when true, literal parts also go through
        #   wildcard_selector() rather than literal_selector().
        # recursive: enables '**' handling; if it is the _no_recurse_symlinks
        #   sentinel, is_dir() is called with follow_symlinks=False.
        self.sep = sep
        self.case_sensitive = case_sensitive
        self.case_pedantic = case_pedantic
        self.recursive = recursive

    # Low-level methods

    lstat = operator.methodcaller('lstat')
    add_slash = operator.methodcaller('joinpath', '')

    @staticmethod
    def scandir(path):
        """Emulates os.scandir(), which returns an object that can be used as
        a context manager. This method is called by walk() and glob().
        """
        return contextlib.nullcontext(path.iterdir())

    @staticmethod
    def concat_path(path, text):
        """Appends text to the given path.
        """
        return path.with_segments(path._raw_path + text)

    @staticmethod
    def parse_entry(entry):
        """Returns the path of an entry yielded from scandir().
        """
        return entry

    # High-level methods

    def compile(self, pat):
        """Returns a matcher for *pat* built by the cached _compile_pattern().
        """
        return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)

    def selector(self, parts):
        """Returns a function that selects from a given path, walking and
        filtering according to the glob-style pattern parts in *parts*.
        """
        if not parts:
            return self.select_exists
        # Parts are consumed from the end of the list with pop().
        part = parts.pop()
        if self.recursive and part == '**':
            selector = self.recursive_selector
        elif part in _special_parts:
            selector = self.special_selector
        elif not self.case_pedantic and magic_check.search(part) is None:
            selector = self.literal_selector
        else:
            selector = self.wildcard_selector
        return selector(part, parts)

    def special_selector(self, part, parts):
        """Returns a function that selects special children of the given path.
        """
        select_next = self.selector(parts)

        def select_special(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            return select_next(path, exists)
        return select_special

    def literal_selector(self, part, parts):
        """Returns a function that selects a literal descendant of a path.
        """

        # Optimization: consume and join any subsequent literal parts here,
        # rather than leaving them for the next selector. This reduces the
        # number of string concatenation operations and calls to add_slash().
        while parts and magic_check.search(parts[-1]) is None:
            part += self.sep + parts.pop()

        select_next = self.selector(parts)

        def select_literal(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            # The joined literal path has not been checked, so the next
            # selector is told that existence is unknown.
            return select_next(path, exists=False)
        return select_literal

    def wildcard_selector(self, part, parts):
        """Returns a function that selects direct children of a given path,
        filtering by pattern.
        """

        # A bare '*' accepts every entry, so no regex is compiled for it.
        match = None if part == '*' else self.compile(part)
        # When more parts follow, only directories are passed on.
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path, exists=False):
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                pass
            else:
                for entry in entries:
                    if match is None or match(entry.name):
                        if dir_only:
                            try:
                                if not entry.is_dir():
                                    continue
                            except OSError:
                                continue
                        entry_path = self.parse_entry(entry)
                        if dir_only:
                            yield from select_next(entry_path, exists=True)
                        else:
                            yield entry_path
        return select_wildcard

    def recursive_selector(self, part, parts):
        """Returns a function that selects a given path and all its children,
        recursively, filtering by pattern.
        """
        # Optimization: consume following '**' parts, which have no effect.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: consume and join any following non-special parts here,
        # rather than leaving them for the next selector. They're used to
        # build a regular expression, which we use to filter the results of
        # the recursive walk. As a result, non-special pattern segments
        # following a '**' wildcard don't require additional filesystem access
        # to expand.
        follow_symlinks = self.recursive is not _no_recurse_symlinks
        if follow_symlinks:
            while parts and parts[-1] not in _special_parts:
                part += self.sep + parts.pop()

        # If nothing was joined onto '**', every walked path is accepted.
        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path, exists=False):
            path = self.add_slash(path)
            # Matching starts at the end of the base path, so the regex is
            # applied to the remainder of each candidate path only.
            match_pos = len(str(path))
            if match is None or match(str(path), match_pos):
                yield from select_next(path, exists)
            # Explicit stack of directories still to be scanned.
            stack = [path]
            while stack:
                yield from select_recursive_step(stack, match_pos)

        def select_recursive_step(stack, match_pos):
            path = stack.pop()
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                pass
            else:
                for entry in entries:
                    is_dir = False
                    try:
                        if entry.is_dir(follow_symlinks=follow_symlinks):
                            is_dir = True
                    except OSError:
                        # Treat an entry that can't be inspected as a
                        # non-directory.
                        pass

                    if is_dir or not dir_only:
                        entry_path = self.parse_entry(entry)
                        if match is None or match(str(entry_path), match_pos):
                            if dir_only:
                                yield from select_next(entry_path, exists=True)
                            else:
                                # Optimization: directly yield the path if this is
                                # last pattern part.
                                yield entry_path
                        if is_dir:
                            stack.append(entry_path)

        return select_recursive

    def select_exists(self, path, exists=False):
        """Yields the given path, if it exists.
        """
        if exists:
            # Optimization: this path is already known to exist, e.g. because
            # it was returned from os.scandir(), so we skip calling lstat().
            yield path
        else:
            try:
                self.lstat(path)
                yield path
            except OSError:
                pass
525
526
class _StringGlobber(_Globber):
    """Globber variant whose low-level operations act on string paths."""

    lstat = staticmethod(os.lstat)
    scandir = staticmethod(os.scandir)
    parse_entry = operator.attrgetter('path')
    concat_path = operator.add

    if os.name == 'nt':
        @staticmethod
        def add_slash(pathname):
            # Only the part after the drive/root decides whether a
            # separator has to be appended.
            tail = os.path.splitroot(pathname)[2]
            if tail and tail[-1] not in '\\/':
                return f'{pathname}\\'
            return pathname
    else:
        @staticmethod
        def add_slash(pathname):
            if pathname and pathname[-1] != '/':
                return f'{pathname}/'
            return pathname
546