• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Common operations on Posix pathnames.
2
3Instead of importing this module directly, import os and refer to
4this module as os.path.  The "os.path" name is an alias for this
5module on Posix systems; on other systems (e.g. Windows),
6os.path provides the same operations in a manner specific to that
7platform, and is an alias to another module (e.g. ntpath).
8
9Some of this can actually be useful on non-Posix systems too, e.g.
10for manipulation of the pathname component of URLs.
11"""
12
13# Strings representing various path-related bits and pieces.
14# These are primarily for export; internally, they are hardcoded.
15# Should be set before imports for resolving cyclic dependency.
curdir = '.'                # component naming the current directory
pardir = '..'               # component naming the parent directory
extsep = '.'                # separator between a base name and its extension
sep = '/'                   # separator between pathname components
pathsep = ':'               # separator between entries of a search path (see defpath)
defpath = '/bin:/usr/bin'   # default search path, entries joined by pathsep
altsep = None               # no alternative component separator in this module
devnull = '/dev/null'       # path of the null device
24
25import errno
26import os
27import sys
28import stat
29import genericpath
30from genericpath import *
31
# Public names exported by "from <this module> import *".  Names listed here
# that are not defined in this file (e.g. "commonprefix", "getsize", "exists")
# are expected to be supplied by the "from genericpath import *" above.
__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath",
           "commonpath", "isjunction","isdevdrive"]
40
41
def _get_sep(path):
    """Return the '/' separator as bytes or str, matching the type of *path*."""
    return b'/' if isinstance(path, bytes) else '/'
47
48# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
49# On MS-DOS this may also turn slashes into backslashes; however, other
50# normalizations (such as optimizing '../' away) are not allowed
51# (another function should be defined to do that).
52
def normcase(s):
    """Return *s* converted with os.fspath(); no case folding is applied here."""
    unchanged = os.fspath(s)
    return unchanged
56
57
58# Return whether a path is absolute.
59# Trivial in Posix, harder on the Mac or MS-DOS.
60
def isabs(s):
    """Return True if the path starts with a '/' separator."""
    s = os.fspath(s)
    return s.startswith(_get_sep(s))
66
67
68# Join pathnames.
69# Ignore the previous parts if a part is absolute.
70# Insert a '/' unless the first part is empty or already ends in '/'.
71
def join(a, *p):
    """Concatenate path components, adding a '/' between them when needed.

    A component that starts with '/' discards everything joined so far.
    An empty final component leaves the result ending in a separator.
    """
    a = os.fspath(a)
    sep = _get_sep(a)
    joined = a
    try:
        for piece in map(os.fspath, p):
            # Check the separator first so that a str/bytes mismatch is
            # detected even when nothing has been accumulated yet.
            if piece.startswith(sep) or not joined:
                joined = piece
                continue
            if not joined.endswith(sep):
                joined += sep
            joined += piece
    except (TypeError, AttributeError, BytesWarning):
        # Give the helper a chance to raise a more specific error about
        # the argument types; otherwise re-raise the original exception.
        genericpath._check_arg_types('join', a, *p)
        raise
    return joined
93
94
# Split a path into head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.
99
def split(p):
    """Split a pathname into the tuple "(head, tail)".

    "tail" is whatever follows the final slash and "head" is everything
    before it.  Either part may be empty.
    """
    p = os.fspath(p)
    sep = _get_sep(p)
    cut = p.rfind(sep) + 1
    head = p[:cut]
    tail = p[cut:]
    # Drop trailing slashes from head, except when head is empty or made
    # up of slashes only (stripping would then leave nothing).
    trimmed = head.rstrip(sep)
    return (trimmed or head), tail
110
111
# Split a path into root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.
116
def splitext(p):
    # No docstring here: it is copied from genericpath._splitext below.
    p = os.fspath(p)
    # Pick separator and extension-separator literals of the same type as p.
    sep, extsep = (b'/', b'.') if isinstance(p, bytes) else ('/', '.')
    # None is passed in the alternative-separator position.
    return genericpath._splitext(p, sep, None, extsep)
splitext.__doc__ = genericpath._splitext.__doc__
127
128# Split a pathname into a drive specification and the rest of the
129# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
130
def splitdrive(p):
    """Split a pathname into (drive, path).

    The drive returned here is always an empty str/bytes of the same
    type as the path.
    """
    p = os.fspath(p)
    no_drive = p[:0]
    return no_drive, p
136
137
try:
    from posix import _path_splitroot_ex as splitroot
except ImportError:
    def splitroot(p):
        """Split a pathname into the tuple (drive, root, tail).

        The drive is always empty here; the tail is whatever follows
        the root."""
        p = os.fspath(p)
        slash = b'/' if isinstance(p, bytes) else '/'
        nothing = slash[:0]
        if not p.startswith(slash):
            # Relative path, e.g.: 'foo'
            return nothing, nothing, p
        if p.startswith(slash * 2) and not p.startswith(slash * 3):
            # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
            # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
            return nothing, p[:2], p[2:]
        # Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
        return nothing, slash, p[1:]
162
163
164# Return the tail (basename) part of a path, same as split(path)[1].
165
def basename(p):
    """Return the part of a pathname that follows the last slash."""
    p = os.fspath(p)
    sep = _get_sep(p)
    return p[p.rfind(sep) + 1:]
172
173
174# Return the head (dirname) part of a path, same as split(path)[0].
175
def dirname(p):
    """Return the part of a pathname up to the last slash."""
    p = os.fspath(p)
    sep = _get_sep(p)
    head = p[:p.rfind(sep) + 1]
    # Strip trailing slashes unless that would leave nothing, i.e. head
    # is empty or consists only of slashes.
    return head.rstrip(sep) or head
185
186
187# Is a path a mount point?
188# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
189
def ismount(path):
    """Return True if *path* looks like a mount point, else False."""
    try:
        own_stat = os.lstat(path)
    except (OSError, ValueError):
        # It cannot be stat'ed -- so not a mount point.
        return False
    # A symlink can never be a mount point
    if stat.S_ISLNK(own_stat.st_mode):
        return False

    path = os.fspath(path)
    dotdot = b'..' if isinstance(path, bytes) else '..'
    parent = join(path, dotdot)
    try:
        parent_stat = os.lstat(parent)
    except OSError:
        # Retry with the resolved form of the parent path.
        parent = realpath(parent)
        try:
            parent_stat = os.lstat(parent)
        except OSError:
            return False

    # path/.. is on a different device than path ...
    if own_stat.st_dev != parent_stat.st_dev:
        return True
    # ... or path/.. is the same i-node as path.
    return own_stat.st_ino == parent_stat.st_ino
218
219
220# Expand paths beginning with '~' or '~user'.
221# '~' means $HOME; '~user' means that user's home directory.
222# If the path doesn't begin with '~', or if the user or $HOME is unknown,
223# the path is returned unchanged (leaving error reporting to whatever
224# function is called with the expanded path as argument).
225# See also module 'glob' for expansion of *, ? and [...] in pathnames.
226# (A function should also be defined to do full *sh-style environment
227# variable expansion.)
228
def expanduser(path):
    """Replace a leading ~ or ~user with the matching home directory.

    The path is returned unchanged when it does not start with '~', when
    the pwd module is needed but unavailable, or when the user cannot be
    found in the password database."""
    path = os.fspath(path)
    tilde = b'~' if isinstance(path, bytes) else '~'
    if not path.startswith(tilde):
        return path
    sep = _get_sep(path)
    # 'end' is the index just past the '~' or '~user' prefix.
    end = path.find(sep, 1)
    if end < 0:
        end = len(path)
    if end == 1 and 'HOME' in os.environ:
        # Bare '~' and $HOME is set: use it directly.
        userhome = os.environ['HOME']
    else:
        try:
            import pwd
        except ImportError:
            # pwd module unavailable, return path unchanged
            return path
        try:
            if end == 1:
                # Bare '~' without $HOME: look up the current uid.
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            else:
                name = path[1:end]
                if isinstance(name, bytes):
                    name = os.fsdecode(name)
                userhome = pwd.getpwnam(name).pw_dir
        except KeyError:
            # bpo-10496: if the user (or current uid) doesn't exist in the
            # password database, return the path unchanged
            return path
    # if no user home, return the path unchanged on VxWorks
    if userhome is None and sys.platform == "vxworks":
        return path
    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)
    userhome = userhome.rstrip(sep)
    # An empty result (e.g. home is '/' and nothing follows) becomes '/'.
    return (userhome + path[end:]) or sep
281
282
283# Expand paths containing shell variable substitutions.
284# This expands the forms $variable and ${variable} only.
285# Non-existent variables are left unchanged.
286
# Lazily compiled patterns for str and bytes paths respectively.
_varprog = None
_varprogb = None

def expandvars(path):
    """Substitute $var and ${var} references with environment values.

    References to variables that are not set are left as they are."""
    global _varprog, _varprogb
    path = os.fspath(path)
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        find_var = _varprogb.search
        lbrace, rbrace = b'{', b'}'
        # os.environb may be missing; None selects the decode/encode path.
        env = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        find_var = _varprog.search
        lbrace, rbrace = '{', '}'
        env = os.environ
    pos = 0
    while (hit := find_var(path, pos)) is not None:
        begin, stop = hit.span(0)
        var = hit.group(1)
        if var.startswith(lbrace) and var.endswith(rbrace):
            var = var[1:-1]
        try:
            if env is None:
                replacement = os.fsencode(os.environ[os.fsdecode(var)])
            else:
                replacement = env[var]
        except KeyError:
            # Unknown variable: keep the reference and search after it.
            pos = stop
            continue
        # Resume scanning after the inserted value so that it is not
        # itself expanded again.
        expanded_head = path[:begin] + replacement
        pos = len(expanded_head)
        path = expanded_head + path[stop:]
    return path
337
338
339# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
340# It should be understood that this may change the meaning of the path
341# if it contains symbolic links!
342
try:
    from posix import _path_normpath as normpath

except ImportError:
    def normpath(path):
        """Normalize a path: collapse repeated slashes and resolve '.'
        and '..' components textually."""
        path = os.fspath(path)
        if isinstance(path, bytes):
            slash, here, up = b'/', b'.', b'..'
        else:
            slash, here, up = '/', '.', '..'
        if not path:
            return here
        _, root, rest = splitroot(path)
        kept = []
        for part in rest.split(slash):
            if not part or part == here:
                continue
            if part != up:
                kept.append(part)
            elif kept:
                if kept[-1] == up:
                    # Already climbing above the start: keep stacking '..'.
                    kept.append(part)
                else:
                    # '..' cancels the previous real component.
                    kept.pop()
            elif not root:
                # Leading '..' of a relative path is kept; directly after
                # a root it is dropped.
                kept.append(part)
        return (root + slash.join(kept)) or here
374
375
def abspath(path):
    """Return a normalized, absolute version of *path*."""
    path = os.fspath(path)
    if isinstance(path, bytes):
        root, current_dir = b'/', os.getcwdb
    else:
        root, current_dir = '/', os.getcwd
    # Only relative paths need the working directory prepended.
    if not path.startswith(root):
        path = join(current_dir(), path)
    return normpath(path)
386
387
388# Return a canonical path (i.e. the absolute location of a file on the
389# filesystem).
390
def realpath(filename, *, strict=False):
    """Return the canonical path of the specified filename, eliminating any
    symbolic links encountered in the path.

    filename is converted with os.fspath(); the result has the same type
    (str or bytes).  A relative filename is resolved against the current
    working directory.

    When strict is false (the default), a component that cannot be
    examined or that is part of a symlink loop is appended to the result
    unresolved and processing continues.  When strict is true, the
    OSError is propagated instead, and an OSError with errno ENOTDIR is
    raised if a non-final component is neither a symlink nor a directory.
    """
    filename = os.fspath(filename)
    # Use literals of the same type as the input (bytes or str).
    if isinstance(filename, bytes):
        sep = b'/'
        curdir = b'.'
        pardir = b'..'
        getcwd = os.getcwdb
    else:
        sep = '/'
        curdir = '.'
        pardir = '..'
        getcwd = os.getcwd

    # The stack of unresolved path parts. When popped, a special value of None
    # indicates that a symlink target has been resolved, and that the original
    # symlink path can be retrieved by popping again. The [::-1] slice is a
    # very fast way of spelling list(reversed(...)).
    rest = filename.split(sep)[::-1]

    # Number of unprocessed parts in 'rest'. This can differ from len(rest)
    # later, because 'rest' might contain markers for unresolved symlinks.
    part_count = len(rest)

    # The resolved path, which is absolute throughout this function.
    # Note: getcwd() returns a normalized and symlink-free path.
    path = sep if filename.startswith(sep) else getcwd()

    # Mapping from symlink paths to *fully resolved* symlink targets. If a
    # symlink is encountered but not yet resolved, the value is None. This is
    # used both to detect symlink loops and to speed up repeated traversals of
    # the same links.
    seen = {}

    while part_count:
        name = rest.pop()
        if name is None:
            # resolved symlink target
            # (the marker pair is not counted in part_count)
            seen[rest.pop()] = path
            continue
        part_count -= 1
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            # Drop the last component; the root stays the root.
            path = path[:path.rindex(sep)] or sep
            continue
        # Candidate path for this component; avoid doubling the root slash.
        if path == sep:
            newpath = path + name
        else:
            newpath = path + sep + name
        try:
            st_mode = os.lstat(newpath).st_mode
            if not stat.S_ISLNK(st_mode):
                # part_count is nonzero when more components follow, so this
                # one has to be a directory in strict mode.
                if strict and part_count and not stat.S_ISDIR(st_mode):
                    raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR),
                                  newpath)
                path = newpath
                continue
            if newpath in seen:
                # Already seen this path
                path = seen[newpath]
                if path is not None:
                    # use cached value
                    continue
                # The symlink is not resolved, so we must have a symlink loop.
                if strict:
                    # Raise OSError(errno.ELOOP)
                    os.stat(newpath)
                path = newpath
                continue
            target = os.readlink(newpath)
        except OSError:
            # Covers failures of lstat()/readlink()/stat() as well as the
            # ENOTDIR error raised above.
            if strict:
                raise
            path = newpath
            continue
        # Resolve the symbolic link
        seen[newpath] = None # not resolved symlink
        if target.startswith(sep):
            # Symlink target is absolute; reset resolved path.
            path = sep
        # Push the symlink path onto the stack, and signal its specialness by
        # also pushing None. When these entries are popped, we'll record the
        # fully-resolved symlink target in the 'seen' mapping.
        rest.append(newpath)
        rest.append(None)
        # Push the unresolved symlink target parts onto the stack.
        target_parts = target.split(sep)[::-1]
        rest.extend(target_parts)
        part_count += len(target_parts)

    return path
486
487
# True only when sys.platform is 'darwin'; False on every other platform.
supports_unicode_filenames = (sys.platform == 'darwin')
489
def relpath(path, start=None):
    """Return *path* expressed relative to *start*.

    *start* defaults to the current directory.  Raises ValueError when
    *path* is empty."""

    path = os.fspath(path)
    if not path:
        raise ValueError("no path specified")

    if isinstance(path, bytes):
        dot, slash, dotdot = b'.', b'/', b'..'
    else:
        dot, slash, dotdot = '.', '/', '..'

    start = dot if start is None else os.fspath(start)

    try:
        # Absolute forms with the leading slashes removed, split into parts.
        from_tail = abspath(start).lstrip(slash)
        to_tail = abspath(path).lstrip(slash)
        from_parts = from_tail.split(slash) if from_tail else []
        to_parts = to_tail.split(slash) if to_tail else []
        # Work out how much of the filepath is shared by start and path.
        shared = len(commonprefix([from_parts, to_parts]))

        # Climb out of the unshared part of start, then descend into path.
        climb = [dotdot] * (len(from_parts) - shared)
        steps = climb + to_parts[shared:]
        return slash.join(steps) if steps else dot
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Let the helper raise a clearer error about the argument types;
        # otherwise re-raise the original exception.
        genericpath._check_arg_types('relpath', path, start)
        raise
526
527
528# Return the longest common sub-path of the sequence of paths given as input.
529# The paths are not normalized before comparing them (this is the
530# responsibility of the caller). Any trailing separator is stripped from the
531# returned path.
532
def commonpath(paths):
    """Return the longest sub-path shared by every path in *paths*.

    Raises ValueError if *paths* is empty or mixes absolute and relative
    paths."""

    paths = tuple(map(os.fspath, paths))

    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep, curdir = b'/', b'.'
    else:
        sep, curdir = '/', '.'

    try:
        # Splitting first surfaces str/bytes mismatches as TypeError.
        raw_parts = [item.split(sep) for item in paths]

        rooted = {item.startswith(sep) for item in paths}
        if len(rooted) != 1:
            raise ValueError("Can't mix absolute and relative paths")
        (is_absolute,) = rooted

        # Ignore empty and '.' components.
        cleaned = [[comp for comp in parts if comp and comp != curdir]
                   for parts in raw_parts]
        # The common prefix of the smallest and largest lists is common
        # to every list in between.
        lowest = min(cleaned)
        highest = max(cleaned)
        common = lowest
        for index, (low, high) in enumerate(zip(lowest, highest)):
            if low != high:
                common = lowest[:index]
                break

        prefix = sep if is_absolute else sep[:0]
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        # Let the helper raise a clearer error about the argument types;
        # otherwise re-raise the original exception.
        genericpath._check_arg_types('commonpath', *paths)
        raise
570