• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 """Common operations on Posix pathnames.
2 
3 Instead of importing this module directly, import os and refer to
4 this module as os.path.  The "os.path" name is an alias for this
5 module on Posix systems; on other systems (e.g. Windows),
6 os.path provides the same operations in a manner specific to that
7 platform, and is an alias to another module (e.g. ntpath).
8 
9 Some of this can actually be useful on non-Posix systems too, e.g.
10 for manipulation of the pathname component of URLs.
11 """
12 
# Path-related string constants.
# They exist mainly so that other code can import them; the functions in
# this module hardcode the same values rather than reading these names.
# They are assigned ahead of the imports below so that a cyclic import
# can already see them.
curdir = "."
pardir = ".."
extsep = "."
sep = "/"
pathsep = ":"
defpath = "/bin:/usr/bin"
altsep = None
devnull = "/dev/null"
24 
25 import os
26 import sys
27 import stat
28 import genericpath
29 from genericpath import *
30 
# Names made available by a star-import of this module.
__all__ = [
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix",
    "getsize", "getmtime", "getatime", "getctime",
    "islink", "exists", "lexists", "isdir", "isfile", "ismount",
    "expanduser", "expandvars", "normpath", "abspath",
    "samefile", "sameopenfile", "samestat",
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep", "extsep",
    "devnull",
    "realpath", "supports_unicode_filenames", "relpath", "commonpath",
]
39 
40 
def _get_sep(path):
    """Return the '/' separator in the same type (bytes or str) as *path*."""
    return b'/' if isinstance(path, bytes) else '/'
46 
47 # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
48 # On MS-DOS this may also turn slashes into backslashes; however, other
49 # normalizations (such as optimizing '../' away) are not allowed
50 # (another function should be defined to do that).
51 
def normcase(s):
    """Normalize the case of a pathname.

    On Posix nothing is changed: the argument is only passed through
    os.fspath() and the resulting str or bytes is returned.
    """
    unchanged = os.fspath(s)
    return unchanged
55 
56 
57 # Return whether a path is absolute.
58 # Trivial in Posix, harder on the Mac or MS-DOS.
59 
def isabs(s):
    """Return True if the path starts with a '/' separator."""
    s = os.fspath(s)
    return s.startswith(_get_sep(s))
65 
66 
67 # Join pathnames.
68 # Ignore the previous parts if a part is absolute.
69 # Insert a '/' unless the first part is empty or already ends in '/'.
70 
def join(a, *p):
    """Join pathname components with '/'.

    A component that starts with '/' replaces everything joined so far.
    A separator is inserted between components unless the path built so
    far is empty or already ends with one, so an empty final component
    yields a result that ends with a separator.

    Raises TypeError (via genericpath._check_arg_types) when the
    components cannot be combined, e.g. when str and bytes are mixed.
    """
    a = os.fspath(a)
    sep = _get_sep(a)
    result = a
    try:
        if not p:
            # bpo-23780: with no further components, still verify that the
            # single argument has a type compatible with the separator.
            result[:0] + sep
        for part in map(os.fspath, p):
            if part.startswith(sep):
                # Absolute component: discard what was accumulated.
                result = part
                continue
            if result and not result.endswith(sep):
                result += sep
            result += part
    except (TypeError, AttributeError, BytesWarning):
        # Prefer the more descriptive error from the argument checker;
        # if it does not raise, propagate the original exception.
        genericpath._check_arg_types('join', a, *p)
        raise
    return result
93 
94 
95 # Split a path in head (everything up to the last '/') and tail (the
96 # rest).  If the path ends in '/', tail will be empty.  If there is no
97 # '/' in the path, head  will be empty.
98 # Trailing '/'es are stripped from head unless it is the root.
99 
def split(p):
    """Split a pathname into the tuple (head, tail).

    tail is everything after the last '/', and head is everything before
    it.  Either part may be empty.  Trailing slashes are removed from head
    unless head consists only of slashes.
    """
    p = os.fspath(p)
    sep = _get_sep(p)
    cut = p.rfind(sep) + 1
    head = p[:cut]
    # Stripping an all-slash (or empty) head gives an empty value, in which
    # case the unstripped head is kept.
    head = head.rstrip(sep) or head
    return head, p[cut:]
110 
111 
112 # Split a path in root and extension.
113 # The extension is everything starting at the last dot in the last
114 # pathname component; the root is everything before that.
115 # It is always true that root + ext == p.
116 
def splitext(p):
    # The docstring is copied from genericpath._splitext below.
    p = os.fspath(p)
    # Pick separator and extension marker in the same type as the path.
    sep, extsep = (b'/', b'.') if isinstance(p, bytes) else ('/', '.')
    return genericpath._splitext(p, sep, None, extsep)
splitext.__doc__ = genericpath._splitext.__doc__
127 
128 # Split a pathname into a drive specification and the rest of the
129 # path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
130 
def splitdrive(p):
    """Split a pathname into the tuple (drive, path).

    On Posix the drive is always an empty str or bytes (matching the type
    of the argument) and path is the whole argument.
    """
    p = os.fspath(p)
    no_drive = p[:0]
    return no_drive, p
136 
137 
138 # Return the tail (basename) part of a path, same as split(path)[1].
139 
def basename(p):
    """Return the part of a pathname that follows the last '/'.

    The whole path is returned when it contains no '/', and an empty
    value when it ends with one.
    """
    p = os.fspath(p)
    # rpartition leaves the entire string in the last slot when the
    # separator does not occur.
    return p.rpartition(_get_sep(p))[2]
146 
147 
148 # Return the head (dirname) part of a path, same as split(path)[0].
149 
def dirname(p):
    """Return the part of a pathname that precedes the last '/'.

    Trailing slashes are removed from the result unless it consists only
    of slashes.  The result is empty when the path contains no '/'.
    """
    p = os.fspath(p)
    sep = _get_sep(p)
    head = p[:p.rfind(sep) + 1]
    # An all-slash (or empty) head strips down to nothing; keep it intact.
    return head.rstrip(sep) or head
159 
160 
161 # Is a path a symbolic link?
162 # This will always return false on systems where os.lstat doesn't exist.
163 
def islink(path):
    """Return True if os.lstat() reports *path* as a symbolic link.

    Returns False when os.lstat() raises OSError, ValueError or
    AttributeError.
    """
    try:
        info = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
171 
172 # Being true for dangling symbolic links is also useful.
173 
def lexists(path):
    """Return True if os.lstat() succeeds on *path*.

    Because lstat does not follow symbolic links, a broken link still
    counts as existing.  OSError and ValueError from lstat give False.
    """
    try:
        os.lstat(path)
    except (OSError, ValueError):
        return False
    else:
        return True
181 
182 
183 # Is a path a mount point?
184 # (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
185 
def ismount(path):
    """Return True if *path* looks like a mount point.

    The path is compared with the resolved 'path/..': it counts as a mount
    point when the two are on different devices, or when they are the same
    i-node.  Paths that cannot be lstat'ed and symbolic links are never
    mount points.
    """
    try:
        own = os.lstat(path)
    except (OSError, ValueError):
        # Cannot be examined, so not a mount point.
        return False
    if stat.S_ISLNK(own.st_mode):
        # A symlink can never be a mount point.
        return False

    dotdot = b'..' if isinstance(path, bytes) else '..'
    parent = realpath(join(path, dotdot))
    try:
        above = os.lstat(parent)
    except (OSError, ValueError):
        return False

    # Different device: path/.. lives on another filesystem.
    # Same i-node: path/.. is path itself.
    return own.st_dev != above.st_dev or own.st_ino == above.st_ino
217 
218 
219 # Expand paths beginning with '~' or '~user'.
220 # '~' means $HOME; '~user' means that user's home directory.
221 # If the path doesn't begin with '~', or if the user or $HOME is unknown,
222 # the path is returned unchanged (leaving error reporting to whatever
223 # function is called with the expanded path as argument).
224 # See also module 'glob' for expansion of *, ? and [...] in pathnames.
225 # (A function should also be defined to do full *sh-style environment
226 # variable expansion.)
227 
def expanduser(path):
    """Expand ~ and ~user constructions.

    A leading '~' is replaced by $HOME or, when HOME is not set, by the
    current user's home directory from the password database.  A leading
    '~user' is replaced by that user's home directory.  The result has the
    same type (str or bytes) as the argument.

    The path is returned unchanged when it does not start with '~', when
    the user is not found in the password database, or when the pwd
    module cannot be imported.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        tilde = b'~'
        root = b'/'
    else:
        tilde = '~'
        root = '/'
    if not path.startswith(tilde):
        return path
    # The (possibly empty) user name runs from index 1 up to the first '/'.
    i = path.find(root, 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            try:
                import pwd
            except ImportError:
                # pwd module unavailable, return path unchanged
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # bpo-10496: if the current user identifier doesn't exist in
                # the password database, return the path unchanged
                return path
    else:
        try:
            import pwd
        except ImportError:
            # pwd module unavailable, return path unchanged
            return path
        name = path[1:i]
        if isinstance(name, bytes):
            # Decode with the filesystem encoding and error handler so that
            # arbitrary bytes cannot raise UnicodeDecodeError; an undecodable
            # name simply will not be found below.
            name = os.fsdecode(name)
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            # bpo-10496: if the user name from the path doesn't exist in the
            # password database, return the path unchanged
            return path
        userhome = pwent.pw_dir
    # if no user home, return the path unchanged on VxWorks
    if userhome is None and sys.platform == "vxworks":
        return path
    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)
    # Drop trailing slashes from the home directory; if nothing is left
    # after appending the rest of the path, the result is the root.
    userhome = userhome.rstrip(root)
    return (userhome + path[i:]) or root
275 
276 
277 # Expand paths containing shell variable substitutions.
278 # This expands the forms $variable and ${variable} only.
279 # Non-existent variables are left unchanged.
280 
# Compiled patterns for str and bytes paths, created on first use.
_varprog = None
_varprogb = None

def expandvars(path):
    """Expand shell variables of the forms $var and ${var}.

    Variables that are not set in the environment are left in place.  The
    result has the same type (str or bytes) as the argument.
    """
    path = os.fspath(path)
    global _varprog, _varprogb
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprogb.search
        lbrace, rbrace = b'{', b'}'
        # May be None when os has no bytes environment; handled below.
        env = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprog.search
        lbrace, rbrace = '{', '}'
        env = os.environ

    pos = 0
    match = find(path, pos)
    while match:
        begin, stop = match.span(0)
        var = match.group(1)
        if var.startswith(lbrace) and var.endswith(rbrace):
            var = var[1:-1]
        try:
            if env is None:
                # No bytes environment: look the name up as str and encode.
                value = os.fsencode(os.environ[os.fsdecode(var)])
            else:
                value = env[var]
        except KeyError:
            # Unknown variable: keep the text and continue after it.
            pos = stop
        else:
            expanded = path[:begin] + value
            # Resume after the substituted value so it is not re-expanded.
            pos = len(expanded)
            path = expanded + path[stop:]
        match = find(path, pos)
    return path
331 
332 
333 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
334 # It should be understood that this may change the meaning of the path
335 # if it contains symbolic links!
336 
def normpath(path):
    """Normalize a path: collapse repeated slashes, drop '.' components
    and resolve '..' components textually.

    An empty path, or one that normalizes to nothing, becomes '.'.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        sep, empty, dot, dotdot = b'/', b'', b'.', b'..'
    else:
        sep, empty, dot, dotdot = '/', '', '.', '..'
    if path == empty:
        return dot

    # Number of slashes to keep at the front.
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    # (see http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)
    if not path.startswith(sep):
        leading = 0
    elif path.startswith(sep*2) and not path.startswith(sep*3):
        leading = 2
    else:
        leading = 1

    kept = []
    for comp in path.split(sep):
        if comp == empty or comp == dot:
            continue
        if comp != dotdot:
            kept.append(comp)
        elif not kept:
            # '..' with nothing to cancel: kept for relative paths,
            # dropped when the path starts at the root.
            if not leading:
                kept.append(comp)
        elif kept[-1] == dotdot:
            # Previous component is itself an unresolved '..'.
            kept.append(comp)
        else:
            kept.pop()

    result = sep*leading + sep.join(kept)
    return result or dot
374 
375 
def abspath(path):
    """Return a normalized absolute version of *path*.

    A relative path is joined onto the current working directory (obtained
    as bytes for a bytes path) before being normalized.
    """
    path = os.fspath(path)
    if isabs(path):
        return normpath(path)
    cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
    return normpath(join(cwd, path))
386 
387 
388 # Return a canonical path (i.e. the absolute location of a file on the
389 # filesystem).
390 
def realpath(filename, *, strict=False):
    """Return the canonical path of *filename*, with every symbolic link
    met along the way replaced by its target.

    When *strict* is true, an OSError raised while examining a component
    (including a symbolic link loop) is propagated instead of being
    tolerated.
    """
    filename = os.fspath(filename)
    resolved, _ = _joinrealpath(filename[:0], filename, strict, {})
    return abspath(resolved)

# Join two paths, normalizing and eliminating any symbolic links
# encountered in the second path.
#
# Returns (path, ok).  ok is False when an unresolved symlink loop was
# hit in non-strict mode; path then holds the part resolved so far with
# the remainder appended unchanged.  'seen' maps each symlink path to its
# resolved target, or to None while it is still being resolved.
def _joinrealpath(path, rest, strict, seen):
    if isinstance(path, bytes):
        sep, curdir, pardir = b'/', b'.', b'..'
    else:
        sep, curdir, pardir = '/', '.', '..'

    if isabs(rest):
        # An absolute remainder restarts resolution from the root.
        rest = rest[1:]
        path = sep

    while rest:
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # Empty component or '.': stay where we are.
            continue
        if name == pardir:
            if not path:
                path = pardir
            else:
                path, name = split(path)
                if name == pardir:
                    # Cannot cancel an unresolved '..'; stack another one.
                    path = join(path, pardir, pardir)
            continue

        candidate = join(path, name)
        try:
            info = os.lstat(candidate)
        except OSError:
            if strict:
                raise
            # Unexaminable components are treated as ordinary names.
            is_link = False
        else:
            is_link = stat.S_ISLNK(info.st_mode)
        if not is_link:
            path = candidate
            continue

        # Resolve the symbolic link.
        if candidate in seen:
            path = seen[candidate]
            if path is not None:
                # Already resolved earlier: reuse the cached target.
                continue
            # Still marked unresolved, so this is a symlink loop.
            if strict:
                # Let os.stat() raise the OSError for the loop.
                os.stat(candidate)
            else:
                # Give back what is resolved plus the untouched remainder.
                return join(candidate, rest), False
        seen[candidate] = None  # resolution in progress
        path, ok = _joinrealpath(path, os.readlink(candidate), strict, seen)
        if not ok:
            return join(path, rest), False
        seen[candidate] = path  # resolution finished

    return path, True
461 
462 
supports_unicode_filenames = (sys.platform == 'darwin')

def relpath(path, start=None):
    """Return a relative version of *path*.

    The result is the route from *start* (the current directory when
    *start* is None) to *path*, computed textually from the absolute,
    normalized forms of both; the filesystem is not consulted beyond
    obtaining the current directory.

    Raises ValueError when *path* is empty, and TypeError (via
    genericpath._check_arg_types) when *path* and *start* cannot be
    combined, e.g. when str and bytes are mixed.
    """

    if not path:
        raise ValueError("no path specified")

    path = os.fspath(path)
    if not path:
        # A truthy os.PathLike object can still convert to an empty path;
        # reject it exactly like an empty str or bytes.
        raise ValueError("no path specified")

    if isinstance(path, bytes):
        curdir = b'.'
        sep = b'/'
        pardir = b'..'
    else:
        curdir = '.'
        sep = '/'
        pardir = '..'

    if start is None:
        start = curdir
    else:
        start = os.fspath(start)

    try:
        start_list = [x for x in abspath(start).split(sep) if x]
        path_list = [x for x in abspath(path).split(sep) if x]
        # Work out how much of the filepath is shared by start and path.
        i = len(commonprefix([start_list, path_list]))

        # Climb out of what is left of start, then descend into path.
        rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
        if not rel_list:
            return curdir
        return join(*rel_list)
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Prefer the more descriptive error from the argument checker;
        # if it does not raise, propagate the original exception.
        genericpath._check_arg_types('relpath', path, start)
        raise
499 
500 
501 # Return the longest common sub-path of the sequence of paths given as input.
502 # The paths are not normalized before comparing them (this is the
503 # responsibility of the caller). Any trailing separator is stripped from the
504 # returned path.
505 
def commonpath(paths):
    """Given an iterable of path names, return the longest common sub-path.

    The paths are compared component by component after empty and '.'
    components are dropped; they are not otherwise normalized.  The result
    has no trailing separator and has the same type (str or bytes) as the
    paths.

    Raises ValueError when *paths* is empty or mixes absolute and relative
    paths, and TypeError (via genericpath._check_arg_types) when the paths
    cannot be combined, e.g. when str and bytes are mixed.
    """

    empty_message = 'commonpath() arg is an empty sequence'
    if not paths:
        raise ValueError(empty_message)

    # Materialize before indexing: an exhausted iterator or generator is
    # truthy but produces no paths, so the check is repeated afterwards.
    paths = tuple(map(os.fspath, paths))
    if not paths:
        raise ValueError(empty_message)

    if isinstance(paths[0], bytes):
        sep = b'/'
        curdir = b'.'
    else:
        sep = '/'
        curdir = '.'

    try:
        split_paths = [path.split(sep) for path in paths]

        try:
            # Unpacking fails unless every path agrees on being absolute.
            absolute, = {p[:1] == sep for p in paths}
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths") from None

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        # The common prefix of the smallest and largest component lists is
        # common to every list in between.
        s1 = min(split_paths)
        s2 = max(split_paths)
        common = s1
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = s1[:i]
                break

        prefix = sep if absolute else sep[:0]
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        # Prefer the more descriptive error from the argument checker;
        # if it does not raise, propagate the original exception.
        genericpath._check_arg_types('commonpath', *paths)
        raise
542