"""Common operations on Posix pathnames.

Instead of importing this module directly, import os and refer to
this module as os.path. The "os.path" name is an alias for this
module on Posix systems; on other systems (e.g. Mac, Windows),
os.path provides the same operations in a manner specific to that
platform, and is an alias to another module (e.g. macpath, ntpath).

Some of this can actually be useful on non-Posix systems too, e.g.
for manipulation of the pathname component of URLs.
"""

import os
import sys
import stat
import genericpath
import warnings
from genericpath import *
from genericpath import _unicode

__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount","walk","expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath"]

# strings representing various path-related bits and pieces
curdir = '.'
pardir = '..'
extsep = '.'
sep = '/'
pathsep = ':'
defpath = ':/bin:/usr/bin'
altsep = None
devnull = '/dev/null'

# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed
# (another function should be defined to do that).

def normcase(s):
    """Normalize case of pathname. Has no effect under Posix.

    The argument is returned unchanged."""
    return s


# Return whether a path is absolute.
# Trivial in Posix, harder on the Mac or MS-DOS.

def isabs(s):
    """Test whether a path is absolute, i.e. whether it starts with '/'."""
    return s.startswith('/')


# Join pathnames.
# Ignore the previous parts if a part is absolute.
# Insert a '/' unless the first part is empty or already ends in '/'.

def join(a, *p):
    """Join two or more pathname components, inserting '/' as needed.
    If any component is an absolute path, all previous path components
    will be discarded. An empty last part will result in a path that
    ends with a separator."""
    path = a
    for b in p:
        if b.startswith('/'):
            # An absolute component restarts the result from scratch.
            path = b
        elif path == '' or path.endswith('/'):
            # No separator needed: nothing before it, or one already there.
            path += b
        else:
            path += '/' + b
    return path


# Split a path in head (everything up to the last '/') and tail (the
# rest). If the path ends in '/', tail will be empty. If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.

def split(p):
    """Split a pathname. Returns tuple "(head, tail)" where "tail" is
    everything after the final slash. Either part may be empty."""
    i = p.rfind('/') + 1
    head, tail = p[:i], p[i:]
    # Keep a head made only of slashes (the root) intact; otherwise drop
    # the trailing slash(es) that separated head from tail.
    if head and head != '/'*len(head):
        head = head.rstrip('/')
    return head, tail


# Split a path in root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.

def splitext(p):
    return genericpath._splitext(p, sep, altsep, extsep)
# The docstring is shared with the generic implementation.
splitext.__doc__ = genericpath._splitext.__doc__

# Split a pathname into a drive specification and the rest of the
# path. Useful on DOS/Windows/NT; on Unix, the drive is always empty.

def splitdrive(p):
    """Split a pathname into drive and path. On Posix, drive is always
    empty.

    Returns the tuple ('', p)."""
    return '', p


# Return the tail (basename) part of a path, same as split(path)[1].

def basename(p):
    """Returns the final component of a pathname, i.e. everything after
    the last '/' (the empty string if the path ends in '/')."""
    i = p.rfind('/') + 1
    return p[i:]


# Return the head (dirname) part of a path, same as split(path)[0].

def dirname(p):
    """Returns the directory component of a pathname, i.e. everything up
    to the last '/' with trailing slashes stripped unless it is the root."""
    i = p.rfind('/') + 1
    head = p[:i]
    # A head consisting only of slashes is the root and is kept as is.
    if head and head != '/'*len(head):
        head = head.rstrip('/')
    return head


# Is a path a symbolic link?
# This will always return false on systems where os.lstat doesn't exist.

def islink(path):
    """Test whether a path is a symbolic link.

    Returns False when the path cannot be lstat()ed or when os.lstat
    is not available."""
    try:
        st = os.lstat(path)
    except (os.error, AttributeError):
        return False
    return stat.S_ISLNK(st.st_mode)

# Being true for dangling symbolic links is also useful.

def lexists(path):
    """Test whether a path exists. Returns True for broken symbolic links"""
    try:
        # lstat() does not follow the link, so a dangling link still succeeds.
        os.lstat(path)
    except os.error:
        return False
    return True


# Are two filenames really pointing to the same file?

def samefile(f1, f2):
    """Test whether two pathnames reference the same actual file.

    Both paths are stat()ed; any os.error raised by os.stat propagates."""
    s1 = os.stat(f1)
    s2 = os.stat(f2)
    return samestat(s1, s2)


# Are two open files really referencing the same file?
# (Not necessarily the same file descriptor!)

def sameopenfile(fp1, fp2):
    """Test whether two open file descriptors reference the same file.

    fp1 and fp2 are passed straight to os.fstat(), so they must be
    file descriptors rather than file objects."""
    s1 = os.fstat(fp1)
    s2 = os.fstat(fp2)
    return samestat(s1, s2)


# Are two stat buffers (obtained from stat, fstat or lstat)
# describing the same file?

def samestat(s1, s2):
    """Test whether two stat buffers reference the same file, i.e.
    whether both their st_ino and st_dev fields are equal."""
    return s1.st_ino == s2.st_ino and \
           s1.st_dev == s2.st_dev


# Is a path a mount point?
# (Does this work for all UNIXes? Is it even guaranteed to work by Posix?)

def ismount(path):
    """Test whether a path is a mount point.

    Returns False for symbolic links and for paths that cannot be
    lstat()ed; otherwise compares the device and i-node numbers of
    path with those of path/.."""
    if islink(path):
        # A symlink can never be a mount point
        return False
    try:
        s1 = os.lstat(path)
        s2 = os.lstat(realpath(join(path, '..')))
    except os.error:
        return False # It doesn't exist -- so not a mount point :-)
    dev1 = s1.st_dev
    dev2 = s2.st_dev
    if dev1 != dev2:
        return True     # path/.. on a different device as path
    ino1 = s1.st_ino
    ino2 = s2.st_ino
    if ino1 == ino2:
        return True     # path/.. is the same i-node as path
    return False


# Directory tree walk.
# For each directory under top (including top itself, but excluding
# '.' and '..'), func(arg, dirname, filenames) is called, where
# dirname is the name of the directory and filenames is the list
# of files (and subdirectories etc.) in the directory.
# The func may modify the filenames list, to implement a filter,
# or to impose a different order of visiting.

def walk(top, func, arg):
    """Directory tree walk with callback function.

    For each directory in the directory tree rooted at top (including top
    itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
    dirname is the name of the directory, and fnames a list of the names of
    the files and subdirectories in dirname (excluding '.' and '..'). func
    may modify the fnames list in-place (e.g. via del or slice assignment),
    and walk will only recurse into the subdirectories whose names remain in
    fnames; this can be used to implement a filter, or to impose a specific
    order of visiting. No semantics are defined for, or required of, arg,
    beyond that arg is always passed to func. It can be used, e.g., to pass
    a filename pattern, or a mutable object designed to accumulate
    statistics. Passing None for arg is common."""
    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
                      stacklevel=2)
    try:
        names = os.listdir(top)
    except os.error:
        # Unreadable directories are silently skipped.
        return
    func(arg, top, names)
    for name in names:
        name = join(top, name)
        try:
            st = os.lstat(name)
        except os.error:
            continue
        # lstat() is used, so symbolic links to directories are not followed.
        if stat.S_ISDIR(st.st_mode):
            walk(name, func, arg)


# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)

def expanduser(path):
    """Expand ~ and ~user constructions. If user or $HOME is unknown,
    do nothing.

    Returns the expanded path, or path itself when it does not start
    with '~' or when the home directory cannot be determined."""
    if not path.startswith('~'):
        return path
    # i marks the end of the '~' or '~user' prefix.
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' not in os.environ:
            import pwd
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # The current uid has no entry in the password database:
                # the home directory is unknown, so leave the path alone
                # instead of letting the KeyError escape.
                return path
        else:
            userhome = os.environ['HOME']
    else:
        import pwd
        try:
            pwent = pwd.getpwnam(path[1:i])
        except KeyError:
            return path
        userhome = pwent.pw_dir
    userhome = userhome.rstrip('/')
    # A home of '/' with nothing after the prefix would otherwise yield ''.
    return (userhome + path[i:]) or '/'


# Expand paths containing shell variable substitutions.
# This expands the forms $variable and ${variable} only.
# Non-existent variables are left unchanged.

# Lazily compiled patterns for byte-string and unicode paths respectively.
_varprog = None
_uvarprog = None

def expandvars(path):
    """Expand shell variables of form $var and ${var}. Unknown variables
    are left unchanged."""
    global _varprog, _uvarprog
    if '$' not in path:
        return path
    if isinstance(path, _unicode):
        if not _uvarprog:
            import re
            # Same pattern as the byte-string one below, spelled with
            # escaped backslashes instead of the Python-2-only ur'' prefix;
            # the resulting string value is identical.
            _uvarprog = re.compile(u'\\$(\\w+|\\{[^}]*\\})', re.UNICODE)
        varprog = _uvarprog
        encoding = sys.getfilesystemencoding()
    else:
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
        varprog = _varprog
        encoding = None
    i = 0
    while True:
        m = varprog.search(path, i)
        if not m:
            break
        i, j = m.span(0)
        name = m.group(1)
        if name.startswith('{') and name.endswith('}'):
            name = name[1:-1]
        if encoding:
            # Unicode names are encoded before the os.environ lookup.
            name = name.encode(encoding)
        if name in os.environ:
            tail = path[j:]
            value = os.environ[name]
            if encoding:
                value = value.decode(encoding)
            path = path[:i] + value
            # Resume scanning after the substituted value, so the value
            # itself is never re-expanded.
            i = len(path)
            path += tail
        else:
            i = j
    return path


# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
# It should be understood that this may change the meaning of the path
# if it contains symbolic links!

def normpath(path):
    """Normalize path, eliminating double slashes, etc."""
    # Preserve unicode (if path is unicode)
    slash, dot = (u'/', u'.') if isinstance(path, _unicode) else ('/', '.')
    if path == '':
        return dot
    initial_slashes = path.startswith('/')
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if (initial_slashes and
        path.startswith('//') and not path.startswith('///')):
        initial_slashes = 2
    comps = path.split('/')
    new_comps = []
    for comp in comps:
        if comp in ('', '.'):
            continue
        # Keep '..' only where it cannot be cancelled: at the start of a
        # relative path, or directly after another kept '..'.
        if (comp != '..' or (not initial_slashes and not new_comps) or
             (new_comps and new_comps[-1] == '..')):
            new_comps.append(comp)
        elif new_comps:
            new_comps.pop()
    comps = new_comps
    path = slash.join(comps)
    if initial_slashes:
        path = slash*initial_slashes + path
    return path or dot


def abspath(path):
    """Return an absolute path.

    A relative path is joined onto the current working directory; the
    result is then passed through normpath()."""
    if not isabs(path):
        if isinstance(path, _unicode):
            cwd = os.getcwdu()
        else:
            cwd = os.getcwd()
        path = join(cwd, path)
    return normpath(path)


# Return a canonical path (i.e. the absolute location of a file on the
# filesystem).

def realpath(filename):
    """Return the canonical path of the specified filename, eliminating any
    symbolic links encountered in the path."""
    # Only the path is needed here; the "fully resolved" flag is ignored.
    path = _joinrealpath('', filename, {})[0]
    return abspath(path)

# Join two paths, normalizing and eliminating any symbolic links
# encountered in the second path.
# Returns (path, ok); ok is False when a symlink loop was detected.
# seen maps each symlink path visited so far to its resolved target,
# or to None while that link is still being resolved.
def _joinrealpath(path, rest, seen):
    if isabs(rest):
        rest = rest[1:]
        path = sep

    while rest:
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        if not islink(newpath):
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            # Return already resolved part + rest of the path unchanged.
            return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
        if not ok:
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True


supports_unicode_filenames = (sys.platform == 'darwin')

def relpath(path, start=curdir):
    """Return a relative version of a path.

    The result is path expressed relative to start (the current
    directory by default). Raises ValueError if path is empty."""

    if not path:
        raise ValueError("no path specified")

    start_list = [x for x in abspath(start).split(sep) if x]
    path_list = [x for x in abspath(path).split(sep) if x]

    # Work out how much of the filepath is shared by start and path.
    i = len(commonprefix([start_list, path_list]))

    rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    if not rel_list:
        return curdir
    return join(*rel_list)