"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""

import os
import sys
import stat
from os.path import abspath
import fnmatch
import collections
import errno

try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None

__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]


class Error(EnvironmentError):
    """Raised by this module for its own failures.

    Used for same-file copies, an already existing move() destination,
    moving a directory into itself, and the list of per-file errors
    collected by copytree().
    """


class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)"""


class ExecError(EnvironmentError):
    """Raised when a command could not be executed"""


# WindowsError only exists on Windows; elsewhere fall back to None so that
# copytree() can test for it without a NameError.
try:
    WindowsError
except NameError:
    WindowsError = None


def copyfileobj(fsrc, fdst, length=16*1024):
    """copy data from file-like object fsrc to file-like object fdst

    Data is read in chunks of at most `length` until fsrc.read() returns
    an empty value.
    """
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)


def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() where available (an OSError from it is treated
    as "not the same"); otherwise compares the normalized absolute paths.
    """
    # Macintosh, Unix.
    if hasattr(os.path, 'samefile'):
        try:
            return os.path.samefile(src, dst)
        except OSError:
            return False

    # All other platforms: check for same pathname.
    return (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))


def copyfile(src, dst):
    """Copy data from src to dst

    Raises Error if src and dst are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for fn in [src, dst]:
        try:
            st = os.stat(fn)
        except OSError:
            # File most likely does not exist
            pass
        else:
            # XXX What about other special files? (sockets, devices...)
            if stat.S_ISFIFO(st.st_mode):
                raise SpecialFileError("`%s` is a named pipe" % fn)

    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)


def copymode(src, dst):
    """Copy mode bits from src to dst"""
    if hasattr(os, 'chmod'):
        st = os.stat(src)
        mode = stat.S_IMODE(st.st_mode)
        os.chmod(dst, mode)


def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Ignore "operation not supported" errors; re-raise anything
            # else.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise


def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copymode(src, dst)


def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copystat(src, dst)


def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        ignored_names = []
        for pattern in patterns:
            ignored_names.extend(fnmatch.filter(names, pattern))
        return set(ignored_names)
    return _ignore_patterns


def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)


def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Treat an entry that cannot be lstat'ed as a non-directory.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())


def _basename(path):
    # A basename() variant which first strips the trailing slash, if present.
    # Thus we always get the last component of the path, even for directories.
    sep = os.path.sep + (os.path.altsep or '')
    return os.path.basename(path.rstrip(sep))


def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed; fall back to copy followed by delete.
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)


def _destinsrc(src, dst):
    """Return True if the absolute path of dst lies at or below src.

    Both paths get a trailing separator before the prefix comparison so
    that "/a/bc" is not considered to be inside "/a/b".
    """
    src = abspath(src)
    dst = abspath(dst)
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)


def _get_gid(name):
    """Returns a gid, given a group name."""
    if getgrnam is None or name is None:
        return None
    try:
        result = getgrnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _get_uid(name):
    """Returns an uid, given a user name."""
    if getpwnam is None or name is None:
        return None
    try:
        result = getpwnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.  Raises ValueError for any other value of
    'compress'.  'verbose' is accepted but not used here.
    """
    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}

    # flags for compression program, each element of list will be an argument
    if compress is not None and compress not in compress_ext:
        raise ValueError(
            "bad value for 'compress': must be None, 'gzip' or 'bzip2'")

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    import tarfile  # late import so Python build itself doesn't break

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tar.add() filter: override ownership only for ids that resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name


def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create zip_filename from base_dir by spawning an external "zip".

    Runs "zip -r" (or "zip -rq" unless verbose) through distutils' spawn()
    and raises ExecError if that fails.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(
            ("unable to create zip file '%s': "
             "could neither import the 'zipfile' module nor "
             "find a standalone zip utility") % zip_filename)


def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            with zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED) as zf:
                # Add an entry for base_dir itself unless it is ".".
                path = os.path.normpath(base_dir)
                if path != os.curdir:
                    zf.write(path, path)
                    if logger is not None:
                        logger.info("adding '%s'", path)
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in sorted(dirnames):
                        path = os.path.normpath(os.path.join(dirpath, name))
                        zf.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            zf.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)

    return zip_filename


# Maps a format name to (archiver function, extra (name, value) keyword
# arguments, description).
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
    }


def get_archive_formats():
    """Returns a list of supported formats for archiving and unarchiving.

    Each element of the returned sequence is a tuple (name, description)
    """
    formats = [(name, registry[2]) for name, registry in
               _ARCHIVE_FORMATS.items()]
    formats.sort()
    return formats


def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable or extra_args is not a
    list/tuple of two-element lists/tuples.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple so a tuple-valued `function` formats correctly.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)


def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]


def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar".

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered format; this is
    checked before the working directory is changed.  'verbose' is accepted
    but not forwarded to the archiver.
    """
    # Validate the format first so that a bad name cannot leave the process
    # inside root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name against the original cwd before leaving it.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename