• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Utility functions for copying and archiving files and directory trees.
2
3XXX The functions here don't copy the resource fork or other metadata on Mac.
4
5"""
6
7import os
8import sys
9import stat
10from os.path import abspath
11import fnmatch
12import collections
13import errno
14
15try:
16    from pwd import getpwnam
17except ImportError:
18    getpwnam = None
19
20try:
21    from grp import getgrnam
22except ImportError:
23    getgrnam = None
24
# Names exported by a star-import of this module.
__all__ = [
    "copyfileobj",
    "copyfile",
    "copymode",
    "copystat",
    "copy",
    "copy2",
    "copytree",
    "move",
    "rmtree",
    "Error",
    "SpecialFileError",
    "ExecError",
    "make_archive",
    "get_archive_formats",
    "register_archive_format",
    "unregister_archive_format",
    "ignore_patterns",
]
30
class Error(EnvironmentError):
    """Error raised by the copy and move helpers of this module."""
33
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) for which it is not supported."""
    pass
37
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
    pass
40
# WindowsError is only available as a builtin on some platforms.  Where the
# name cannot be resolved, bind it to None so that later code can test
# "WindowsError is not None" before using it in an isinstance() check.
try:
    WindowsError
except NameError:
    WindowsError = None
45
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks obtained from fsrc.read(length) until a
    read returns an empty (false) value.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
53
54def _samefile(src, dst):
55    # Macintosh, Unix.
56    if hasattr(os.path, 'samefile'):
57        try:
58            return os.path.samefile(src, dst)
59        except OSError:
60            return False
61
62    # All other platforms: check for same pathname.
63    return (os.path.normcase(os.path.abspath(src)) ==
64            os.path.normcase(os.path.abspath(dst)))
65
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    Raises Error when src and dst are the same file, and SpecialFileError
    when either path is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for candidate in (src, dst):
        try:
            mode = os.stat(candidate).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % candidate)

    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
85
def copymode(src, dst):
    """Copy the permission bits of src onto dst.

    Does nothing on platforms where os.chmod is not available.
    """
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
92
def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece of metadata is applied only when the platform provides the
    matching os function.  An OSError from os.chflags() whose errno is
    EOPNOTSUPP or ENOTSUP is ignored; any other OSError propagates.
    """
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Not every errno module defines both constants, so look them
            # up defensively before comparing.
            unsupported = [getattr(errno, name)
                           for name in ('EOPNOTSUPP', 'ENOTSUP')
                           if hasattr(errno, name)]
            if why.errno not in unsupported:
                raise
110
def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    When dst is an existing directory, the file is created inside it
    under the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
121
def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    When dst is an existing directory, the file is created inside it
    under the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
132
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() ignore parameter.

    patterns is a sequence of glob-style patterns; the returned callable
    yields the set of names matching at least one of them."""
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched.update(fnmatch.filter(names, pattern))
        return matched
    return _ignore_patterns
144
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files.  Error must be handled before
        # EnvironmentError because it is a subclass of it.
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
209
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir
    (or os.path.islink when path is a symbolic link); path is the
    argument to that function that caused it to fail; and exc_info is a
    tuple returned by sys.exc_info().  If ignore_errors is false and
    onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block, so a bare raise
            # re-raises the exception currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Treat an entry that cannot be examined as a non-directory
            # so that the os.remove() attempt below reports the failure.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
257
258
259def _basename(path):
260    # A basename() variant which first strips the trailing slash, if present.
261    # Thus we always get the last component of the path, even for directories.
262    sep = os.path.sep + (os.path.altsep or '')
263    return os.path.basename(path.rstrip(sep))
264
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist; Error is raised if it does.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, src is copied
    to the destination and then removed; Error is raised when that would
    move a directory into itself.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed: fall back to copy followed by removal.
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
304
305def _destinsrc(src, dst):
306    src = abspath(src)
307    dst = abspath(dst)
308    if not src.endswith(os.path.sep):
309        src += os.path.sep
310    if not dst.endswith(os.path.sep):
311        dst += os.path.sep
312    return dst.startswith(src)
313
314def _get_gid(name):
315    """Returns a gid, given a group name."""
316    if getgrnam is None or name is None:
317        return None
318    try:
319        result = getgrnam(name)
320    except KeyError:
321        result = None
322    if result is not None:
323        return result[2]
324    return None
325
326def _get_uid(name):
327    """Returns an uid, given a user name."""
328    if getpwnam is None or name is None:
329        return None
330    try:
331        result = getpwnam(name)
332    except KeyError:
333        result = None
334    if result is not None:
335        return result[2]
336    return None
337
338def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
339                  owner=None, group=None, logger=None):
340    """Create a (possibly compressed) tar file from all the files under
341    'base_dir'.
342
343    'compress' must be "gzip" (the default), "bzip2", or None.
344
345    'owner' and 'group' can be used to define an owner and a group for the
346    archive that is being built. If not provided, the current owner and group
347    will be used.
348
349    The output tar file will be named 'base_name' +  ".tar", possibly plus
350    the appropriate compression extension (".gz", or ".bz2").
351
352    Returns the output filename.
353    """
354    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
355    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}
356
357    # flags for compression program, each element of list will be an argument
358    if compress is not None and compress not in compress_ext.keys():
359        raise ValueError, \
360              ("bad value for 'compress': must be None, 'gzip' or 'bzip2'")
361
362    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
363    archive_dir = os.path.dirname(archive_name)
364
365    if archive_dir and not os.path.exists(archive_dir):
366        if logger is not None:
367            logger.info("creating %s", archive_dir)
368        if not dry_run:
369            os.makedirs(archive_dir)
370
371
372    # creating the tarball
373    import tarfile  # late import so Python build itself doesn't break
374
375    if logger is not None:
376        logger.info('Creating tar archive')
377
378    uid = _get_uid(owner)
379    gid = _get_gid(group)
380
381    def _set_uid_gid(tarinfo):
382        if gid is not None:
383            tarinfo.gid = gid
384            tarinfo.gname = group
385        if uid is not None:
386            tarinfo.uid = uid
387            tarinfo.uname = owner
388        return tarinfo
389
390    if not dry_run:
391        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
392        try:
393            tar.add(base_dir, filter=_set_uid_gid)
394        finally:
395            tar.close()
396
397    return archive_name
398
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning an external "zip".

    The command is run recursively ("-r"), with "q" added to the option
    unless 'verbose' is true.  'dry_run' is forwarded to distutils' spawn().
    Raises ExecError when spawn() reports a DistutilsExecError.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(
            ("unable to create zip file '%s': "
             "could neither import the 'zipfile' module nor "
             "find a standalone zip utility") % zip_filename)
416
417def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
418    """Create a zip file from all the files under 'base_dir'.
419
420    The output zip file will be named 'base_name' + ".zip".  Uses either the
421    "zipfile" Python module (if available) or the InfoZIP "zip" utility
422    (if installed and found on the default search path).  If neither tool is
423    available, raises ExecError.  Returns the name of the output zip
424    file.
425    """
426    zip_filename = base_name + ".zip"
427    archive_dir = os.path.dirname(base_name)
428
429    if archive_dir and not os.path.exists(archive_dir):
430        if logger is not None:
431            logger.info("creating %s", archive_dir)
432        if not dry_run:
433            os.makedirs(archive_dir)
434
435    # If zipfile module is not available, try spawning an external 'zip'
436    # command.
437    try:
438        import zipfile
439    except ImportError:
440        zipfile = None
441
442    if zipfile is None:
443        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
444    else:
445        if logger is not None:
446            logger.info("creating '%s' and adding '%s' to it",
447                        zip_filename, base_dir)
448
449        if not dry_run:
450            with zipfile.ZipFile(zip_filename, "w",
451                                 compression=zipfile.ZIP_DEFLATED) as zf:
452                for dirpath, dirnames, filenames in os.walk(base_dir):
453                    for name in filenames:
454                        path = os.path.normpath(os.path.join(dirpath, name))
455                        if os.path.isfile(path):
456                            zf.write(path, path)
457                            if logger is not None:
458                                logger.info("adding '%s'", path)
459
460    return zip_filename
461
# Registry of archive formats, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (keyword, value) pairs passed to the function by make_archive().
_ARCHIVE_FORMATS = {
    'gztar': (
        _make_tarball,
        [('compress', 'gzip')],
        "gzip'ed tar-file",
    ),
    'bztar': (
        _make_tarball,
        [('compress', 'bzip2')],
        "bzip2'ed tar-file",
    ),
    'tar': (
        _make_tarball,
        [('compress', None)],
        "uncompressed tar file",
    ),
    'zip': (
        _make_zipfile,
        [],
        "ZIP file",
    ),
}
468
def get_archive_formats():
    """Return the registered archive formats.

    The result is a sorted list of (name, description) tuples.
    """
    return sorted([(name, registry[2])
                   for name, registry in _ARCHIVE_FORMATS.items()])
478
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError when function is not callable, when extra_args is not
    a tuple or list, or when one of its elements is not a 2-item tuple or
    list.
    """
    if extra_args is None:
        extra_args = []
    # The callable() builtin is used rather than collections.Callable,
    # which newer Python versions no longer provide.
    if not callable(function):
        # Wrap in a 1-tuple so a tuple argument is formatted as a whole.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
499
def unregister_archive_format(name):
    """Remove the archive format called name from the registry.

    Raises KeyError when no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
502
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar", or any other registered format.  An unknown format raises
    ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to the archiving function for every
    format other than "zip".
    """
    # Validate the format before changing directory, so that an unknown
    # format cannot leave the process inside 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name against the original working directory
        # before it changes.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
556