"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
6
import os
import sys
import stat
from os.path import abspath
import fnmatch
import collections
import errno

try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None
24
# Names exported by ``from <module> import *``.
__all__ = [
    "copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
    "copytree", "move", "rmtree", "Error", "SpecialFileError",
    "ExecError", "make_archive", "get_archive_formats",
    "register_archive_format", "unregister_archive_format",
    "ignore_patterns",
]
30
class Error(EnvironmentError):
    """Error raised by the copy and move helpers in this module."""
33
class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)"""
37
class ExecError(EnvironmentError):
    """Raised when a command could not be executed"""
40
# Make the name WindowsError always resolvable: where the builtin does not
# exist it is bound to None, so callers can test "WindowsError is not None"
# before using it in isinstance().
try:
    WindowsError
except NameError:
    WindowsError = None
45
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is transferred in chunks obtained from fsrc.read(length); copying
    stops at the first empty chunk.  Neither object is closed or rewound.
    """
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)
53
def _samefile(src, dst):
    """Return true if src and dst refer to the same file.

    Where os.path.samefile is available it decides the answer, and an
    OSError from it (e.g. one of the paths cannot be stat'ed) is treated
    as "not the same file".  Elsewhere the normalized absolute pathnames
    are compared.
    """
    # Macintosh, Unix.
    if hasattr(os.path, 'samefile'):
        try:
            return os.path.samefile(src, dst)
        except OSError:
            return False

    # All other platforms: check for same pathname.
    return (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))
65
def copyfile(src, dst):
    """Copy data from src to dst.

    Both arguments are path names.  Only the file contents are copied.

    Raises Error if src and dst are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for fn in [src, dst]:
        try:
            st = os.stat(fn)
        except OSError:
            # File most likely does not exist
            pass
        else:
            # XXX What about other special files? (sockets, devices...)
            if stat.S_ISFIFO(st.st_mode):
                raise SpecialFileError("`%s` is a named pipe" % fn)

    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
85
def copymode(src, dst):
    """Copy mode bits from src to dst.

    Does nothing on platforms where os.chmod is not available.
    """
    if hasattr(os, 'chmod'):
        st = os.stat(src)
        mode = stat.S_IMODE(st.st_mode)
        os.chmod(dst, mode)
92
def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece is copied only if the platform provides the corresponding
    os function.  When copying the flags fails with EOPNOTSUPP or ENOTSUP
    the failure is ignored; any other OSError propagates.
    """
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Not every errno name exists on every platform, hence the
            # hasattr() check before comparing.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise
110
def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory, in which case the file is created
    inside it under the base name of src.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copymode(src, dst)
121
def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory, in which case the file is created
    inside it under the base name of src.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copystat(src, dst)
132
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files.  The returned callable takes
    (path, names) and returns the set of names matching any pattern;
    the path argument is not consulted."""
    def _ignore_patterns(path, names):
        ignored_names = set()
        for pattern in patterns:
            ignored_names.update(fnmatch.filter(names, pattern))
        return ignored_names
    return _ignore_patterns
144
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
209
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block, so the bare raise
            # re-raises the error currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Mode 0 is not a directory, so the entry goes to os.remove
            # below and any failure is reported from there.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
257
258
def _basename(path):
    """Return the last component of path, ignoring trailing separators."""
    # A basename() variant which first strips the trailing slash, if present.
    # Thus we always get the last component of the path, even for directories.
    sep = os.path.sep + (os.path.altsep or '')
    return os.path.basename(path.rstrip(sep))
264
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    Raises Error if the computed destination already exists inside a
    destination directory, or if a directory would be moved into itself.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed: fall back to copy-then-delete.
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
304
def _destinsrc(src, dst):
    """Return true if dst is src itself or lies inside the src directory.

    The comparison is done on absolute path strings only.
    """
    src = abspath(src)
    dst = abspath(dst)
    # Terminate both with a separator so that "a" is not taken to be a
    # parent of "ab".
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)
313
def _get_gid(name):
    """Returns a gid, given a group name.

    Returns None when name is None, when getgrnam is unavailable, or when
    the group is unknown.
    """
    if getgrnam is None or name is None:
        return None
    try:
        result = getgrnam(name)
    except KeyError:
        return None
    if result is None:
        return None
    # Index 2 of the group entry is the numeric gid.
    return result[2]
325
def _get_uid(name):
    """Returns an uid, given a user name.

    Returns None when name is None, when getpwnam is unavailable, or when
    the user is unknown.
    """
    if getpwnam is None or name is None:
        return None
    try:
        result = getpwnam(name)
    except KeyError:
        return None
    if result is None:
        return None
    # Index 2 of the password entry is the numeric uid.
    return result[2]
337
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None; any other
    value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'verbose' is accepted but not used by this function.  If 'dry_run' is
    true, neither the archive nor its parent directory is created.
    'logger', if given, receives info messages.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}

    if compress is not None and compress not in compress_ext:
        raise ValueError(
            "bad value for 'compress': must be None, 'gzip' or 'bzip2'")

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    import tarfile  # late import so Python build itself doesn't break

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
398
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create zip_filename from base_dir by spawning an external "zip".

    The command is run recursively ("-r"), and quietly ("-rq") unless
    verbose is true.  Raises ExecError if the command could not be run
    successfully.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(
            ("unable to create zip file '%s': "
             "could neither import the 'zipfile' module nor "
             "find a standalone zip utility") % zip_filename)
416
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.

    'verbose' only affects the external "zip" fallback.  If 'dry_run' is
    true, nothing is written.  'logger', if given, receives info messages.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            with zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED) as zf:
                # Add an entry for base_dir itself unless it is the
                # current directory.
                path = os.path.normpath(base_dir)
                if path != os.curdir:
                    zf.write(path, path)
                    if logger is not None:
                        logger.info("adding '%s'", path)
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    # Directories get their own entries.
                    for name in sorted(dirnames):
                        path = os.path.normpath(os.path.join(dirpath, name))
                        zf.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        # Only regular files are added.
                        if os.path.isfile(path):
                            zf.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)

    return zip_filename
471
# Registry of archive formats:
#     format name -> (archiver function, [(kwarg name, value), ...], description)
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
    }
478
def get_archive_formats():
    """Returns a list of the registered archive formats.

    Each element of the returned sequence is a tuple (name, description);
    the list is sorted.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
488
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-element tuple or
    list.  An existing format with the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    # Builtin callable() instead of collections.Callable, which newer
    # Python versions no longer provide.
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
509
def unregister_archive_format(name):
    """Removes the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]
512
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar", or any other registered format.  An unknown format raises
    ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.  They are passed to the archiver for
    every format except "zip".

    'verbose' is accepted but not forwarded to the archiver.
    """
    # Validate the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name against the original directory before chdir.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        if base_dir is None:
            base_dir = os.curdir

        kwargs = {'dry_run': dry_run, 'logger': logger}

        func = format_info[0]
        for arg, val in format_info[1]:
            kwargs[arg] = val

        if format != 'zip':
            kwargs['owner'] = owner
            kwargs['group'] = group

        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
566