• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2009, Daniel Krech All rights reserved.
2# Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#  * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10#
11#  * Redistributions in binary form must reproduce the above copyright
12# notice, this list of conditions and the following disclaimer in the
13# documentation and/or other materials provided with the distribution.
14#
15#  * Neither the name of the Daniel Krech nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Support for automatically downloading Python packages from an URL."""
32
33
34from __future__ import with_statement
35
36import codecs
37import logging
38import new
39import os
40import shutil
41import sys
42import tarfile
43import tempfile
44import urllib
45import urlparse
46import zipfile
47import zipimport
48
# Module-level logger, named after this module; the AutoInstaller methods
# and the __main__ block below log through it.
_log = logging.getLogger(__name__)
50
51
52class AutoInstaller(object):
53
54    """Supports automatically installing Python packages from an URL.
55
56    Supports uncompressed files, .tar.gz, and .zip formats.
57
58    Basic usage:
59
60    installer = AutoInstaller()
61
62    installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
63                      url_subpath="pep8-0.5.0/pep8.py")
64    installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.2.4.zip",
65                      url_subpath="mechanize")
66
67    """
68
69    def __init__(self, append_to_search_path=False, make_package=True,
70                 target_dir=None, temp_dir=None):
71        """Create an AutoInstaller instance, and set up the target directory.
72
73        Args:
74          append_to_search_path: A boolean value of whether to append the
75                                 target directory to the sys.path search path.
76          make_package: A boolean value of whether to make the target
77                        directory a package.  This adds an __init__.py file
78                        to the target directory -- allowing packages and
79                        modules within the target directory to be imported
80                        explicitly using dotted module names.
81          target_dir: The directory path to which packages should be installed.
82                      Defaults to a subdirectory of the folder containing
83                      this module called "autoinstalled".
84          temp_dir: The directory path to use for any temporary files
85                    generated while downloading, unzipping, and extracting
86                    packages to install.  Defaults to a standard temporary
87                    location generated by the tempfile module.  This
88                    parameter should normally be used only for development
89                    testing.
90
91        """
92        if target_dir is None:
93            this_dir = os.path.dirname(__file__)
94            target_dir = os.path.join(this_dir, "autoinstalled")
95
96        # Ensure that the target directory exists.
97        self._set_up_target_dir(target_dir, append_to_search_path, make_package)
98
99        self._target_dir = target_dir
100        self._temp_dir = temp_dir
101
102    def _log_transfer(self, message, source, target, log_method=None):
103        """Log a debug message that involves a source and target."""
104        if log_method is None:
105            log_method = _log.debug
106
107        log_method("%s" % message)
108        log_method('    From: "%s"' % source)
109        log_method('      To: "%s"' % target)
110
111    def _create_directory(self, path, name=None):
112        """Create a directory."""
113        log = _log.debug
114
115        name = name + " " if name is not None else ""
116        log('Creating %sdirectory...' % name)
117        log('    "%s"' % path)
118
119        os.makedirs(path)
120
121    def _write_file(self, path, text, encoding):
122        """Create a file at the given path with given text.
123
124        This method overwrites any existing file.
125
126        """
127        _log.debug("Creating file...")
128        _log.debug('    "%s"' % path)
129        with codecs.open(path, "w", encoding) as file:
130            file.write(text)
131
132    def _set_up_target_dir(self, target_dir, append_to_search_path,
133                           make_package):
134        """Set up a target directory.
135
136        Args:
137          target_dir: The path to the target directory to set up.
138          append_to_search_path: A boolean value of whether to append the
139                                 target directory to the sys.path search path.
140          make_package: A boolean value of whether to make the target
141                        directory a package.  This adds an __init__.py file
142                        to the target directory -- allowing packages and
143                        modules within the target directory to be imported
144                        explicitly using dotted module names.
145
146        """
147        if not os.path.exists(target_dir):
148            self._create_directory(target_dir, "autoinstall target")
149
150        if append_to_search_path:
151            sys.path.append(target_dir)
152
153        if make_package:
154            init_path = os.path.join(target_dir, "__init__.py")
155            if not os.path.exists(init_path):
156                text = ("# This file is required for Python to search this "
157                        "directory for modules.\n")
158                self._write_file(init_path, text, "ascii")
159
160    def _create_scratch_directory_inner(self, prefix):
161        """Create a scratch directory without exception handling.
162
163        Creates a scratch directory inside the AutoInstaller temp
164        directory self._temp_dir, or inside a platform-dependent temp
165        directory if self._temp_dir is None.  Returns the path to the
166        created scratch directory.
167
168        Raises:
169          OSError: [Errno 2] if the containing temp directory self._temp_dir
170                             is not None and does not exist.
171
172        """
173        # The tempfile.mkdtemp() method function requires that the
174        # directory corresponding to the "dir" parameter already exist
175        # if it is not None.
176        scratch_dir = tempfile.mkdtemp(prefix=prefix, dir=self._temp_dir)
177        return scratch_dir
178
179    def _create_scratch_directory(self, target_name):
180        """Create a temporary scratch directory, and return its path.
181
182        The scratch directory is generated inside the temp directory
183        of this AutoInstaller instance.  This method also creates the
184        temp directory if it does not already exist.
185
186        """
187        prefix = target_name + "_"
188        try:
189            scratch_dir = self._create_scratch_directory_inner(prefix)
190        except OSError:
191            # Handle case of containing temp directory not existing--
192            # OSError: [Errno 2] No such file or directory:...
193            temp_dir = self._temp_dir
194            if temp_dir is None or os.path.exists(temp_dir):
195                raise
196            # Else try again after creating the temp directory.
197            self._create_directory(temp_dir, "autoinstall temp")
198            scratch_dir = self._create_scratch_directory_inner(prefix)
199
200        return scratch_dir
201
202    def _url_downloaded_path(self, target_name):
203        """Return the path to the file containing the URL downloaded."""
204        filename = ".%s.url" % target_name
205        path = os.path.join(self._target_dir, filename)
206        return path
207
208    def _is_downloaded(self, target_name, url):
209        """Return whether a package version has been downloaded."""
210        version_path = self._url_downloaded_path(target_name)
211
212        _log.debug('Checking %s URL downloaded...' % target_name)
213        _log.debug('    "%s"' % version_path)
214
215        if not os.path.exists(version_path):
216            # Then no package version has been downloaded.
217            _log.debug("No URL file found.")
218            return False
219
220        with codecs.open(version_path, "r", "utf-8") as file:
221            version = file.read()
222
223        return version.strip() == url.strip()
224
225    def _record_url_downloaded(self, target_name, url):
226        """Record the URL downloaded to a file."""
227        version_path = self._url_downloaded_path(target_name)
228        _log.debug("Recording URL downloaded...")
229        _log.debug('    URL: "%s"' % url)
230        _log.debug('     To: "%s"' % version_path)
231
232        self._write_file(version_path, url, "utf-8")
233
234    def _extract_targz(self, path, scratch_dir):
235        # tarfile.extractall() extracts to a path without the
236        # trailing ".tar.gz".
237        target_basename = os.path.basename(path[:-len(".tar.gz")])
238        target_path = os.path.join(scratch_dir, target_basename)
239
240        self._log_transfer("Starting gunzip/extract...", path, target_path)
241
242        try:
243            tar_file = tarfile.open(path)
244        except tarfile.ReadError, err:
245            # Append existing Error message to new Error.
246            message = ("Could not open tar file: %s\n"
247                       " The file probably does not have the correct format.\n"
248                       " --> Inner message: %s"
249                       % (path, err))
250            raise Exception(message)
251
252        try:
253            # This is helpful for debugging purposes.
254            _log.debug("Listing tar file contents...")
255            for name in tar_file.getnames():
256                _log.debug('    * "%s"' % name)
257            _log.debug("Extracting gzipped tar file...")
258            tar_file.extractall(target_path)
259        finally:
260            tar_file.close()
261
262        return target_path
263
264    # This is a replacement for ZipFile.extractall(), which is
265    # available in Python 2.6 but not in earlier versions.
266    def _extract_all(self, zip_file, target_dir):
267        self._log_transfer("Extracting zip file...", zip_file, target_dir)
268
269        # This is helpful for debugging purposes.
270        _log.debug("Listing zip file contents...")
271        for name in zip_file.namelist():
272            _log.debug('    * "%s"' % name)
273
274        for name in zip_file.namelist():
275            path = os.path.join(target_dir, name)
276            self._log_transfer("Extracting...", name, path)
277
278            if not os.path.basename(path):
279                # Then the path ends in a slash, so it is a directory.
280                self._create_directory(path)
281                continue
282            # Otherwise, it is a file.
283
284            try:
285                # We open this file w/o encoding, as we're reading/writing
286                # the raw byte-stream from the zip file.
287                outfile = open(path, 'wb')
288            except IOError, err:
289                # Not all zip files seem to list the directories explicitly,
290                # so try again after creating the containing directory.
291                _log.debug("Got IOError: retrying after creating directory...")
292                dir = os.path.dirname(path)
293                self._create_directory(dir)
294                outfile = open(path, 'wb')
295
296            try:
297                outfile.write(zip_file.read(name))
298            finally:
299                outfile.close()
300
301    def _unzip(self, path, scratch_dir):
302        # zipfile.extractall() extracts to a path without the
303        # trailing ".zip".
304        target_basename = os.path.basename(path[:-len(".zip")])
305        target_path = os.path.join(scratch_dir, target_basename)
306
307        self._log_transfer("Starting unzip...", path, target_path)
308
309        try:
310            zip_file = zipfile.ZipFile(path, "r")
311        except zipfile.BadZipfile, err:
312            message = ("Could not open zip file: %s\n"
313                       " --> Inner message: %s"
314                       % (path, err))
315            raise Exception(message)
316
317        try:
318            self._extract_all(zip_file, scratch_dir)
319        finally:
320            zip_file.close()
321
322        return target_path
323
324    def _prepare_package(self, path, scratch_dir):
325        """Prepare a package for use, if necessary, and return the new path.
326
327        For example, this method unzips zipped files and extracts
328        tar files.
329
330        Args:
331          path: The path to the downloaded URL contents.
332          scratch_dir: The scratch directory.  Note that the scratch
333                       directory contains the file designated by the
334                       path parameter.
335
336        """
337        # FIXME: Add other natural extensions.
338        if path.endswith(".zip"):
339            new_path = self._unzip(path, scratch_dir)
340        elif path.endswith(".tar.gz"):
341            new_path = self._extract_targz(path, scratch_dir)
342        else:
343            # No preparation is needed.
344            new_path = path
345
346        return new_path
347
348    def _download_to_stream(self, url, stream):
349        """Download an URL to a stream, and return the number of bytes."""
350        try:
351            netstream = urllib.urlopen(url)
352        except IOError, err:
353            # Append existing Error message to new Error.
354            message = ('Could not download Python modules from URL "%s".\n'
355                       " Make sure you are connected to the internet.\n"
356                       " You must be connected to the internet when "
357                       "downloading needed modules for the first time.\n"
358                       " --> Inner message: %s"
359                       % (url, err))
360            raise IOError(message)
361        code = 200
362        if hasattr(netstream, "getcode"):
363            code = netstream.getcode()
364        if not 200 <= code < 300:
365            raise ValueError("HTTP Error code %s" % code)
366
367        BUFSIZE = 2**13  # 8KB
368        bytes = 0
369        while True:
370            data = netstream.read(BUFSIZE)
371            if not data:
372                break
373            stream.write(data)
374            bytes += len(data)
375        netstream.close()
376        return bytes
377
378    def _download(self, url, scratch_dir):
379        """Download URL contents, and return the download path."""
380        url_path = urlparse.urlsplit(url)[2]
381        url_path = os.path.normpath(url_path)  # Removes trailing slash.
382        target_filename = os.path.basename(url_path)
383        target_path = os.path.join(scratch_dir, target_filename)
384
385        self._log_transfer("Starting download...", url, target_path)
386
387        with open(target_path, "wb") as stream:
388            bytes = self._download_to_stream(url, stream)
389
390        _log.debug("Downloaded %s bytes." % bytes)
391
392        return target_path
393
394    def _install(self, scratch_dir, package_name, target_path, url,
395                 url_subpath):
396        """Install a python package from an URL.
397
398        This internal method overwrites the target path if the target
399        path already exists.
400
401        """
402        path = self._download(url=url, scratch_dir=scratch_dir)
403        path = self._prepare_package(path, scratch_dir)
404
405        if url_subpath is None:
406            source_path = path
407        else:
408            source_path = os.path.join(path, url_subpath)
409
410        if os.path.exists(target_path):
411            _log.debug('Refreshing install: deleting "%s".' % target_path)
412            if os.path.isdir(target_path):
413                shutil.rmtree(target_path)
414            else:
415                os.remove(target_path)
416
417        self._log_transfer("Moving files into place...", source_path, target_path)
418
419        # The shutil.move() command creates intermediate directories if they
420        # do not exist, but we do not rely on this behavior since we
421        # need to create the __init__.py file anyway.
422        shutil.move(source_path, target_path)
423
424        self._record_url_downloaded(package_name, url)
425
426    def install(self, url, should_refresh=False, target_name=None,
427                url_subpath=None):
428        """Install a python package from an URL.
429
430        Args:
431          url: The URL from which to download the package.
432
433        Optional Args:
434          should_refresh: A boolean value of whether the package should be
435                          downloaded again if the package is already present.
436          target_name: The name of the folder or file in the autoinstaller
437                       target directory at which the package should be
438                       installed.  Defaults to the base name of the
439                       URL sub-path.  This parameter must be provided if
440                       the URL sub-path is not specified.
441          url_subpath: The relative path of the URL directory that should
442                       be installed.  Defaults to the full directory, or
443                       the entire URL contents.
444
445        """
446        if target_name is None:
447            if not url_subpath:
448                raise ValueError('The "target_name" parameter must be '
449                                 'provided if the "url_subpath" parameter '
450                                 "is not provided.")
451            # Remove any trailing slashes.
452            url_subpath = os.path.normpath(url_subpath)
453            target_name = os.path.basename(url_subpath)
454
455        target_path = os.path.join(self._target_dir, target_name)
456        if not should_refresh and self._is_downloaded(target_name, url):
457            _log.debug('URL for %s already downloaded.  Skipping...'
458                       % target_name)
459            _log.debug('    "%s"' % url)
460            return
461
462        self._log_transfer("Auto-installing package: %s" % target_name,
463                            url, target_path, log_method=_log.info)
464
465        # The scratch directory is where we will download and prepare
466        # files specific to this install until they are ready to move
467        # into place.
468        scratch_dir = self._create_scratch_directory(target_name)
469
470        try:
471            self._install(package_name=target_name,
472                          target_path=target_path,
473                          scratch_dir=scratch_dir,
474                          url=url,
475                          url_subpath=url_subpath)
476        except Exception, err:
477            # Append existing Error message to new Error.
478            message = ("Error auto-installing the %s package to:\n"
479                       ' "%s"\n'
480                       " --> Inner message: %s"
481                       % (target_name, target_path, err))
482            raise Exception(message)
483        finally:
484            _log.debug('Cleaning up: deleting "%s".' % scratch_dir)
485            shutil.rmtree(scratch_dir)
486        _log.debug('Auto-installed %s to:' % target_name)
487        _log.debug('    "%s"' % target_path)
488
489
if __name__ == "__main__":

    # Development-testing entry point: send DEBUG-level messages from the
    # autoinstall logger to a console handler.
    formatter = logging.Formatter('%(name)s: %(levelname)-8s %(message)s')
    console = logging.StreamHandler()
    console.setFormatter(formatter)

    _log.addHandler(console)
    _log.setLevel(logging.DEBUG)

    # Use a more visible temp directory for debug purposes.
    this_dir = os.path.dirname(__file__)
    target_dir = os.path.join(this_dir, "autoinstalled")
    temp_dir = os.path.join(target_dir, "Temp")

    installer = AutoInstaller(target_dir=target_dir,
                              temp_dir=temp_dir)

    # Each entry is (target_name, url, url_subpath); the packages are
    # installed in this order.
    demo_packages = (
        ("pep8.py",
         "http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
         "pep8-0.5.0/pep8.py"),
        ("mechanize",
         "http://pypi.python.org/packages/source/m/mechanize/mechanize-0.2.4.zip",
         "mechanize"),
    )
    for demo_name, demo_url, demo_subpath in demo_packages:
        installer.install(should_refresh=False,
                          target_name=demo_name,
                          url=demo_url,
                          url_subpath=demo_subpath)
517
518