1"""
2This module defines the PackageManager class which provides an
3implementation of the packaging system API providing methods to fetch,
4upload and remove packages.
5"""

#pylint: disable=missing-docstring

import fcntl
import logging
import os
import re
import shutil

import common
from autotest_lib.client.bin import os_dep
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import utils


# the name of the checksum file that stores the packages' checksums
CHECKSUM_FILE = "packages.checksum"


def has_pbzip2():
    '''Check if parallel bzip2 is available on this system.'''
    try:
        os_dep.command('pbzip2')
    except ValueError:
        return False
    return True


# is parallel bzip2 available for use?
_PBZIP2_AVAILABLE = has_pbzip2()


def parse_ssh_path(repo):
    '''
    Parse ssh://xx@xx/path/to/ and return a tuple with host_line and
    remote path
    '''

    match = re.search('^ssh://(.*?)(/.*)$', repo)
    if match:
        return match.groups()
    else:
        raise error.PackageUploadError(
            "Incorrect SSH path in global_config: %s" % repo)
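# Illustrative example (the host and path below are hypothetical, not taken
# from any autotest config): given the regex above, an ssh repo URL splits as
#   parse_ssh_path('ssh://user@pkghost/usr/local/autotest/packages')
#     -> ('user@pkghost', '/usr/local/autotest/packages')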


def repo_run_command(repo, cmd, ignore_status=False, cd=True):
    """Run a command relative to the repo's path"""
    repo = repo.strip()
    run_cmd = None
    cd_str = ''
    if repo.startswith('ssh://'):
        username = None
        hostline, remote_path = parse_ssh_path(repo)
        if cd:
            cd_str = 'cd %s && ' % remote_path
        if '@' in hostline:
            username, host = hostline.split('@')
            run_cmd = 'ssh %s@%s "%s%s"' % (username, host, cd_str, cmd)
        else:
            run_cmd = 'ssh %s "%s%s"' % (hostline, cd_str, cmd)

    else:
        if cd:
            cd_str = 'cd %s && ' % repo
        run_cmd = "%s%s" % (cd_str, cmd)

    if run_cmd:
        return utils.run(run_cmd, ignore_status=ignore_status)
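# Illustrative example (hypothetical repo and command): for an ssh repo the
# command is wrapped in an ssh invocation that first cd's into the remote
# path, e.g.
#   repo_run_command('ssh://user@pkghost/packages', 'ls')
# runs roughly:  ssh user@pkghost "cd /packages && ls"
# For a local repo path the same call degenerates to "cd <repo> && ls".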


def create_directory(repo):
    remote_path = repo
    if repo.startswith('ssh://'):
        _, remote_path = parse_ssh_path(repo)
    repo_run_command(repo, 'mkdir -p %s' % remote_path, cd=False)


def check_diskspace(repo, min_free=None):
    # Note: 1 GB = 10**9 bytes (SI unit).
    if min_free is None:
        min_free = global_config.global_config.get_config_value('PACKAGES',
                                                          'minimum_free_space',
                                                          type=int, default=1)
    try:
        df = repo_run_command(repo,
                              'df -PB %d . | tail -1' % 10 ** 9).stdout.split()
        free_space_gb = int(df[3])
    except Exception, e:
        raise error.RepoUnknownError('Unknown Repo Error: %s' % e)
    if free_space_gb < min_free:
        raise error.RepoDiskFullError('Not enough disk space available '
                                      '%sg < %sg' % (free_space_gb, min_free))
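# For reference, the parsed 'df -PB 1000000000 . | tail -1' line looks like
# (hypothetical numbers):
#   /dev/sda1  236  120  104  54%  /usr/local/autotest/packages
# so df[3] above is the "Available" column, already expressed in 10**9-byte
# (GB-sized) blocks.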


def check_write(repo):
    try:
        repo_testfile = '.repo_test_file'
        repo_run_command(repo, 'touch %s' % repo_testfile).stdout.strip()
        repo_run_command(repo, 'rm ' + repo_testfile)
    except error.CmdError:
        raise error.RepoWriteError('Unable to write to ' + repo)


def trim_custom_directories(repo, older_than_days=None):
    if not repo:
        return

    if older_than_days is None:
        older_than_days = global_config.global_config.get_config_value(
            'PACKAGES', 'custom_max_age', type=int, default=40)
    cmd = 'find . -type f -atime +%s -exec rm -f {} \;' % older_than_days
    repo_run_command(repo, cmd, ignore_status=True)


class RepositoryFetcher(object):
    url = None


    def fetch_pkg_file(self, filename, dest_path):
        """ Fetch a package file from a package repository.

        @param filename: The filename of the package file to fetch.
        @param dest_path: Destination path to download the file to.

        @raises PackageFetchError if the fetch failed
        """
        raise NotImplementedError()


class HttpFetcher(RepositoryFetcher):
    wget_cmd_pattern = 'wget --connect-timeout=15 -nv %s -O %s'


    def __init__(self, package_manager, repository_url):
        """
        @param repository_url: The base URL of the http repository
        """
        self.run_command = package_manager._run_command
        self.url = repository_url

    def exists(self, destpath, target='file'):
        """Check if a file or directory exists using `test`.

        This is a wrapper for run_command.

        Args:
          target: Optional string that should either be 'file' or 'dir'
                  indicating what should exist.
        """
        if target == 'dir':
            test_cmd = 'test -d %s'
        else:
            test_cmd = 'test -e %s'

        try:
            self.run_command(test_cmd % destpath)
            return True
        except (error.CmdError, error.AutoservRunError):
            return False

    def _quick_http_test(self):
        """ Run a simple 30 second wget on the repository to see if it is
        reachable. This avoids the need to wait for a full 10min timeout.
        """
        # just make a temp file to write a test fetch into
        mktemp = 'mktemp -u /tmp/tmp.XXXXXX'
        dest_file_path = self.run_command(mktemp).stdout.strip()

        try:
            # build up a wget command
            http_cmd = self.wget_cmd_pattern % (self.url, dest_file_path)
            try:
                self.run_command(http_cmd, _run_command_dargs={'timeout': 30})
            except Exception, e:
                msg = 'HTTP test failed, unable to contact %s: %s'
                raise error.PackageFetchError(msg % (self.url, e))
        finally:
            self.run_command('rm -rf %s' % dest_file_path)


    def fetch_pkg_file(self, filename, dest_path):
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)

        # do a quick test to verify the repo is reachable
        self._quick_http_test()

        # try to retrieve the package via http
        package_url = os.path.join(self.url, filename)
        try:
            cmd = self.wget_cmd_pattern % (package_url, dest_path)
            result = self.run_command(cmd,
                                      _run_command_dargs={'timeout': 1200})

            if not self.exists(dest_path):
                logging.error('wget failed: %s', result)
                raise error.CmdError(cmd, result)

            logging.info('Successfully fetched %s from %s', filename,
                         package_url)
        except error.CmdError as e:
            # remove whatever junk was retrieved when the get failed
            self.run_command('rm -f %s' % dest_path)

            raise error.PackageFetchError('%s not found in %s\n%s'
                    'wget error code: %d' % (filename, package_url,
                    e.result_obj.stderr, e.result_obj.exit_status))


class LocalFilesystemFetcher(RepositoryFetcher):
    def __init__(self, package_manager, local_dir):
        self.run_command = package_manager._run_command
        self.url = local_dir


    def fetch_pkg_file(self, filename, dest_path):
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)
        local_path = os.path.join(self.url, filename)
        try:
            self.run_command('cp %s %s' % (local_path, dest_path))
            logging.debug('Successfully fetched %s from %s', filename,
                          local_path)
        except error.CmdError, e:
            raise error.PackageFetchError(
                'Package %s could not be fetched from %s'
                % (filename, self.url), e)


class BasePackageManager(object):
    def __init__(self, pkgmgr_dir, hostname=None, repo_urls=None,
                 upload_paths=None, do_locking=True, run_function=utils.run,
                 run_function_args=[], run_function_dargs={}):
        '''
        repo_urls: The list of repository urls which are consulted
                   whilst fetching the package
        upload_paths: The list of repository paths to which the package
                      is uploaded
        pkgmgr_dir : A directory that can be used by the package manager
                      to dump stuff (like checksum files of the repositories
                      etc.).
        do_locking : Enable locking when the packages are installed.

        run_function is used to execute the commands throughout this file.
        It defaults to utils.run() but a custom method (if provided) should
        be of the same schema as utils.run. It should return a CmdResult
        object and raise a CmdError exception on failure. The reason for
        using a separate function to run the commands is that the same code
        can be run to fetch a package on the local machine or on a remote
        machine (in which case ssh_host's run function is passed in for
        run_function).
        '''
        # In memory dictionary that stores the checksums of packages
        self._checksum_dict = {}

        self.pkgmgr_dir = pkgmgr_dir
        self.do_locking = do_locking
        self.hostname = hostname
        self.repositories = []

        # Create an internal function that is a simple wrapper of
        # run_function and takes in the args and dargs as arguments
        def _run_command(command, _run_command_args=run_function_args,
                         _run_command_dargs={}):
            '''
            Special internal function that takes in a command as
            argument and passes it on to run_function (if specified).
            The _run_command_dargs are merged into run_function_dargs
            with the former having more precedence than the latter.
            '''
            new_dargs = dict(run_function_dargs)
            new_dargs.update(_run_command_dargs)
            # avoid polluting logs with extremely verbose packaging output
            new_dargs.update({'stdout_tee' : None})

            return run_function(command, *_run_command_args,
                                **new_dargs)

        self._run_command = _run_command

        # Process the repository URLs
        if not repo_urls:
            repo_urls = []
        elif hostname:
            repo_urls = self.get_mirror_list(repo_urls)
        for url in repo_urls:
            self.add_repository(url)

        # Process the upload URLs
        if not upload_paths:
            self.upload_paths = []
        else:
            self.upload_paths = list(upload_paths)


    def add_repository(self, repo):
        if isinstance(repo, basestring):
            self.repositories.append(self.get_fetcher(repo))
        elif isinstance(repo, RepositoryFetcher):
            self.repositories.append(repo)
        else:
            raise TypeError("repo must be RepositoryFetcher or url string")

    def exists(self, destpath, target='file'):
        """Check if a file or directory exists using `test`.

        This is a wrapper for _run_command.

        Args:
          target: Optional string that should either be 'file' or 'dir'
                  indicating what should exist.
        """
        if target == 'dir':
            test_cmd = 'test -d %s'
        else:
            test_cmd = 'test -e %s'

        try:
            self._run_command(test_cmd % destpath)
            return True
        except (error.CmdError, error.AutoservRunError):
            return False

    def get_fetcher(self, url):
        if url.startswith('http://'):
            return HttpFetcher(self, url)
        else:
            return LocalFilesystemFetcher(self, url)


    def repo_check(self, repo):
        '''
        Check to make sure the repo is in a sane state:
        ensure we have at least the configured minimum amount of free space
        Make sure we can write to the repo
        '''
        if not repo.startswith('/') and not repo.startswith('ssh:'):
            return
        try:
            create_directory(repo)
            check_diskspace(repo)
            check_write(repo)
        except (error.RepoWriteError, error.RepoUnknownError,
                error.RepoDiskFullError), e:
            raise error.RepoError("ERROR: Repo %s: %s" % (repo, e))


    def upkeep(self, custom_repos=None):
        '''
        Clean up custom upload/download areas
        '''
        from autotest_lib.server import subcommand
        if not custom_repos:
            # Not all package types necessarily require or allow custom repos
            try:
                custom_repos = global_config.global_config.get_config_value(
                    'PACKAGES', 'custom_upload_location').split(',')
            except global_config.ConfigError:
                custom_repos = []
            try:
                custom_download = global_config.global_config.get_config_value(
                    'PACKAGES', 'custom_download_location')
                custom_repos += [custom_download]
            except global_config.ConfigError:
                pass

            if not custom_repos:
                return

        subcommand.parallel_simple(trim_custom_directories, custom_repos,
                                   log=False)


    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
                    preserve_install_dir=False, repo_url=None):
        '''
        Remove install_dir if it already exists and then recreate it unless
        preserve_install_dir is specified as True.
        Fetch the package into fetch_dir. Untar the package into install_dir.
        The assumption is that packages are of the form :
        <pkg_type>-<pkg_name>.tar.bz2
        name        : name of the package
        pkg_type    : type of the package
        fetch_dir   : the directory into which the package tarball will be
                      fetched
        install_dir : the directory where the package files will be untarred to
        repo_url    : the url of the repository to fetch the package from.
        '''

        # The do_locking flag is on by default unless you disable it
        # (typically when packages are installed directly from the server
        # onto the client, in which case the fcntl locking won't work because
        # the code runs on the server).
        if self.do_locking:
            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')

        try:
            if self.do_locking:
                fcntl.flock(lockfile, fcntl.LOCK_EX)

            self._run_command('mkdir -p %s' % fetch_dir)

            pkg_name = self.get_tarball_name(name, pkg_type)
            fetch_path = os.path.join(fetch_dir, pkg_name)
            try:
                # Fetch the package into fetch_dir
                self.fetch_pkg(pkg_name, fetch_path, use_checksum=True)

                # check to see if the install_dir exists and if it does
                # then check to see if the .checksum file is the latest
                if (self.exists(install_dir, target='dir') and
                    not self.untar_required(fetch_path, install_dir)):
                    return

                # untar the package into install_dir and
                # update the checksum in that directory
                if not preserve_install_dir:
                    # Make sure we clean up the install_dir
                    self._run_command('rm -rf %s' % install_dir)
                self._run_command('mkdir -p %s' % install_dir)

                self.untar_pkg(fetch_path, install_dir)

            except error.PackageFetchError, why:
                raise error.PackageInstallError(
                    'Installation of %s(type:%s) failed : %s'
                    % (name, pkg_type, why))
        finally:
            if self.do_locking:
                fcntl.flock(lockfile, fcntl.LOCK_UN)
                lockfile.close()


    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=False):
        '''
        Fetch the package into dest_path from repo_url. By default repo_url
        is None and the package is looked for in all the repositories
        specified. Otherwise it is fetched from the specific repo_url.
        pkg_name     : name of the package (ex: test-sleeptest.tar.bz2,
                                            dep-gcc.tar.bz2, kernel.1-1.rpm)
        dest_path    : complete path of where the package will be fetched to.
        repo_url     : the URL of the repository where the package is located.
        use_checksum : This is set to False to fetch the packages.checksum file
                       so that the checksum comparison is bypassed for the
                       checksum file itself. This is used internally by the
                       packaging system. It should be ignored by external
                       callers of this method who use it to fetch custom
                       packages.
        '''
        # Check if the destination dir exists.
        if not self.exists(os.path.dirname(dest_path), target='dir'):
            raise error.PackageFetchError("Please provide a valid "
                                          "destination: %s " % dest_path)

        # See if the package was already fetched earlier, if so
        # the checksums need to be compared and the package is now
        # fetched only if they differ.
        pkg_exists = self.exists(dest_path)

        # if a repository location is explicitly provided, fetch the package
        # from there and return
        if repo_url:
            repositories = [self.get_fetcher(repo_url)]
        elif self.repositories:
            repositories = self.repositories
        else:
            raise error.PackageFetchError("No repository urls specified")

        # install the package from the package repos, try the repos in
        # reverse order, assuming that the 'newest' repos are most desirable
        for fetcher in reversed(repositories):
            try:
                # Fetch the package if it is not there, the checksum does
                # not match, or checksums are disabled entirely
                need_to_fetch = (
                        not use_checksum or not pkg_exists
                        or not self.compare_checksum(dest_path))
                if need_to_fetch:
                    fetcher.fetch_pkg_file(pkg_name, dest_path)
                    # update checksum so we won't refetch next time.
                    if use_checksum:
                        self.update_checksum(dest_path)
                return
            except (error.PackageFetchError, error.AutoservRunError) as e:
                # The package could not be found in this repo, continue looking
                logging.debug(e)

        repo_url_list = [repo.url for repo in repositories]
        message = ('%s could not be fetched from any of the repos %s' %
                   (pkg_name, repo_url_list))
        logging.error(message)
        # if we got here then that means the package is not found
        # in any of the repositories.
        raise error.PackageFetchError(message)
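    # Illustrative call (hypothetical names and paths): fetch a test package
    # into a temporary directory with checksum verification enabled:
    #   pkgmgr.fetch_pkg('test-sleeptest.tar.bz2',
    #                    '/tmp/packages/test-sleeptest.tar.bz2',
    #                    use_checksum=True)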


    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False,
                   timeout=300):
        from autotest_lib.server import subcommand
        if upload_path:
            upload_path_list = [upload_path]
            self.upkeep(upload_path_list)
        elif len(self.upload_paths) > 0:
            self.upkeep()
            upload_path_list = self.upload_paths
        else:
            raise error.PackageUploadError("Invalid Upload Path specified")

        if update_checksum:
            # get the packages' checksum file and update it with the current
            # package's checksum
            self.update_checksum(pkg_path)

        commands = []
        for path in upload_path_list:
            commands.append(subcommand.subcommand(self.upload_pkg_parallel,
                                                  (pkg_path, path,
                                                   update_checksum)))

        results = subcommand.parallel(commands, timeout, return_results=True)
        for result in results:
            if result:
                print str(result)


    # TODO(aganti): Fix the bug with the current checksum logic where
    # packages' checksums that are not present consistently in all the
    # repositories are not handled properly. This is a corner case though
    # but the ideal solution is to make the checksum file repository specific
    # and then maintain it.
    def upload_pkg_parallel(self, pkg_path, upload_path, update_checksum=False):
        '''
        Uploads to a specified upload_path or to all the repos.
        Also uploads the checksum file to all the repos.
        pkg_path        : The complete path to the package file
        upload_path     : the absolute path where the files are copied to.
                          if set to 'None' assumes 'all' repos
        update_checksum : If set to False (the default), the checksum file is
                          not updated. This is necessary for custom packages
                          (like custom kernels and custom tests) that get
                          uploaded but do not need to be part of the checksum
                          file and bloat it.
        '''
        self.repo_check(upload_path)
        # upload the package
        if os.path.isdir(pkg_path):
            self.upload_pkg_dir(pkg_path, upload_path)
        else:
            self.upload_pkg_file(pkg_path, upload_path)
            if update_checksum:
                self.upload_pkg_file(self._get_checksum_file_path(),
                                     upload_path)


    def upload_pkg_file(self, file_path, upload_path):
        '''
        Upload a single file. Depending on the upload path, the appropriate
        method for that protocol is called. Currently this simply copies the
        file to the target directory (but can be extended for other
        protocols). This assumes that the web server is running on the same
        machine where the method is being called from. The upload_path's
        files are basically served by that web server.
        '''
        try:
            if upload_path.startswith('ssh://'):
                # parse ssh://user@host/usr/local/autotest/packages
                hostline, remote_path = parse_ssh_path(upload_path)
                try:
                    utils.run('scp %s %s:%s' % (file_path, hostline,
                                                remote_path))
                    r_path = os.path.join(remote_path,
                                          os.path.basename(file_path))
                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, r_path))
                except error.CmdError:
                    logging.error("Error uploading to repository %s",
                                  upload_path)
            else:
                shutil.copy(file_path, upload_path)
                os.chmod(os.path.join(upload_path,
                                      os.path.basename(file_path)), 0644)
        except (IOError, os.error), why:
            logging.error("Upload of %s to %s failed: %s", file_path,
                          upload_path, why)


    def upload_pkg_dir(self, dir_path, upload_path):
        '''
        Upload a full directory. Depending on the upload path, the appropriate
        method for that protocol is called. Currently this copies the whole
        tmp package directory to the target directory.
        This assumes that the web server is running on the same machine where
        the method is being called from. The upload_path's files are
        basically served by that web server.
        '''
        local_path = os.path.join(dir_path, "*")
        try:
            if upload_path.startswith('ssh://'):
                hostline, remote_path = parse_ssh_path(upload_path)
                try:
                    utils.run('scp %s %s:%s' % (local_path, hostline,
                                                remote_path))
                    ssh_path = os.path.join(remote_path, "*")
                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, ssh_path))
                except error.CmdError:
                    logging.error("Error uploading to repository: %s",
                                  upload_path)
            else:
                utils.run("cp %s %s " % (local_path, upload_path))
                up_path = os.path.join(upload_path, "*")
                utils.run("chmod 644 %s" % up_path)
        except (IOError, os.error), why:
            raise error.PackageUploadError("Upload of %s to %s failed: %s"
                                           % (dir_path, upload_path, why))


    def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
        '''
        Remove the package from the specified remove_path
        pkg_name    : name of the package (ex: test-sleeptest.tar.bz2,
                                           dep-gcc.tar.bz2)
        remove_path : the location to remove the package from.

        '''
        if remove_path:
            remove_path_list = [remove_path]
        elif len(self.upload_paths) > 0:
            remove_path_list = self.upload_paths
        else:
            raise error.PackageRemoveError(
                "Invalid path to remove the pkg from")

        checksum_path = self._get_checksum_file_path()

        if remove_checksum:
            self.remove_checksum(pkg_name)

        # remove the package and upload the checksum file to the repos
        for path in remove_path_list:
            self.remove_pkg_file(pkg_name, path)
            self.upload_pkg_file(checksum_path, path)


    def remove_pkg_file(self, filename, pkg_dir):
        '''
        Remove the file named filename from pkg_dir
        '''
        try:
            # Remove the file
            if pkg_dir.startswith('ssh://'):
                hostline, remote_path = parse_ssh_path(pkg_dir)
                path = os.path.join(remote_path, filename)
                utils.run("ssh %s 'rm -rf %s'" % (hostline, path))
            else:
                os.remove(os.path.join(pkg_dir, filename))
        except (IOError, os.error), why:
            raise error.PackageRemoveError("Could not remove %s from %s: %s "
                                           % (filename, pkg_dir, why))


    def get_mirror_list(self, repo_urls):
        '''
            Stub function for site specific mirrors.

            Returns:
                Priority ordered list
        '''
        return repo_urls


    def _get_checksum_file_path(self):
        '''
        Return the complete path of the checksum file (assumed to be stored
        in self.pkgmgr_dir).
        '''
        return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)


    def _get_checksum_dict(self):
        '''
        Fetch the checksum file if not already fetched. If the checksum file
        cannot be fetched from the repos then a new file is created with
        the current package's (specified in pkg_path) checksum value in it.
        Populate the local checksum dictionary with the values read from
        the checksum file.
        The checksum file is assumed to be present in self.pkgmgr_dir
        '''
        checksum_path = self._get_checksum_file_path()
        if not self._checksum_dict:
            # Fetch the checksum file
            try:
                if not self.exists(checksum_path):
                    # The packages checksum file does not exist locally.
                    # See if it is present in the repositories.
                    self.fetch_pkg(CHECKSUM_FILE, checksum_path)
            except error.PackageFetchError:
                # This should not happen whilst fetching a package: if a
                # package is present in the repository, the corresponding
                # checksum file should also be automatically present. This
                # case happens only when a package is being uploaded and it
                # is the first package to be uploaded to the repos (hence no
                # checksum file has been created yet).
                # Return an empty dictionary in that case.
                return {}

            # Read the checksum file into memory
            checksum_file_contents = self._run_command('cat '
                                                       + checksum_path).stdout

            # Return {} if we have an empty checksum file present
            if not checksum_file_contents.strip():
                return {}

            # Parse the checksum file contents into self._checksum_dict
            for line in checksum_file_contents.splitlines():
                checksum, package_name = line.split(None, 1)
                self._checksum_dict[package_name] = checksum

        return self._checksum_dict
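    # For reference, packages.checksum is a plain text file with one
    # "<md5> <package name>" pair per line, matching the split() above and
    # the format written by _save_checksum_dict(), e.g. (hypothetical):
    #   d41d8cd98f00b204e9800998ecf8427e test-sleeptest.tar.bz2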


    def _save_checksum_dict(self, checksum_dict):
        '''
        Save the checksum dictionary onto the checksum file. Update the
        local _checksum_dict variable with this new set of values.
        checksum_dict :  New checksum dictionary
        '''
        checksum_path = self._get_checksum_file_path()
        self._checksum_dict = checksum_dict.copy()
        checksum_contents = '\n'.join(checksum + ' ' + pkg_name
                                      for pkg_name, checksum in
                                      checksum_dict.iteritems())
        # Write the checksum file back to disk
        self._run_command('echo "%s" > %s' % (checksum_contents,
                                              checksum_path),
                          _run_command_dargs={'verbose': False})


    def compute_checksum(self, pkg_path):
        '''
        Compute the MD5 checksum for the package file and return it.
        pkg_path : The complete path for the package file
        '''
        # Check if the checksum has been pre-calculated.
        # There are two modes of operation:
        #
        # 1. Package is compiled on dev machine / build server : In this
        # case, we will have the freshest checksum during the install
        # phase (which was computed and stored during src_compile). The
        # checksum always gets recomputed during src_compile.
        #
        # 2. Package is installed from a fetched prebuilt: Here, we will
        # have the checksum associated with what was used to compile
        # the prebuilt. So it is expected to be the same.
        checksum_path = pkg_path + '.checksum'
        if os.path.exists(checksum_path):
            print ("Checksum %s exists" % checksum_path)
            with open(checksum_path, "r") as f:
                return f.read().replace('\n', '')
        md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
        return md5sum_output.split()[0]
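    # For reference, 'md5sum <pkg>' prints "<32 hex digits>  <path>", so
    # split()[0] above is just the hex digest, e.g. (hypothetical):
    #   d41d8cd98f00b204e9800998ecf8427e  /tmp/test-sleeptest.tar.bz2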


    def update_checksum(self, pkg_path):
        '''
        Update the checksum of the package in the packages' checksum
        file. This method is called whenever a package is fetched just
        to be sure that the checksums in the local file are the latest.
        pkg_path : The complete path to the package file.
        '''
        # Compute the new checksum
        new_checksum = self.compute_checksum(pkg_path)
        checksum_dict = self._get_checksum_dict()
        checksum_dict[os.path.basename(pkg_path)] = new_checksum
        self._save_checksum_dict(checksum_dict)


    def remove_checksum(self, pkg_name):
        '''
        Remove the checksum of the package from the packages checksum file.
        This method is called whenever a package is removed from the
        repositories in order to clean up its corresponding checksum.
        pkg_name :  The name of the package to be removed
        '''
        checksum_dict = self._get_checksum_dict()
        if pkg_name in checksum_dict:
            del checksum_dict[pkg_name]
        self._save_checksum_dict(checksum_dict)


    def compare_checksum(self, pkg_path):
        '''
        Calculate the checksum of the file specified in pkg_path and
        compare it with the checksum in the checksum file.
        Return True if both match, else return False.
        pkg_path : The full path to the package file for which the
                   checksum is being compared
        '''
        checksum_dict = self._get_checksum_dict()
        package_name = os.path.basename(pkg_path)
        if not checksum_dict or package_name not in checksum_dict:
            return False

        repository_checksum = checksum_dict[package_name]
        local_checksum = self.compute_checksum(pkg_path)
        return (local_checksum == repository_checksum)


    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
        '''
        Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2.
        Excludes the directories specified in exclude_string while tarring
        the source. Returns the tarball path.
        '''
        tarball_path = os.path.join(dest_dir, pkg_name)
        temp_path = tarball_path + '.tmp'
        cmd_list = ['tar', '-cf', temp_path, '-C', src_dir]
        if _PBZIP2_AVAILABLE:
            cmd_list.append('--use-compress-prog=pbzip2')
        else:
            cmd_list.append('-j')
        if exclude_string is not None:
            cmd_list.append(exclude_string)

        try:
            utils.system(' '.join(cmd_list))
        except:
            os.unlink(temp_path)
            raise

        os.rename(temp_path, tarball_path)
        return tarball_path
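    # Illustrative call (hypothetical arguments): exclude_string is appended
    # verbatim to the tar command line, and the command built above has no
    # source path of its own, so exclude_string is expected to also carry the
    # path(s) to archive (e.g. a trailing ' .'):
    #   pkgmgr.tar_package('test-mytest.tar.bz2', '/path/to/mytest', '/tmp',
    #                      exclude_string='--exclude=.svn .')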


    def untar_required(self, tarball_path, dest_dir):
        '''
        Compare the checksum of the tarball_path with the .checksum file
        in the dest_dir and return False if it matches. The untar
        of the package happens only if the checksums do not match.
        '''
        checksum_path = os.path.join(dest_dir, '.checksum')
        try:
            existing_checksum = self._run_command('cat ' + checksum_path).stdout
        except (error.CmdError, error.AutoservRunError):
            # If the .checksum file is not present (generally, this should
            # not be the case) then return True so that the untar happens
            return True

        new_checksum = self.compute_checksum(tarball_path)
        return (new_checksum.strip() != existing_checksum.strip())


    def untar_pkg(self, tarball_path, dest_dir):
        '''
        Untar the package present in the tarball_path and put a
        ".checksum" file in the dest_dir containing the checksum
        of the tarball. This method
        assumes that the package to be untarred is of the form
        <name>.tar.bz2
        '''
        self._run_command('tar --no-same-owner -xjf %s -C %s' %
                          (tarball_path, dest_dir))
        # Put the .checksum file in the install_dir to note
        # where the package came from
        pkg_checksum = self.compute_checksum(tarball_path)
        pkg_checksum_path = os.path.join(dest_dir,
                                         '.checksum')
        self._run_command('echo "%s" > %s '
                          % (pkg_checksum, pkg_checksum_path))


    @staticmethod
    def get_tarball_name(name, pkg_type):
        """Converts a package name and type into a tarball name.

        @param name: The name of the package
        @param pkg_type: The type of the package

        @returns A tarball filename for that specific type of package
        """
        assert '-' not in pkg_type
        return '%s-%s.tar.bz2' % (pkg_type, name)


    @staticmethod
    def parse_tarball_name(tarball_name):
        """Converts a package tarball name into a package name and type.

        @param tarball_name: The filename of the tarball

        @returns (name, pkg_type) where name is the package name and pkg_type
            is the package type.
        """
        match = re.search(r'^([^-]*)-(.*)\.tar\.bz2$', tarball_name)
        pkg_type, name = match.groups()
        return name, pkg_type
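    # Illustrative round trip (hypothetical package):
    #   get_tarball_name('sleeptest', 'test')        -> 'test-sleeptest.tar.bz2'
    #   parse_tarball_name('test-sleeptest.tar.bz2') -> ('sleeptest', 'test')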


    def is_url(self, url):
        """Return true if path looks like a URL"""
        return url.startswith('http://')


    def get_package_name(self, url, pkg_type):
        '''
        Extract the group and test name from the url. This method is currently
        used only for tests.
        '''
        if pkg_type == 'test':
            regex = '[^:]+://(.*)/([^/]*)$'
            return self._get_package_name(url, regex)
        else:
            return ('', url)


    def _get_package_name(self, url, regex):
        if not self.is_url(url):
            if url.endswith('.tar.bz2'):
                testname = url.replace('.tar.bz2', '')
                testname = re.sub(r'(\d*)\.', '', testname)
                return (testname, testname)
            else:
                return ('', url)

        match = re.match(regex, url)
        if not match:
            return ('', url)
        group, filename = match.groups()
        # Generate the group prefix.
        group = re.sub(r'\W', '_', group)
        # Drop the extension to get the raw test name.
        testname = re.sub(r'\.tar\.bz2', '', filename)
        # Drop any random numbers at the end of the test name if any
        testname = re.sub(r'\.(\d*)', '', testname)
        return (group, testname)
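    # Illustrative example (hypothetical URL): for a test package URL the
    # group prefix is derived from the path and the test name from the
    # filename:
    #   get_package_name('http://pkghost/custom/mytest.tar.bz2', 'test')
    #     -> ('pkghost_custom', 'mytest')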


class SiteHttpFetcher(HttpFetcher):
    wget_cmd_pattern = ('wget --connect-timeout=15 --retry-connrefused '
                        '--wait=5 -nv %s -O %s')

    # shortcut quick http test for now since our dev server does not support
    # this operation.
    def _quick_http_test(self):
        return


class PackageManager(BasePackageManager):
    def get_fetcher(self, url):
        if url.startswith('http://'):
            return SiteHttpFetcher(self, url)
        else:
            return super(PackageManager, self).get_fetcher(url)

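# A minimal usage sketch (all names, URLs and paths below are hypothetical,
# not taken from any autotest configuration):
#
#   pkgmgr = PackageManager('/tmp/pkgmgr',
#                           repo_urls=['http://pkghost/packages'],
#                           upload_paths=['ssh://user@pkghost/packages'],
#                           do_locking=True)
#   pkgmgr.install_pkg('sleeptest', 'test',
#                      fetch_dir='/tmp/pkgmgr/fetch',
#                      install_dir='/tmp/pkgmgr/tests/sleeptest')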