• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5# repohooks/pre-upload.py currently does not run pylint. But for developers who
6# want to check their code manually we disable several harmless pylint warnings
7# which just distract from more serious remaining issues.
8#
9# The instance variables _host and _install_paths are not defined in __init__().
10# pylint: disable=attribute-defined-outside-init
11#
12# Many short variable names don't follow the naming convention.
13# pylint: disable=invalid-name
14#
15# _parse_result() and _dir_size() don't access self and could be functions.
16# pylint: disable=no-self-use
17
18import contextlib
19import errno
20import glob
21import hashlib
22import logging
23import os
24import pipes
25import re
26import shutil
27import stat
28import tempfile
29import urlparse
30
31from autotest_lib.client.bin import utils as client_utils
32from autotest_lib.client.common_lib import error
33from autotest_lib.client.common_lib.cros import dev_server
34from autotest_lib.server import test
35from autotest_lib.server import utils
36from autotest_lib.server.cros import cts_expected_failure_parser
37from autotest_lib.server.cros import tradefed_chromelogin as login
38from autotest_lib.server.cros import tradefed_constants as constants
39from autotest_lib.server.cros import tradefed_utils
40
41# For convenience, add to our scope.
42parse_tradefed_result = tradefed_utils.parse_tradefed_result
43adb_keepalive = tradefed_utils.adb_keepalive
44
45
class TradefedTest(test.test):
    """Base class to prepare DUT to run tests via tradefed."""
    version = 1

    # Default and upperbounds of max_retry, based on board and revision
    # after branching (that is, 'y' of R74-12345.y.z).
    #
    # By default, 0<=y<1 does 5 retries and 1<=y does 10. The |max_retry|
    # parameter in control files can override the count, within the
    # _BRANCH_MAX_RETRY limit below.
    #
    # Each entry is a (branch number lower bound, retry count) pair.
    _BRANCH_DEFAULT_RETRY = [(0, 5), (1, 10)]  # dev=5, beta=stable=10
    _BRANCH_MAX_RETRY = [(0, 5), (1, 10),      # dev=5, beta=10, stable=99
        (constants.APPROXIMATE_STABLE_BRANCH_NUMBER, 99)]
    # Per-board upper bound of max_retry, keyed by board name.
    # TODO(kinaba): betty-arcnext
    _BOARD_MAX_RETRY = {'betty': 0}

    # Class-level defaults; presumably overridden by subclasses or filled in
    # lazily at run time -- the code doing so is outside this part of the file.
    _SHARD_CMD = None
    _board_arch = None
    _board_name = None
    _release_branch_number = None  # The 'y' of OS version Rxx-xxxxx.y.z
    _android_version = None
    # Number of entries in the media directory, recorded by _prepare_media().
    _num_media_bundles = 0
    # Perf entries reported by cleanup() through output_perf_value().
    # NOTE(review): this is a class-level mutable list, so it is shared by all
    # instances unless an instance rebinds it -- confirm this is intended.
    _perf_results = []
69
70    def _log_java_version(self):
71        """Quick sanity and spew of java version installed on the server."""
72        utils.run(
73            'java',
74            args=('-version',),
75            ignore_status=False,
76            verbose=True,
77            stdout_tee=utils.TEE_TO_LOGS,
78            stderr_tee=utils.TEE_TO_LOGS)
79
80    def initialize(self,
81                   bundle=None,
82                   uri=None,
83                   host=None,
84                   hosts=None,
85                   max_retry=None,
86                   load_waivers=True,
87                   retry_manual_tests=False,
88                   warn_on_test_retry=True,
89                   hard_reboot_on_failure=False):
90        """Sets up the tools and binary bundles for the test."""
91        self._install_paths = []
92        # TODO(pwang): Remove host if we enable multiple hosts everywhere.
93        self._hosts = [host] if host else hosts
94        for host in self._hosts:
95            logging.info('Hostname: %s', host.host_port)
96        self._verify_hosts()
97
98        self._max_retry = self._get_max_retry(max_retry)
99        self._warn_on_test_retry = warn_on_test_retry
100        # Tests in the lab run within individual lxc container instances.
101        if utils.is_in_container():
102            cache_root = constants.TRADEFED_CACHE_CONTAINER
103        else:
104            cache_root = constants.TRADEFED_CACHE_LOCAL
105
106        # TODO(ihf): reevaluate this again when we run out of memory. We could
107        # for example use 32 bit java on the first run but not during retries.
108        # b/62895114. If select_32bit_java gets deleted for good also remove it
109        # from the base image.
110        # Try to save server memory (crbug.com/717413).
111        # select_32bit_java()
112
113        # The content of the cache survives across jobs.
114        self._safe_makedirs(cache_root)
115        self._tradefed_cache = os.path.join(cache_root, 'cache')
116        self._tradefed_cache_lock = os.path.join(cache_root, 'lock')
117        self._tradefed_cache_dirty = os.path.join(cache_root, 'dirty')
118        # The content of the install location does not survive across jobs and
119        # is isolated (by using a unique path)_against other autotest instances.
120        # This is not needed for the lab, but if somebody wants to run multiple
121        # TradedefTest instance.
122        self._tradefed_install = tempfile.mkdtemp(
123            prefix=constants.TRADEFED_PREFIX)
124        # Under lxc the cache is shared between multiple autotest/tradefed
125        # instances. We need to synchronize access to it. All binaries are
126        # installed through the (shared) cache into the local (unshared)
127        # lxc/autotest instance storage.
128        # If clearing the cache it must happen before all downloads.
129        self._clean_download_cache_if_needed()
130        # Set permissions (rwxr-xr-x) to the executable binaries.
131        permission = (
132            stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH
133            | stat.S_IXOTH)
134        self._install_files(constants.ADB_DIR, constants.ADB_FILES,
135                            permission)
136        self._install_files(constants.SDK_TOOLS_DIR,
137                            constants.SDK_TOOLS_FILES, permission)
138
139        # Install the tradefed bundle.
140        bundle_install_path = self._install_bundle(
141            uri or self._get_default_bundle_url(bundle))
142        self._repository = os.path.join(bundle_install_path,
143                                        self._get_tradefed_base_dir())
144
145        # Load expected test failures to exclude them from re-runs.
146        self._waivers = set()
147        if load_waivers:
148            self._waivers.update(
149                    self._get_expected_failures('expectations', bundle))
150        if not retry_manual_tests:
151            self._waivers.update(
152                    self._get_expected_failures('manual_tests', bundle))
153
154        # Load modules with no tests.
155        self._notest_modules = self._get_expected_failures('notest_modules',
156                bundle)
157        self._hard_reboot_on_failure = hard_reboot_on_failure
158
159    def cleanup(self):
160        """Cleans up any dirtied state."""
161        # Kill any lingering adb servers.
162        for host in self._hosts:
163            try:
164                self._run_adb_cmd(host, verbose=True, args=('kill-server',))
165            except (error.CmdError, AttributeError):
166                pass
167        logging.info('Cleaning up %s.', self._tradefed_install)
168        try:
169            shutil.rmtree(self._tradefed_install)
170        except IOError:
171            pass
172
173        # Create perf data for Chromeperf.
174        for perf in self._perf_results:
175            data = dict(
176                units='count',
177                higher_is_better=False,
178                replace_existing_values=True,
179            )
180            data.update(perf)
181            self.output_perf_value(**data)
182
183    def _verify_hosts(self):
184        """Verify all hosts' ChromeOS consistency."""
185        # Check release builder path. E.g. cave-release/R66-10435.0.0
186        release_builder_path = set(host.get_release_builder_path()
187                                   for host in self._hosts)
188        if len(release_builder_path) > 1:
189            raise error.TestFail('Hosts\' CHROMEOS_RELEASE_BUILDER_PATH is '
190                                 'different: %s', release_builder_path)
191
192        # Check ChromeOS ARC VERSION. E.g.
193        arc_version = set(host.get_arc_version() for host in self._hosts)
194        if len(arc_version) > 1:
195            raise error.TestFail('Hosts\' CHROMEOS_ARC_VERSION is different: '
196                                 '%s', arc_version)
197
198        # Check ChromeOS model for unibuild.
199        # TODO(pwang): Adding a check if we found how to detect host's model.
200
201    def _verify_arc_hosts(self):
202        """Verify all hosts' Android configuration consistency.
203
204        This method should only be called after all hosts' Android has been
205        successfully booted up."""
206        # Check all hosts have same Android fingerprint.
207        fingerprint = set(self._run_adb_cmd(
208            host,
209            args=('shell', 'getprop', 'ro.build.fingerprint')).stdout
210            for host in self._hosts)
211        if len(fingerprint) > 1:
212            raise error.TestFail('Hosts\' supported fingerprint is different: '
213                                 '%s', fingerprint)
214
215        # Check all hosts support same abilist.
216        abilist = set(self._run_adb_cmd(
217            host,
218            args=('shell', 'getprop', 'ro.product.cpu.abilist')).stdout
219            for host in self._hosts)
220        if len(abilist) > 1:
221            raise error.TestFail('Hosts\' supported abilist is different: %s',
222                                 abilist)
223        self._abilist = str(list(abilist)[0]).split(',')
224
225    def _calculate_timeout_factor(self, bundle):
226        """ Calculate the multiplicative factor for timeout.
227
228        The value equals to the times each test case is run, which is determined
229        by the intersection of the supported ABIs of the CTS/GTS bundle and that
230        of the tested device."""
231        arm_abis = set(('armeabi-v7a', 'arm64-v8a'))
232        x86_abis = set(('x86', 'x86_64'))
233        if bundle == 'arm':
234            tradefed_abis = arm_abis
235        elif bundle == 'x86':
236            tradefed_abis = x86_abis
237        else:
238            tradefed_abis = arm_abis | x86_abis
239        self._timeout_factor = len(set(self._get_abilist()) & tradefed_abis)
240
241    @contextlib.contextmanager
242    def _login_chrome(self, **cts_helper_kwargs):
243        """Returns Chrome log-in context manager.
244
245        Please see also cheets_StartAndroid for details about how this works.
246        """
247        # TODO(pwang): Chromelogin takes 10+ seconds for it to successfully
248        #              enter. Parallelize if this becomes a bottleneck.
249        instances = []
250        for host in self._hosts:
251            instances.append(login.ChromeLogin(host, cts_helper_kwargs))
252
253        for instance in instances:
254            instance.enter()
255        yield instances
256        for instance in instances:
257            instance.exit()
258
259    def _get_adb_targets(self):
260        """Get a list of adb targets."""
261        return [self._get_adb_target(host) for host in self._hosts]
262
263    def _get_adb_target(self, host):
264        """Get the adb target format.
265
266        This method is slightly different from host.host_port as we need to
267        explicitly specify the port so the serial name of adb target would
268        match."""
269        return '{}:{}'.format(host.hostname, host.port)
270
271    def _run_adb_cmd(self, host=None, **kwargs):
272        """Running adb command.
273
274        @param host: DUT that want to connect to. (None if the adb command is
275                     intended to run in the server. eg. keygen)
276        """
277        # As of N, tradefed could not specify which adb socket to use, which use
278        # tcp:localhost:5037 by default.
279        adb_global_option = ('-H', 'localhost', '-P', '5037')
280        if host:
281            host_port = self._get_adb_target(host)
282            adb_global_option = ('-s', host_port)
283        kwargs['args'] = adb_global_option + kwargs.get('args', ())
284        result = self._run('adb', **kwargs)
285        logging.info('adb %s:\n%s', ' '.join(kwargs.get('args')),
286                     result.stdout + result.stderr)
287        return result
288
289    def _try_adb_connect(self, host):
290        """Attempts to connect to adb on the DUT.
291
292        @param host: DUT that need to be connected.
293        @return boolean indicating if adb connected successfully.
294        """
295        # This may fail return failure due to a race condition in adb connect
296        # (b/29370989). If adb is already connected, this command will
297        # immediately return success.
298        host_port = self._get_adb_target(host)
299        result = self._run_adb_cmd(
300            host, args=('connect', host_port), verbose=True, ignore_status=True)
301        if result.exit_status != 0:
302            return False
303
304        result = self._run_adb_cmd(host, args=('devices',))
305        if not re.search(r'{}\s+(device|unauthorized)'.format(
306                re.escape(host_port)), result.stdout):
307            logging.info('No result found in with pattern: %s',
308                         r'{}\s+(device|unauthorized)'.format(
309                             re.escape(host_port)))
310            return False
311
312        # Actually test the connection with an adb command as there can be
313        # a race between detecting the connected device and actually being
314        # able to run a commmand with authenticated adb.
315        result = self._run_adb_cmd(
316            host, args=('shell', 'exit'), ignore_status=True)
317        return result.exit_status == 0
318
319    def _android_shell(self, host, command):
320        """Run a command remotely on the device in an android shell
321
322        This function is strictly for internal use only, as commands do not run
323        in a fully consistent Android environment. Prefer adb shell instead.
324        """
325        host.run('android-sh -c ' + pipes.quote(command))
326
327    def _write_android_file(self, host, filename, data):
328        """Writes a file to a location relative to the android container.
329
330        This is an internal function used to bootstrap adb.
331        Tests should use adb push to write files.
332        """
333        android_cmd = 'echo %s > %s' % (pipes.quote(data),
334                                        pipes.quote(filename))
335        self._android_shell(host, android_cmd)
336
337    def _connect_adb(self, host, pubkey_path):
338        """Sets up ADB connection to the ARC container.
339
340        @param host: DUT that should be connected to.
341        @param pubkey_path: public key that adb keygen generated.
342        """
343        logging.info('Setting up adb connection.')
344        # Generate and push keys for adb.
345        # TODO(elijahtaylor): Extract this code to arc_common and de-duplicate
346        # code in arc.py on the client side tests.
347        with open(pubkey_path, 'r') as f:
348            self._write_android_file(host, constants.ANDROID_ADB_KEYS_PATH,
349                                     f.read())
350        self._android_shell(
351            host, 'restorecon ' + pipes.quote(constants.ANDROID_ADB_KEYS_PATH))
352
353        # This starts adbd.
354        self._android_shell(host, 'setprop sys.usb.config mtp,adb')
355
356        # Also let it be automatically started upon reboot.
357        self._android_shell(host, 'setprop persist.sys.usb.config mtp,adb')
358
359        # adbd may take some time to come up. Repeatedly try to connect to adb.
360        utils.poll_for_condition(
361            lambda: self._try_adb_connect(host),
362            exception=error.TestFail('Error: Failed to set up adb connection'),
363            timeout=constants.ADB_READY_TIMEOUT_SECONDS,
364            sleep_interval=constants.ADB_POLLING_INTERVAL_SECONDS)
365
366        logging.info('Successfully setup adb connection.')
367
368    def _wait_for_arc_boot(self, host):
369        """Wait until ARC is fully booted.
370
371        Tests for the presence of the intent helper app to determine whether ARC
372        has finished booting.
373        @param host: DUT that need to be connected to.
374        """
375
376        def _intent_helper_running():
377            result = self._run_adb_cmd(
378                host,
379                args=('shell', 'pgrep', '-f', 'org.chromium.arc.intent_helper'),
380                ignore_status=True)
381            return bool(result.stdout)
382
383        utils.poll_for_condition(
384            _intent_helper_running,
385            exception=error.TestFail(
386                'Error: Timed out waiting for intent helper.'),
387            timeout=constants.ARC_READY_TIMEOUT_SECONDS,
388            sleep_interval=constants.ARC_POLLING_INTERVAL_SECONDS)
389
390    def _disable_adb_install_dialog(self, host):
391        """Disables a dialog shown on adb install execution.
392
393        By default, on adb install execution, "Allow Google to regularly check
394        device activity ... " dialog is shown. It requires manual user action
395        so that tests are blocked at the point.
396        This method disables it.
397        """
398        logging.info('Disabling the adb install dialog.')
399        result = self._run_adb_cmd(
400            host,
401            verbose=True,
402            args=('shell', 'settings', 'put', 'global',
403                  'verifier_verify_adb_installs', '0'))
404        logging.info('Disable adb dialog: %s', result.stdout)
405
406    def _ready_arc(self):
407        """Ready ARC and adb in parallel for running tests via tradefed."""
408        # Generate the adb keys on server.
409        key_path = os.path.join(self.tmpdir, 'test_key')
410        pubkey_path = key_path + '.pub'
411        self._run_adb_cmd(verbose=True, args=('keygen', pipes.quote(key_path)))
412        os.environ['ADB_VENDOR_KEYS'] = key_path
413        # Kill existing adb server to ensure that the env var is picked up.
414        self._run_adb_cmd(verbose=True, args=('kill-server',))
415
416        # TODO(pwang): connect_adb takes 10+ seconds on a single DUT.
417        #              Parallelize it if it becomes a bottleneck.
418        for host in self._hosts:
419            self._connect_adb(host, pubkey_path)
420            self._disable_adb_install_dialog(host)
421            self._wait_for_arc_boot(host)
422        self._verify_arc_hosts()
423
424    def _safe_makedirs(self, path):
425        """Creates a directory at |path| and its ancestors.
426
427        Unlike os.makedirs(), ignore errors even if directories exist.
428        """
429        try:
430            os.makedirs(path)
431        except OSError as e:
432            if not (e.errno == errno.EEXIST and os.path.isdir(path)):
433                raise
434
435    def _unzip(self, filename):
436        """Unzip the file.
437
438        The destination directory name will be the stem of filename.
439        E.g., _unzip('foo/bar/baz.zip') will create directory at
440        'foo/bar/baz', and then will inflate zip's content under the directory.
441        If here is already a directory at the stem, that directory will be used.
442
443        @param filename: Path to the zip archive.
444        @return Path to the inflated directory.
445        """
446        destination = os.path.splitext(filename)[0]
447        if os.path.isdir(destination):
448            logging.info('Skipping unzip %s, reusing content of %s', filename,
449                         destination)
450            return destination
451        tmp = tempfile.mkdtemp(dir=os.path.dirname(filename))
452        logging.info('Begin unzip %s', filename)
453        try:
454            utils.run('unzip', args=('-d', tmp, filename))
455        except:
456            logging.error('Failed unzip, cleaning up.')
457            # Clean up just created files.
458            shutil.rmtree(tmp, ignore_errors=True)
459            raise
460        logging.info('End unzip %s', filename)
461        try:
462            os.renames(tmp, destination)
463        except:
464            logging.error('Failed rename, cleaning up.')
465            shutil.rmtree(destination, ignore_errors=True)
466            shutil.rmtree(tmp, ignore_errors=True)
467            raise
468        return destination
469
470    def _dir_size(self, directory):
471        """Compute recursive size in bytes of directory."""
472        size = 0
473        for root, _, files in os.walk(directory):
474            for name in files:
475                try:
476                    size += os.path.getsize(os.path.join(root, name))
477                except OSError:
478                    logging.error('Inaccessible path (crbug/793696): %s/%s',
479                                  root, name)
480        return size
481
482    def _invalidate_download_cache(self):
483        """Marks the download cache for deferred deletion.
484
485        Used to make cache file operations atomic across failures and reboots.
486        The caller is responsible to hold the lock to the cache.
487        """
488        if not os.path.exists(self._tradefed_cache_dirty):
489            os.mkdir(self._tradefed_cache_dirty)
490
491    def _validate_download_cache(self):
492        """Validates and unmarks the download cache from deletion.
493
494        Used to make cache file operations atomic across failures and reboots.
495        The caller is responsible to hold the lock to the cache.
496        """
497        shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True)
498
499    def _clean_download_cache_if_needed(self, force=False):
500        """Invalidates cache to prevent it from growing too large."""
501        # If the cache is large enough to hold a working set, we can simply
502        # delete everything without thrashing.
503        # TODO(ihf): Investigate strategies like LRU.
504        clean = force
505        with tradefed_utils.lock(self._tradefed_cache_lock):
506            size = self._dir_size(self._tradefed_cache)
507            if size > constants.TRADEFED_CACHE_MAX_SIZE:
508                logging.info(
509                    'Current cache size=%d got too large. Clearing %s.', size,
510                    self._tradefed_cache)
511                clean = True
512            else:
513                logging.info('Current cache size=%d of %s.', size,
514                             self._tradefed_cache)
515            if os.path.exists(self._tradefed_cache_dirty):
516                logging.info('Found dirty cache.')
517                clean = True
518            if clean:
519                logging.warning('Cleaning download cache.')
520                shutil.rmtree(self._tradefed_cache, ignore_errors=True)
521                self._safe_makedirs(self._tradefed_cache)
522                shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True)
523
524    def _download_to_cache(self, uri):
525        """Downloads the uri from the storage server.
526
527        It always checks the cache for available binaries first and skips
528        download if binaries are already in cache.
529
530        The caller of this function is responsible for holding the cache lock.
531
532        @param uri: The Google Storage or dl.google.com uri.
533        @return Path to the downloaded object, name.
534        """
535        # We are hashing the uri instead of the binary. This is acceptable, as
536        # the uris are supposed to contain version information and an object is
537        # not supposed to be changed once created.
538        output_dir = os.path.join(self._tradefed_cache,
539                                  hashlib.md5(uri).hexdigest())
540        # Check for existence of cache entry. We check for directory existence
541        # instead of file existence, so that _install_bundle can delete original
542        # zip files to save disk space.
543        if os.path.exists(output_dir):
544            # TODO(crbug.com/800657): Mitigation for the invalid state. Normally
545            # this should not happen, but when a lock is force borken due to
546            # high IO load, multiple processes may enter the critical section
547            # and leave a bad state permanently.
548            if os.listdir(output_dir):
549                logging.info('Skipping download of %s, reusing content of %s.',
550                             uri, output_dir)
551                return os.path.join(output_dir,
552                    os.path.basename(urlparse.urlparse(uri).path))
553            logging.error('Empty cache entry detected %s', output_dir)
554        return self._download_to_dir(uri, output_dir)
555
556    def _download_to_dir(self, uri, output_dir):
557        """Downloads the gs|http|https uri from the storage server.
558
559        @param uri: The Google Storage or dl.google.com uri.
560        @output_dir: The directory where the downloaded file should be placed.
561        @return Path to the downloaded object, name.
562        """
563        # Split uri into 3 pieces for use by gsutil and also by wget.
564        parsed = urlparse.urlparse(uri)
565        filename = os.path.basename(parsed.path)
566        output = os.path.join(output_dir, filename)
567
568        self._safe_makedirs(output_dir)
569        if parsed.scheme not in ['gs', 'http', 'https']:
570            raise error.TestFail(
571                'Error: Unknown download scheme %s' % parsed.scheme)
572        if parsed.scheme in ['http', 'https']:
573            logging.info('Using wget to download %s to %s.', uri, output_dir)
574            # We are downloading 1 file at a time, hence using -O over -P.
575            utils.run(
576                'wget',
577                args=('--report-speed=bits', '-O', output, uri),
578                verbose=True)
579            return output
580
581        if not client_utils.is_moblab():
582            # If the machine can access to the storage server directly,
583            # defer to "gsutil" for downloading.
584            logging.info('Not in lab. Downloading %s directly to %s.',
585                         uri, output)
586            # b/17445576: gsutil rsync of individual files is not implemented.
587            utils.run('gsutil', args=('cp', uri, output), verbose=True)
588            return output
589
590        # We are in the moblab. Because the machine cannot access the storage
591        # server directly, use dev server to proxy.
592        logging.info('In lab. Downloading %s by staging to %s.',
593                     uri, output)
594
595        dirname = os.path.dirname(parsed.path)
596        archive_url = '%s://%s%s' % (parsed.scheme, parsed.netloc, dirname)
597
598        # First, request the devserver to download files into the lab network.
599        # TODO(ihf): Switch stage_artifacts to honor rsync. Then we don't have
600        # to shuffle files inside of tarballs.
601        info = self._hosts[0].host_info_store.get()
602        ds = dev_server.ImageServer.resolve(info.build)
603        ds.stage_artifacts(
604            info.build, files=[filename], archive_url=archive_url)
605
606        # Then download files from the dev server.
607        # TODO(ihf): use rsync instead of wget. Are there 3 machines involved?
608        # Itself, dev_server plus DUT? Or is there just no rsync in moblab?
609        ds_src = '/'.join([ds.url(), 'static', dirname, filename])
610        logging.info('dev_server URL: %s', ds_src)
611        # Calls into DUT to pull uri from dev_server.
612        utils.run(
613            'wget',
614            args=('--report-speed=bits', '-O', output, ds_src),
615            verbose=True)
616        return output
617
618    def _instance_copyfile(self, cache_path):
619        """Makes a copy of a file from the (shared) cache to a wholy owned
620        local instance. Also copies one level of cache directoy (MD5 named).
621        """
622        filename = os.path.basename(cache_path)
623        dirname = os.path.basename(os.path.dirname(cache_path))
624        instance_dir = os.path.join(self._tradefed_install, dirname)
625        # Make sure destination directory is named the same.
626        self._safe_makedirs(instance_dir)
627        instance_path = os.path.join(instance_dir, filename)
628        shutil.copyfile(cache_path, instance_path)
629        return instance_path
630
631    def _instance_copytree(self, cache_path):
632        """Makes a copy of a directory from the (shared and writable) cache to
633        a wholy owned local instance.
634
635        TODO(ihf): Consider using cp -al to only copy links. Not sure if this
636        is really a benefit across the container boundary, but it is risky due
637        to the possibility of corrupting the original files by an lxc instance.
638        """
639        # We keep the top 2 names from the cache_path = .../dir1/dir2.
640        dir2 = os.path.basename(cache_path)
641        dir1 = os.path.basename(os.path.dirname(cache_path))
642        instance_path = os.path.join(self._tradefed_install, dir1, dir2)
643        logging.info('Copying %s to instance %s', cache_path, instance_path)
644        shutil.copytree(cache_path, instance_path)
645        return instance_path
646
647    def _install_bundle(self, gs_uri):
648        """Downloads a zip file, installs it and returns the local path.
649
650        @param gs_uri: GS bucket that contains the necessary files.
651        """
652        if not gs_uri.endswith('.zip'):
653            raise error.TestFail('Error: Not a .zip file %s.', gs_uri)
654        # Atomic write through of file.
655        with tradefed_utils.lock(self._tradefed_cache_lock):
656            # Atomic operations.
657            self._invalidate_download_cache()
658            # Download is lazy (cache_path may not actually exist if
659            # cache_unzipped does).
660            cache_path = self._download_to_cache(gs_uri)
661            # Unzip is lazy as well (but cache_unzipped guaranteed to
662            # exist).
663            cache_unzipped = self._unzip(cache_path)
664            # To save space we delete the original zip file. This works as
665            # _download only checks existence of the cache directory for
666            # lazily skipping download, and unzip itself will bail if the
667            # unzipped destination exists. Hence we don't need the original
668            # anymore.
669            if os.path.exists(cache_path):
670                logging.info('Deleting original %s', cache_path)
671                os.remove(cache_path)
672            # Erase dirty marker from disk.
673            self._validate_download_cache()
674            # We always copy files to give tradefed a clean copy of the
675            # bundle.
676            unzipped_local = self._instance_copytree(cache_unzipped)
677        return unzipped_local
678
679    def _install_files(self, gs_dir, files, permission):
680        """Installs binary tools."""
681        for filename in files:
682            gs_uri = os.path.join(gs_dir, filename)
683            # Atomic write through of file.
684            with tradefed_utils.lock(self._tradefed_cache_lock):
685                # We don't want to leave a corrupt cache for other jobs.
686                self._invalidate_download_cache()
687                cache_path = self._download_to_cache(gs_uri)
688                # Mark cache as clean again.
689                self._validate_download_cache()
690                # This only affects the current job, so not part of cache
691                # validation.
692                local = self._instance_copyfile(cache_path)
693            os.chmod(local, permission)
694            # Keep track of PATH.
695            self._install_paths.append(os.path.dirname(local))
696
697    def _prepare_media(self, cts_uri, needs_push_media):
698        """Downloads and offers the cached media files to tradefed."""
699        if needs_push_media:
700            media = self._install_bundle(cts_uri['media'])
701            if os.path.islink(constants.TRADEFED_MEDIA_PATH):
702                os.unlink(constants.TRADEFED_MEDIA_PATH)
703            if os.path.isdir(constants.TRADEFED_MEDIA_PATH):
704                shutil.rmtree(constants.TRADEFED_MEDIA_PATH)
705            os.symlink(media, constants.TRADEFED_MEDIA_PATH)
706
707            logging.info('Offered %s as a media directory in %s',
708                    media, constants.TRADEFED_MEDIA_PATH)
709
710        # Records the number of existing media bundles, to check later.
711        if os.path.isdir(constants.TRADEFED_MEDIA_PATH):
712            self._num_media_bundles = len(
713                    os.listdir(constants.TRADEFED_MEDIA_PATH))
714
715    def _fail_on_unexpected_media_download(self):
716        if os.path.isdir(constants.TRADEFED_MEDIA_PATH):
717            contents = os.listdir(constants.TRADEFED_MEDIA_PATH)
718            if len(contents) > self._num_media_bundles:
719                raise error.TestFail(
720                    'Failed: Unexpected media bundle was added %s' % contents)
721
722    def _run(self, *args, **kwargs):
723        """Executes the given command line.
724
725        To support SDK tools, such as adb or aapt, this adds _install_paths
726        to the extra_paths. Before invoking this, ensure _install_files() has
727        been called.
728        """
729        kwargs['extra_paths'] = (
730            kwargs.get('extra_paths', []) + self._install_paths)
731        return utils.run(*args, **kwargs)
732
733    def _collect_tradefed_global_log(self, result, destination):
734        """Collects the tradefed global log.
735
736        @param result: The result object from utils.run.
737        @param destination: Autotest result directory (destination of logs).
738        """
739        match = re.search(r'Saved log to /tmp/(tradefed_global_log_.*\.txt)',
740                          result.stdout)
741        if not match:
742            logging.error('no tradefed_global_log file is found')
743            return
744
745        name = match.group(1)
746        dest = os.path.join(destination, 'logs', 'tmp')
747        self._safe_makedirs(dest)
748        shutil.copy(os.path.join('/tmp', name), os.path.join(dest, name))
749
750    def _parse_result(self, result, waivers=None):
751        """Check the result from the tradefed output.
752
753        This extracts the test pass/fail/executed list from the output of
754        tradefed. It is up to the caller to handle inconsistencies.
755
756        @param result: The result object from utils.run.
757        @param waivers: a set[] of tests which are permitted to fail.
758        """
759        return parse_tradefed_result(result.stdout, waivers)
760
761    def _get_expected_failures(self, directory, bundle_abi):
762        """Return a list of expected failures or no test module.
763
764        @param directory: A directory with expected no tests or failures files.
765        @param bundle_abi: 'arm' or 'x86' if the test is for the particular ABI.
766                           None otherwise (like GTS, built for multi-ABI.)
767        @return: A list of expected failures or no test modules for the current
768                 testing device.
769        """
770        # Load waivers and manual tests so TF doesn't re-run them.
771        expected_fail_files = []
772        test_board = self._get_board_name()
773        test_arch = self._get_board_arch()
774        sdk_ver = self._get_android_version()
775        expected_fail_dir = os.path.join(self.bindir, directory)
776        if os.path.exists(expected_fail_dir):
777            expected_fail_files += glob.glob(expected_fail_dir + '/*.yaml')
778
779        waivers = cts_expected_failure_parser.ParseKnownCTSFailures(
780            expected_fail_files)
781        return waivers.find_waivers(test_arch, test_board, bundle_abi, sdk_ver)
782
783    def _get_abilist(self):
784        """Return the abilist supported by calling adb command.
785
786        This method should only be called after the android environment is
787        successfully initialized."""
788        if not self._abilist:
789            self._abilist = self._run_adb_cmd(
790                self._hosts[0],
791                args=('shell', 'getprop',
792                      'ro.product.cpu.abilist')).stdout.split(',')
793        return self._abilist
794
795    def _get_release_branch_number(self):
796        """Returns the DUT branch number (z of Rxx-yyyyy.z.w) or 0 on error."""
797        if not self._release_branch_number:
798            ver = (self._hosts[0].get_release_version() or '').split('.')
799            self._release_branch_number = (int(ver[1]) if len(ver) >= 3 else 0)
800        return self._release_branch_number
801
802    def _get_board_arch(self):
803        """Return target DUT arch name."""
804        if not self._board_arch:
805            self._board_arch = ('arm' if self._hosts[0].get_cpu_arch() == 'arm'
806                else 'x86')
807        return self._board_arch
808
809    def _get_board_name(self):
810        """Return target DUT board name."""
811        if not self._board_name:
812            self._board_name = self._hosts[0].get_board().split(':')[1]
813        return self._board_name
814
815    def _get_android_version(self):
816        """Return target DUT Android SDK version"""
817        # TODO(kinaba): factor this out to server/hosts/cros_host.py
818        if not self._android_version:
819            self._android_version = self._hosts[0].run(
820                'grep ANDROID_SDK /etc/lsb-release',
821                ignore_status=True).stdout.rstrip().split('=')[1]
822        return self._android_version
823
824    def _get_max_retry(self, max_retry):
825        """Return the maximum number of retries.
826
827        @param max_retry: max_retry specified in the control file.
828        @return: number of retries for this specific host.
829        """
830        if max_retry is None:
831            max_retry = self._get_branch_retry(self._BRANCH_DEFAULT_RETRY)
832        candidate = [max_retry]
833        candidate.append(self._get_board_retry())
834        candidate.append(self._get_branch_retry(self._BRANCH_MAX_RETRY))
835        return min(x for x in candidate if x is not None)
836
837    def _get_board_retry(self):
838        """Return the maximum number of retries for DUT board name.
839
840        @return: number of max_retry or None.
841        """
842        board = self._get_board_name()
843        if board in self._BOARD_MAX_RETRY:
844            return self._BOARD_MAX_RETRY[board]
845        logging.info('No board retry specified for board: %s', board)
846        return None
847
848    def _get_branch_retry(self, table):
849        """Returns the retry count for DUT branch number defined in |table|."""
850        number = self._get_release_branch_number()
851        for lowerbound, retry in reversed(table):
852            if lowerbound <= number:
853                return retry
854        logging.warning('Could not establish channel. Using retry=0.')
855        return 0
856
857    def _run_precondition_scripts(self, commands, steps):
858        """Run precondition scripts on all the hosts."""
859        for host in self._hosts:
860            for command in commands:
861                # Replace {0} (if any) with the retry count.
862                formatted_command = command.format(steps)
863                logging.info('RUN: %s\n', formatted_command)
864                output = host.run(formatted_command, ignore_status=True)
865                logging.info('END: %s\n', output)
866
867    def _run_and_parse_tradefed(self, commands):
868        """Kick off the tradefed command.
869
870        Assumes that only last entry of |commands| actually runs tests and has
871        interesting output (results, logs) for collection. Ignores all other
872        commands for this purpose.
873
874        @param commands: List of lists of command tokens.
875        @raise TestFail: when a test failure is detected.
876        @return: tuple of (tests, pass, fail, notexecuted) counts.
877        """
878        target_argument = []
879        for host in self._hosts:
880            target_argument += ['-s', self._get_adb_target(host)]
881        shard_argument = []
882        if len(self._hosts) > 1:
883            if self._SHARD_CMD:
884                shard_argument = [self._SHARD_CMD, str(len(self._hosts))]
885            else:
886                logging.warning('cts-tradefed shard command isn\'t defined, '
887                                'falling back to use single device.')
888        commands = [command + target_argument + shard_argument
889                    for command in commands]
890
891        try:
892            output = self._run_tradefed(commands)
893        except Exception as e:
894            self._log_java_version()
895            if not isinstance(e, error.CmdTimeoutError):
896                # In case this happened due to file corruptions, try to
897                # force to recreate the cache.
898                logging.error('Failed to run tradefed! Cleaning up now.')
899                self._clean_download_cache_if_needed(force=True)
900            raise
901
902        result_destination = os.path.join(self.resultsdir,
903                                          self._get_tradefed_base_dir())
904        # Gather the global log first. Datetime parsing below can abort the test
905        # if tradefed startup had failed. Even then the global log is useful.
906        self._collect_tradefed_global_log(output, result_destination)
907        # Result parsing must come after all other essential operations as test
908        # warnings, errors and failures can be raised here.
909        return self._parse_result(output, waivers=self._waivers)
910
911    def _setup_result_directories(self):
912        """Sets up the results and logs directories for tradefed.
913
914        Tradefed saves the logs and results at:
915          self._repository/results/$datetime/
916          self._repository/results/$datetime.zip
917          self._repository/logs/$datetime/
918        Because other tools rely on the currently chosen Google storage paths
919        we need to keep destination_results in:
920          self.resultdir/android-cts/results/$datetime/
921          self.resultdir/android-cts/results/$datetime.zip
922          self.resultdir/android-cts/results/logs/$datetime/
923        To bridge between them, create symlinks from the former to the latter.
924        """
925        logging.info('Setting up tradefed results and logs directories.')
926
927        results_destination = os.path.join(self.resultsdir,
928                                           self._get_tradefed_base_dir())
929        logs_destination = os.path.join(results_destination, 'logs')
930        directory_mapping = [
931            (os.path.join(self._repository, 'results'), results_destination),
932            (os.path.join(self._repository, 'logs'), logs_destination),
933        ]
934
935        for (tradefed_path, final_path) in directory_mapping:
936            if os.path.exists(tradefed_path):
937                shutil.rmtree(tradefed_path)
938            self._safe_makedirs(final_path)
939            os.symlink(final_path, tradefed_path)
940
941    def _install_plan(self, subplan):
942        """Copy test subplan to CTS-TF.
943
944        @param subplan: CTS subplan to be copied into TF.
945        """
946        logging.info('Install subplan: %s', subplan)
947        subplans_tf_dir = os.path.join(self._repository, 'subplans')
948        if not os.path.exists(subplans_tf_dir):
949            os.makedirs(subplans_tf_dir)
950        test_subplan_file = os.path.join(self.bindir, 'subplans',
951                                         '%s.xml' % subplan)
952        try:
953            shutil.copy(test_subplan_file, subplans_tf_dir)
954        except (shutil.Error, OSError, IOError) as e:
955            raise error.TestFail(
956                'Error: failed to copy test subplan %s to CTS bundle. %s' %
957                (test_subplan_file, e))
958
959    def _should_skip_test(self, _bundle):
960        """Some tests are expected to fail and are skipped.
961
962        Subclasses should override with specific details.
963        """
964        return False
965
966    def _should_reboot(self, steps):
967        """Oracle to decide if DUT should reboot or just restart Chrome.
968
969        For now we will not reboot after the first two iterations, but on all
970        iterations afterward as before. In particular this means that most CTS
971        tests will now not get a "clean" machine, but one on which tests ran
972        before. But we will still reboot after persistent failures, hopefully
973        not causing too many flakes down the line.
974        """
975        if steps < 3:
976            return False
977        return True
978
979    def _run_tradefed_list_results(self):
980        """Run the `tradefed list results` command.
981
982        @return: tuple of the last (session_id, pass, fail, all_done?).
983        """
984        output = self._run_tradefed([['list', 'results']])
985
986        # Parses the last session from the output that looks like:
987        #
988        # Session  Pass  Fail  Modules Complete ...
989        # 0        90    10    1 of 2
990        # 1        199   1     2 of 2
991        # ...
992        lastmatch = None
993        for m in re.finditer(r'^(\d+)\s+(\d+)\s+(\d+)\s+(\d+) of (\d+)',
994                             output.stdout, re.MULTILINE):
995            session, passed, failed, done, total = map(int,
996                                                       m.group(1, 2, 3, 4, 5))
997            lastmatch = (session, passed, failed, done == total)
998        return lastmatch
999
1000    def _tradefed_retry_command(self, template, session_id):
1001        raise NotImplementedError('Subclass should override this function')
1002
1003    def _tradefed_run_command(self, template):
1004        raise NotImplementedError('Subclass should override this function')
1005
1006    def _run_tradefed_with_retries(self,
1007                                   test_name,
1008                                   run_template,
1009                                   retry_template,
1010                                   timeout,
1011                                   needs_push_media=False,
1012                                   target_module=None,
1013                                   target_plan=None,
1014                                   bundle=None,
1015                                   cts_uri=None,
1016                                   login_precondition_commands=[],
1017                                   precondition_commands=[],
1018                                   perf_description=None):
1019        """Run CTS/GTS with retry logic.
1020
1021        We first kick off the specified module. Then rerun just the failures
1022        on the next MAX_RETRY iterations.
1023        """
1024        # On dev and beta channels timeouts are sharp, lenient on stable.
1025        self._timeout = timeout
1026        if (self._get_release_branch_number() >=
1027                constants.APPROXIMATE_STABLE_BRANCH_NUMBER):
1028            self._timeout += 3600
1029
1030        if self._should_skip_test(bundle):
1031            logging.warning('Skipped test %s', ' '.join(test_name))
1032            return
1033
1034        steps = -1  # For historic reasons the first iteration is not counted.
1035        self.summary = ''
1036        accurate = []
1037        board = self._get_board_name()
1038        session_id = None
1039
1040        self._setup_result_directories()
1041        self._prepare_media(cts_uri, needs_push_media)
1042
1043        # This loop retries failures. For this reason please do not raise
1044        # TestFail in this loop if you suspect the failure might be fixed
1045        # in the next loop iteration.
1046        while steps < self._max_retry:
1047            steps += 1
1048            keep_media = needs_push_media and steps >= 1
1049            self._run_precondition_scripts(login_precondition_commands, steps)
1050            with self._login_chrome(
1051                    board=board,
1052                    reboot=self._should_reboot(steps),
1053                    # TODO(rohitbm): Evaluate if power cycle really helps with
1054                    # Bluetooth test failures, and then make the implementation
1055                    # more strict by first running complete restart and reboot
1056                    # retries and then perform power cycle.
1057                    hard_reboot_on_failure=(self._hard_reboot_on_failure
1058                                     and steps == self._max_retry),
1059                    dont_override_profile=keep_media) as current_logins:
1060                self._ready_arc()
1061                self._calculate_timeout_factor(bundle)
1062                self._run_precondition_scripts(precondition_commands, steps)
1063
1064                # Run tradefed.
1065                if session_id == None:
1066                    if target_plan is not None:
1067                        self._install_plan(target_plan)
1068
1069                    logging.info('Running %s:', test_name)
1070                    commands = [self._tradefed_run_command(run_template)]
1071                else:
1072                    logging.info('Retrying failures of %s with session_id %d:',
1073                                 test_name, session_id)
1074                    commands = [self._tradefed_retry_command(retry_template,
1075                                                             session_id)]
1076
1077                # TODO(pwang): Evaluate if it is worth it to get the number of
1078                #              not-excecuted, for instance, by collecting all
1079                #              tests on startup (very expensive, may take 30
1080                #              minutes).
1081                waived_tests, acc = self._run_and_parse_tradefed(
1082                    commands)
1083                self._fail_on_unexpected_media_download()
1084                result = self._run_tradefed_list_results()
1085                if not result:
1086                    logging.error('Did not find any test results. Retry.')
1087                    for current_login in current_logins:
1088                        current_login.need_reboot()
1089                    continue
1090
1091                waived = len(waived_tests)
1092                last_session_id, passed, failed, all_done = result
1093                # If the result is |acc|urate according to the log, or the
1094                # inaccuracy is recognized by tradefed (not all_done), then
1095                # it is fine.
1096                accurate.append(acc or not all_done)
1097                if failed < waived:
1098                    logging.error(
1099                        'Error: Internal waiver bookkeeping has become '
1100                        'inconsistent (f=%d, w=%d)', failed, waived)
1101
1102                msg = 'run' if session_id == None else ' retry'
1103                msg += '(p=%s, f=%s, w=%s)' % (passed, failed, waived)
1104                self.summary += msg
1105                logging.info('RESULT: %s %s', msg, result)
1106
1107                # Check for no-test modules. We use the "all_done" indicator
1108                # provided by list_results to decide if there are outstanding
1109                # modules to iterate over (similar to missing tests just on a
1110                # per-module basis).
1111                notest = (passed + failed == 0 and all_done)
1112                if target_module in self._notest_modules:
1113                    if notest:
1114                        logging.info('Package has no tests as expected.')
1115                        return
1116                    else:
1117                        # We expected no tests, but the new bundle drop must
1118                        # have added some for us. Alert us to the situation.
1119                        raise error.TestFail(
1120                            'Failed: Remove module %s from '
1121                            'notest_modules directory!' % target_module)
1122                elif notest:
1123                    logging.error('Did not find any tests in module. Hoping '
1124                                  'this is transient. Retry after reboot.')
1125                    for current_login in current_logins:
1126                        current_login.need_reboot()
1127                    continue
1128
1129                session_id = last_session_id
1130
1131                # Check if all the tests passed.
1132                if failed <= waived and all_done:
1133                    break
1134
1135                # TODO(b/127908450) Tradefed loses track of not-executed tests
1136                # when the commandline pattern included '*', and retry run for
1137                # them wrongly declares all tests passed. This is misleading.
1138                # Rather, we give up the retry and report the result as FAIL.
1139                if not all_done and '*' in ''.join(run_template):
1140                    break
1141
1142        # Tradefed finished normally. Record the failures to perf.
1143        if target_module:
1144            # Only record the failure by module, which exclude 'all', 'collects-tests-only', etc.
1145            self._perf_results.append(dict(
1146                description=perf_description if perf_description else target_module,
1147                value=failed,
1148                graph=bundle
1149            ))
1150
1151        if session_id == None:
1152            raise error.TestFail('Error: Could not find any tests in module.')
1153
1154        if failed <= waived and all_done:
1155            if not all(accurate):
1156                # Tests count inaccurate, remove perf to avoid false alarm.
1157                self._perf_results.pop()
1158                raise error.TestFail(
1159                    'Failed: Not all tests were executed. After %d '
1160                    'retries passing %d tests, waived=%d. %s' % (
1161                        steps, passed, waived, self.summary))
1162            # TODO(ihf): Make this error.TestPass('...') once
1163            # available.
1164            if steps > 0 and self._warn_on_test_retry:
1165                raise error.TestWarn(
1166                    'Passed: after %d retries passing %d tests, '
1167                    'waived=%d. %s' % (steps, passed, waived,
1168                                       self.summary))
1169            return
1170
1171        raise error.TestFail(
1172            'Failed: after %d retries giving up. '
1173            'passed=%d, failed=%d, waived=%d%s%s. %s' %
1174            (steps, passed, failed, waived, '' if all_done else ', notexec>=1',
1175             '' if all(accurate) else ', Tests may not be accurate.',
1176             self.summary))
1177