1# Copyright 2016 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5# repohooks/pre-upload.py currently does not run pylint. But for developers who 6# want to check their code manually we disable several harmless pylint warnings 7# which just distract from more serious remaining issues. 8# 9# The instance variables _host and _install_paths are not defined in __init__(). 10# pylint: disable=attribute-defined-outside-init 11# 12# Many short variable names don't follow the naming convention. 13# pylint: disable=invalid-name 14# 15# _parse_result() and _dir_size() don't access self and could be functions. 16# pylint: disable=no-self-use 17 18import contextlib 19import errno 20import glob 21import hashlib 22import logging 23import os 24import pipes 25import re 26import shutil 27import stat 28import tempfile 29import urlparse 30 31from autotest_lib.client.bin import utils as client_utils 32from autotest_lib.client.common_lib import error 33from autotest_lib.client.common_lib.cros import dev_server 34from autotest_lib.server import test 35from autotest_lib.server import utils 36from autotest_lib.server.cros import cts_expected_failure_parser 37from autotest_lib.server.cros import tradefed_chromelogin as login 38from autotest_lib.server.cros import tradefed_constants as constants 39from autotest_lib.server.cros import tradefed_utils 40 41# For convenience, add to our scope. 42parse_tradefed_result = tradefed_utils.parse_tradefed_result 43adb_keepalive = tradefed_utils.adb_keepalive 44 45 46class TradefedTest(test.test): 47 """Base class to prepare DUT to run tests via tradefed.""" 48 version = 1 49 50 # Default and upperbounds of max_retry, based on board and revision 51 # after branching (that is, 'y' of R74-12345.y.z). 52 # 53 # By default, 0<=y<1 does 5 retries and 1<=y does 10. The |max_retry| 54 # parameter in control files can override the count, within the 55 # _BRANCH_MAX_RETRY limit below. 56 _BRANCH_DEFAULT_RETRY = [(0, 5), (1, 10)] # dev=5, beta=stable=10 57 _BRANCH_MAX_RETRY = [(0, 5), (1, 10), # dev=5, beta=10, stable=99 58 (constants.APPROXIMATE_STABLE_BRANCH_NUMBER, 99)] 59 # TODO(kinaba): betty-arcnext 60 _BOARD_MAX_RETRY = {'betty': 0} 61 62 _SHARD_CMD = None 63 _board_arch = None 64 _board_name = None 65 _release_branch_number = None # The 'y' of OS version Rxx-xxxxx.y.z 66 _android_version = None 67 _num_media_bundles = 0 68 _perf_results = [] 69 70 def _log_java_version(self): 71 """Quick sanity and spew of java version installed on the server.""" 72 utils.run( 73 'java', 74 args=('-version',), 75 ignore_status=False, 76 verbose=True, 77 stdout_tee=utils.TEE_TO_LOGS, 78 stderr_tee=utils.TEE_TO_LOGS) 79 80 def initialize(self, 81 bundle=None, 82 uri=None, 83 host=None, 84 hosts=None, 85 max_retry=None, 86 load_waivers=True, 87 retry_manual_tests=False, 88 warn_on_test_retry=True, 89 hard_reboot_on_failure=False): 90 """Sets up the tools and binary bundles for the test.""" 91 self._install_paths = [] 92 # TODO(pwang): Remove host if we enable multiple hosts everywhere. 93 self._hosts = [host] if host else hosts 94 for host in self._hosts: 95 logging.info('Hostname: %s', host.host_port) 96 self._verify_hosts() 97 98 self._max_retry = self._get_max_retry(max_retry) 99 self._warn_on_test_retry = warn_on_test_retry 100 # Tests in the lab run within individual lxc container instances. 101 if utils.is_in_container(): 102 cache_root = constants.TRADEFED_CACHE_CONTAINER 103 else: 104 cache_root = constants.TRADEFED_CACHE_LOCAL 105 106 # TODO(ihf): reevaluate this again when we run out of memory. We could 107 # for example use 32 bit java on the first run but not during retries. 108 # b/62895114. If select_32bit_java gets deleted for good also remove it 109 # from the base image. 110 # Try to save server memory (crbug.com/717413). 111 # select_32bit_java() 112 113 # The content of the cache survives across jobs. 114 self._safe_makedirs(cache_root) 115 self._tradefed_cache = os.path.join(cache_root, 'cache') 116 self._tradefed_cache_lock = os.path.join(cache_root, 'lock') 117 self._tradefed_cache_dirty = os.path.join(cache_root, 'dirty') 118 # The content of the install location does not survive across jobs and 119 # is isolated (by using a unique path)_against other autotest instances. 120 # This is not needed for the lab, but if somebody wants to run multiple 121 # TradedefTest instance. 122 self._tradefed_install = tempfile.mkdtemp( 123 prefix=constants.TRADEFED_PREFIX) 124 # Under lxc the cache is shared between multiple autotest/tradefed 125 # instances. We need to synchronize access to it. All binaries are 126 # installed through the (shared) cache into the local (unshared) 127 # lxc/autotest instance storage. 128 # If clearing the cache it must happen before all downloads. 129 self._clean_download_cache_if_needed() 130 # Set permissions (rwxr-xr-x) to the executable binaries. 131 permission = ( 132 stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH 133 | stat.S_IXOTH) 134 self._install_files(constants.ADB_DIR, constants.ADB_FILES, 135 permission) 136 self._install_files(constants.SDK_TOOLS_DIR, 137 constants.SDK_TOOLS_FILES, permission) 138 139 # Install the tradefed bundle. 140 bundle_install_path = self._install_bundle( 141 uri or self._get_default_bundle_url(bundle)) 142 self._repository = os.path.join(bundle_install_path, 143 self._get_tradefed_base_dir()) 144 145 # Load expected test failures to exclude them from re-runs. 146 self._waivers = set() 147 if load_waivers: 148 self._waivers.update( 149 self._get_expected_failures('expectations', bundle)) 150 if not retry_manual_tests: 151 self._waivers.update( 152 self._get_expected_failures('manual_tests', bundle)) 153 154 # Load modules with no tests. 155 self._notest_modules = self._get_expected_failures('notest_modules', 156 bundle) 157 self._hard_reboot_on_failure = hard_reboot_on_failure 158 159 def cleanup(self): 160 """Cleans up any dirtied state.""" 161 # Kill any lingering adb servers. 162 for host in self._hosts: 163 try: 164 self._run_adb_cmd(host, verbose=True, args=('kill-server',)) 165 except (error.CmdError, AttributeError): 166 pass 167 logging.info('Cleaning up %s.', self._tradefed_install) 168 try: 169 shutil.rmtree(self._tradefed_install) 170 except IOError: 171 pass 172 173 # Create perf data for Chromeperf. 174 for perf in self._perf_results: 175 data = dict( 176 units='count', 177 higher_is_better=False, 178 replace_existing_values=True, 179 ) 180 data.update(perf) 181 self.output_perf_value(**data) 182 183 def _verify_hosts(self): 184 """Verify all hosts' ChromeOS consistency.""" 185 # Check release builder path. E.g. cave-release/R66-10435.0.0 186 release_builder_path = set(host.get_release_builder_path() 187 for host in self._hosts) 188 if len(release_builder_path) > 1: 189 raise error.TestFail('Hosts\' CHROMEOS_RELEASE_BUILDER_PATH is ' 190 'different: %s', release_builder_path) 191 192 # Check ChromeOS ARC VERSION. E.g. 193 arc_version = set(host.get_arc_version() for host in self._hosts) 194 if len(arc_version) > 1: 195 raise error.TestFail('Hosts\' CHROMEOS_ARC_VERSION is different: ' 196 '%s', arc_version) 197 198 # Check ChromeOS model for unibuild. 199 # TODO(pwang): Adding a check if we found how to detect host's model. 200 201 def _verify_arc_hosts(self): 202 """Verify all hosts' Android configuration consistency. 203 204 This method should only be called after all hosts' Android has been 205 successfully booted up.""" 206 # Check all hosts have same Android fingerprint. 207 fingerprint = set(self._run_adb_cmd( 208 host, 209 args=('shell', 'getprop', 'ro.build.fingerprint')).stdout 210 for host in self._hosts) 211 if len(fingerprint) > 1: 212 raise error.TestFail('Hosts\' supported fingerprint is different: ' 213 '%s', fingerprint) 214 215 # Check all hosts support same abilist. 216 abilist = set(self._run_adb_cmd( 217 host, 218 args=('shell', 'getprop', 'ro.product.cpu.abilist')).stdout 219 for host in self._hosts) 220 if len(abilist) > 1: 221 raise error.TestFail('Hosts\' supported abilist is different: %s', 222 abilist) 223 self._abilist = str(list(abilist)[0]).split(',') 224 225 def _calculate_timeout_factor(self, bundle): 226 """ Calculate the multiplicative factor for timeout. 227 228 The value equals to the times each test case is run, which is determined 229 by the intersection of the supported ABIs of the CTS/GTS bundle and that 230 of the tested device.""" 231 arm_abis = set(('armeabi-v7a', 'arm64-v8a')) 232 x86_abis = set(('x86', 'x86_64')) 233 if bundle == 'arm': 234 tradefed_abis = arm_abis 235 elif bundle == 'x86': 236 tradefed_abis = x86_abis 237 else: 238 tradefed_abis = arm_abis | x86_abis 239 self._timeout_factor = len(set(self._get_abilist()) & tradefed_abis) 240 241 @contextlib.contextmanager 242 def _login_chrome(self, **cts_helper_kwargs): 243 """Returns Chrome log-in context manager. 244 245 Please see also cheets_StartAndroid for details about how this works. 246 """ 247 # TODO(pwang): Chromelogin takes 10+ seconds for it to successfully 248 # enter. Parallelize if this becomes a bottleneck. 249 instances = [] 250 for host in self._hosts: 251 instances.append(login.ChromeLogin(host, cts_helper_kwargs)) 252 253 for instance in instances: 254 instance.enter() 255 yield instances 256 for instance in instances: 257 instance.exit() 258 259 def _get_adb_targets(self): 260 """Get a list of adb targets.""" 261 return [self._get_adb_target(host) for host in self._hosts] 262 263 def _get_adb_target(self, host): 264 """Get the adb target format. 265 266 This method is slightly different from host.host_port as we need to 267 explicitly specify the port so the serial name of adb target would 268 match.""" 269 return '{}:{}'.format(host.hostname, host.port) 270 271 def _run_adb_cmd(self, host=None, **kwargs): 272 """Running adb command. 273 274 @param host: DUT that want to connect to. (None if the adb command is 275 intended to run in the server. eg. keygen) 276 """ 277 # As of N, tradefed could not specify which adb socket to use, which use 278 # tcp:localhost:5037 by default. 279 adb_global_option = ('-H', 'localhost', '-P', '5037') 280 if host: 281 host_port = self._get_adb_target(host) 282 adb_global_option = ('-s', host_port) 283 kwargs['args'] = adb_global_option + kwargs.get('args', ()) 284 result = self._run('adb', **kwargs) 285 logging.info('adb %s:\n%s', ' '.join(kwargs.get('args')), 286 result.stdout + result.stderr) 287 return result 288 289 def _try_adb_connect(self, host): 290 """Attempts to connect to adb on the DUT. 291 292 @param host: DUT that need to be connected. 293 @return boolean indicating if adb connected successfully. 294 """ 295 # This may fail return failure due to a race condition in adb connect 296 # (b/29370989). If adb is already connected, this command will 297 # immediately return success. 298 host_port = self._get_adb_target(host) 299 result = self._run_adb_cmd( 300 host, args=('connect', host_port), verbose=True, ignore_status=True) 301 if result.exit_status != 0: 302 return False 303 304 result = self._run_adb_cmd(host, args=('devices',)) 305 if not re.search(r'{}\s+(device|unauthorized)'.format( 306 re.escape(host_port)), result.stdout): 307 logging.info('No result found in with pattern: %s', 308 r'{}\s+(device|unauthorized)'.format( 309 re.escape(host_port))) 310 return False 311 312 # Actually test the connection with an adb command as there can be 313 # a race between detecting the connected device and actually being 314 # able to run a commmand with authenticated adb. 315 result = self._run_adb_cmd( 316 host, args=('shell', 'exit'), ignore_status=True) 317 return result.exit_status == 0 318 319 def _android_shell(self, host, command): 320 """Run a command remotely on the device in an android shell 321 322 This function is strictly for internal use only, as commands do not run 323 in a fully consistent Android environment. Prefer adb shell instead. 324 """ 325 host.run('android-sh -c ' + pipes.quote(command)) 326 327 def _write_android_file(self, host, filename, data): 328 """Writes a file to a location relative to the android container. 329 330 This is an internal function used to bootstrap adb. 331 Tests should use adb push to write files. 332 """ 333 android_cmd = 'echo %s > %s' % (pipes.quote(data), 334 pipes.quote(filename)) 335 self._android_shell(host, android_cmd) 336 337 def _connect_adb(self, host, pubkey_path): 338 """Sets up ADB connection to the ARC container. 339 340 @param host: DUT that should be connected to. 341 @param pubkey_path: public key that adb keygen generated. 342 """ 343 logging.info('Setting up adb connection.') 344 # Generate and push keys for adb. 345 # TODO(elijahtaylor): Extract this code to arc_common and de-duplicate 346 # code in arc.py on the client side tests. 347 with open(pubkey_path, 'r') as f: 348 self._write_android_file(host, constants.ANDROID_ADB_KEYS_PATH, 349 f.read()) 350 self._android_shell( 351 host, 'restorecon ' + pipes.quote(constants.ANDROID_ADB_KEYS_PATH)) 352 353 # This starts adbd. 354 self._android_shell(host, 'setprop sys.usb.config mtp,adb') 355 356 # Also let it be automatically started upon reboot. 357 self._android_shell(host, 'setprop persist.sys.usb.config mtp,adb') 358 359 # adbd may take some time to come up. Repeatedly try to connect to adb. 360 utils.poll_for_condition( 361 lambda: self._try_adb_connect(host), 362 exception=error.TestFail('Error: Failed to set up adb connection'), 363 timeout=constants.ADB_READY_TIMEOUT_SECONDS, 364 sleep_interval=constants.ADB_POLLING_INTERVAL_SECONDS) 365 366 logging.info('Successfully setup adb connection.') 367 368 def _wait_for_arc_boot(self, host): 369 """Wait until ARC is fully booted. 370 371 Tests for the presence of the intent helper app to determine whether ARC 372 has finished booting. 373 @param host: DUT that need to be connected to. 374 """ 375 376 def _intent_helper_running(): 377 result = self._run_adb_cmd( 378 host, 379 args=('shell', 'pgrep', '-f', 'org.chromium.arc.intent_helper'), 380 ignore_status=True) 381 return bool(result.stdout) 382 383 utils.poll_for_condition( 384 _intent_helper_running, 385 exception=error.TestFail( 386 'Error: Timed out waiting for intent helper.'), 387 timeout=constants.ARC_READY_TIMEOUT_SECONDS, 388 sleep_interval=constants.ARC_POLLING_INTERVAL_SECONDS) 389 390 def _disable_adb_install_dialog(self, host): 391 """Disables a dialog shown on adb install execution. 392 393 By default, on adb install execution, "Allow Google to regularly check 394 device activity ... " dialog is shown. It requires manual user action 395 so that tests are blocked at the point. 396 This method disables it. 397 """ 398 logging.info('Disabling the adb install dialog.') 399 result = self._run_adb_cmd( 400 host, 401 verbose=True, 402 args=('shell', 'settings', 'put', 'global', 403 'verifier_verify_adb_installs', '0')) 404 logging.info('Disable adb dialog: %s', result.stdout) 405 406 def _ready_arc(self): 407 """Ready ARC and adb in parallel for running tests via tradefed.""" 408 # Generate the adb keys on server. 409 key_path = os.path.join(self.tmpdir, 'test_key') 410 pubkey_path = key_path + '.pub' 411 self._run_adb_cmd(verbose=True, args=('keygen', pipes.quote(key_path))) 412 os.environ['ADB_VENDOR_KEYS'] = key_path 413 # Kill existing adb server to ensure that the env var is picked up. 414 self._run_adb_cmd(verbose=True, args=('kill-server',)) 415 416 # TODO(pwang): connect_adb takes 10+ seconds on a single DUT. 417 # Parallelize it if it becomes a bottleneck. 418 for host in self._hosts: 419 self._connect_adb(host, pubkey_path) 420 self._disable_adb_install_dialog(host) 421 self._wait_for_arc_boot(host) 422 self._verify_arc_hosts() 423 424 def _safe_makedirs(self, path): 425 """Creates a directory at |path| and its ancestors. 426 427 Unlike os.makedirs(), ignore errors even if directories exist. 428 """ 429 try: 430 os.makedirs(path) 431 except OSError as e: 432 if not (e.errno == errno.EEXIST and os.path.isdir(path)): 433 raise 434 435 def _unzip(self, filename): 436 """Unzip the file. 437 438 The destination directory name will be the stem of filename. 439 E.g., _unzip('foo/bar/baz.zip') will create directory at 440 'foo/bar/baz', and then will inflate zip's content under the directory. 441 If here is already a directory at the stem, that directory will be used. 442 443 @param filename: Path to the zip archive. 444 @return Path to the inflated directory. 445 """ 446 destination = os.path.splitext(filename)[0] 447 if os.path.isdir(destination): 448 logging.info('Skipping unzip %s, reusing content of %s', filename, 449 destination) 450 return destination 451 tmp = tempfile.mkdtemp(dir=os.path.dirname(filename)) 452 logging.info('Begin unzip %s', filename) 453 try: 454 utils.run('unzip', args=('-d', tmp, filename)) 455 except: 456 logging.error('Failed unzip, cleaning up.') 457 # Clean up just created files. 458 shutil.rmtree(tmp, ignore_errors=True) 459 raise 460 logging.info('End unzip %s', filename) 461 try: 462 os.renames(tmp, destination) 463 except: 464 logging.error('Failed rename, cleaning up.') 465 shutil.rmtree(destination, ignore_errors=True) 466 shutil.rmtree(tmp, ignore_errors=True) 467 raise 468 return destination 469 470 def _dir_size(self, directory): 471 """Compute recursive size in bytes of directory.""" 472 size = 0 473 for root, _, files in os.walk(directory): 474 for name in files: 475 try: 476 size += os.path.getsize(os.path.join(root, name)) 477 except OSError: 478 logging.error('Inaccessible path (crbug/793696): %s/%s', 479 root, name) 480 return size 481 482 def _invalidate_download_cache(self): 483 """Marks the download cache for deferred deletion. 484 485 Used to make cache file operations atomic across failures and reboots. 486 The caller is responsible to hold the lock to the cache. 487 """ 488 if not os.path.exists(self._tradefed_cache_dirty): 489 os.mkdir(self._tradefed_cache_dirty) 490 491 def _validate_download_cache(self): 492 """Validates and unmarks the download cache from deletion. 493 494 Used to make cache file operations atomic across failures and reboots. 495 The caller is responsible to hold the lock to the cache. 496 """ 497 shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True) 498 499 def _clean_download_cache_if_needed(self, force=False): 500 """Invalidates cache to prevent it from growing too large.""" 501 # If the cache is large enough to hold a working set, we can simply 502 # delete everything without thrashing. 503 # TODO(ihf): Investigate strategies like LRU. 504 clean = force 505 with tradefed_utils.lock(self._tradefed_cache_lock): 506 size = self._dir_size(self._tradefed_cache) 507 if size > constants.TRADEFED_CACHE_MAX_SIZE: 508 logging.info( 509 'Current cache size=%d got too large. Clearing %s.', size, 510 self._tradefed_cache) 511 clean = True 512 else: 513 logging.info('Current cache size=%d of %s.', size, 514 self._tradefed_cache) 515 if os.path.exists(self._tradefed_cache_dirty): 516 logging.info('Found dirty cache.') 517 clean = True 518 if clean: 519 logging.warning('Cleaning download cache.') 520 shutil.rmtree(self._tradefed_cache, ignore_errors=True) 521 self._safe_makedirs(self._tradefed_cache) 522 shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True) 523 524 def _download_to_cache(self, uri): 525 """Downloads the uri from the storage server. 526 527 It always checks the cache for available binaries first and skips 528 download if binaries are already in cache. 529 530 The caller of this function is responsible for holding the cache lock. 531 532 @param uri: The Google Storage or dl.google.com uri. 533 @return Path to the downloaded object, name. 534 """ 535 # We are hashing the uri instead of the binary. This is acceptable, as 536 # the uris are supposed to contain version information and an object is 537 # not supposed to be changed once created. 538 output_dir = os.path.join(self._tradefed_cache, 539 hashlib.md5(uri).hexdigest()) 540 # Check for existence of cache entry. We check for directory existence 541 # instead of file existence, so that _install_bundle can delete original 542 # zip files to save disk space. 543 if os.path.exists(output_dir): 544 # TODO(crbug.com/800657): Mitigation for the invalid state. Normally 545 # this should not happen, but when a lock is force borken due to 546 # high IO load, multiple processes may enter the critical section 547 # and leave a bad state permanently. 548 if os.listdir(output_dir): 549 logging.info('Skipping download of %s, reusing content of %s.', 550 uri, output_dir) 551 return os.path.join(output_dir, 552 os.path.basename(urlparse.urlparse(uri).path)) 553 logging.error('Empty cache entry detected %s', output_dir) 554 return self._download_to_dir(uri, output_dir) 555 556 def _download_to_dir(self, uri, output_dir): 557 """Downloads the gs|http|https uri from the storage server. 558 559 @param uri: The Google Storage or dl.google.com uri. 560 @output_dir: The directory where the downloaded file should be placed. 561 @return Path to the downloaded object, name. 562 """ 563 # Split uri into 3 pieces for use by gsutil and also by wget. 564 parsed = urlparse.urlparse(uri) 565 filename = os.path.basename(parsed.path) 566 output = os.path.join(output_dir, filename) 567 568 self._safe_makedirs(output_dir) 569 if parsed.scheme not in ['gs', 'http', 'https']: 570 raise error.TestFail( 571 'Error: Unknown download scheme %s' % parsed.scheme) 572 if parsed.scheme in ['http', 'https']: 573 logging.info('Using wget to download %s to %s.', uri, output_dir) 574 # We are downloading 1 file at a time, hence using -O over -P. 575 utils.run( 576 'wget', 577 args=('--report-speed=bits', '-O', output, uri), 578 verbose=True) 579 return output 580 581 if not client_utils.is_moblab(): 582 # If the machine can access to the storage server directly, 583 # defer to "gsutil" for downloading. 584 logging.info('Not in lab. Downloading %s directly to %s.', 585 uri, output) 586 # b/17445576: gsutil rsync of individual files is not implemented. 587 utils.run('gsutil', args=('cp', uri, output), verbose=True) 588 return output 589 590 # We are in the moblab. Because the machine cannot access the storage 591 # server directly, use dev server to proxy. 592 logging.info('In lab. Downloading %s by staging to %s.', 593 uri, output) 594 595 dirname = os.path.dirname(parsed.path) 596 archive_url = '%s://%s%s' % (parsed.scheme, parsed.netloc, dirname) 597 598 # First, request the devserver to download files into the lab network. 599 # TODO(ihf): Switch stage_artifacts to honor rsync. Then we don't have 600 # to shuffle files inside of tarballs. 601 info = self._hosts[0].host_info_store.get() 602 ds = dev_server.ImageServer.resolve(info.build) 603 ds.stage_artifacts( 604 info.build, files=[filename], archive_url=archive_url) 605 606 # Then download files from the dev server. 607 # TODO(ihf): use rsync instead of wget. Are there 3 machines involved? 608 # Itself, dev_server plus DUT? Or is there just no rsync in moblab? 609 ds_src = '/'.join([ds.url(), 'static', dirname, filename]) 610 logging.info('dev_server URL: %s', ds_src) 611 # Calls into DUT to pull uri from dev_server. 612 utils.run( 613 'wget', 614 args=('--report-speed=bits', '-O', output, ds_src), 615 verbose=True) 616 return output 617 618 def _instance_copyfile(self, cache_path): 619 """Makes a copy of a file from the (shared) cache to a wholy owned 620 local instance. Also copies one level of cache directoy (MD5 named). 621 """ 622 filename = os.path.basename(cache_path) 623 dirname = os.path.basename(os.path.dirname(cache_path)) 624 instance_dir = os.path.join(self._tradefed_install, dirname) 625 # Make sure destination directory is named the same. 626 self._safe_makedirs(instance_dir) 627 instance_path = os.path.join(instance_dir, filename) 628 shutil.copyfile(cache_path, instance_path) 629 return instance_path 630 631 def _instance_copytree(self, cache_path): 632 """Makes a copy of a directory from the (shared and writable) cache to 633 a wholy owned local instance. 634 635 TODO(ihf): Consider using cp -al to only copy links. Not sure if this 636 is really a benefit across the container boundary, but it is risky due 637 to the possibility of corrupting the original files by an lxc instance. 638 """ 639 # We keep the top 2 names from the cache_path = .../dir1/dir2. 640 dir2 = os.path.basename(cache_path) 641 dir1 = os.path.basename(os.path.dirname(cache_path)) 642 instance_path = os.path.join(self._tradefed_install, dir1, dir2) 643 logging.info('Copying %s to instance %s', cache_path, instance_path) 644 shutil.copytree(cache_path, instance_path) 645 return instance_path 646 647 def _install_bundle(self, gs_uri): 648 """Downloads a zip file, installs it and returns the local path. 649 650 @param gs_uri: GS bucket that contains the necessary files. 651 """ 652 if not gs_uri.endswith('.zip'): 653 raise error.TestFail('Error: Not a .zip file %s.', gs_uri) 654 # Atomic write through of file. 655 with tradefed_utils.lock(self._tradefed_cache_lock): 656 # Atomic operations. 657 self._invalidate_download_cache() 658 # Download is lazy (cache_path may not actually exist if 659 # cache_unzipped does). 660 cache_path = self._download_to_cache(gs_uri) 661 # Unzip is lazy as well (but cache_unzipped guaranteed to 662 # exist). 663 cache_unzipped = self._unzip(cache_path) 664 # To save space we delete the original zip file. This works as 665 # _download only checks existence of the cache directory for 666 # lazily skipping download, and unzip itself will bail if the 667 # unzipped destination exists. Hence we don't need the original 668 # anymore. 669 if os.path.exists(cache_path): 670 logging.info('Deleting original %s', cache_path) 671 os.remove(cache_path) 672 # Erase dirty marker from disk. 673 self._validate_download_cache() 674 # We always copy files to give tradefed a clean copy of the 675 # bundle. 676 unzipped_local = self._instance_copytree(cache_unzipped) 677 return unzipped_local 678 679 def _install_files(self, gs_dir, files, permission): 680 """Installs binary tools.""" 681 for filename in files: 682 gs_uri = os.path.join(gs_dir, filename) 683 # Atomic write through of file. 684 with tradefed_utils.lock(self._tradefed_cache_lock): 685 # We don't want to leave a corrupt cache for other jobs. 686 self._invalidate_download_cache() 687 cache_path = self._download_to_cache(gs_uri) 688 # Mark cache as clean again. 689 self._validate_download_cache() 690 # This only affects the current job, so not part of cache 691 # validation. 692 local = self._instance_copyfile(cache_path) 693 os.chmod(local, permission) 694 # Keep track of PATH. 695 self._install_paths.append(os.path.dirname(local)) 696 697 def _prepare_media(self, cts_uri, needs_push_media): 698 """Downloads and offers the cached media files to tradefed.""" 699 if needs_push_media: 700 media = self._install_bundle(cts_uri['media']) 701 if os.path.islink(constants.TRADEFED_MEDIA_PATH): 702 os.unlink(constants.TRADEFED_MEDIA_PATH) 703 if os.path.isdir(constants.TRADEFED_MEDIA_PATH): 704 shutil.rmtree(constants.TRADEFED_MEDIA_PATH) 705 os.symlink(media, constants.TRADEFED_MEDIA_PATH) 706 707 logging.info('Offered %s as a media directory in %s', 708 media, constants.TRADEFED_MEDIA_PATH) 709 710 # Records the number of existing media bundles, to check later. 711 if os.path.isdir(constants.TRADEFED_MEDIA_PATH): 712 self._num_media_bundles = len( 713 os.listdir(constants.TRADEFED_MEDIA_PATH)) 714 715 def _fail_on_unexpected_media_download(self): 716 if os.path.isdir(constants.TRADEFED_MEDIA_PATH): 717 contents = os.listdir(constants.TRADEFED_MEDIA_PATH) 718 if len(contents) > self._num_media_bundles: 719 raise error.TestFail( 720 'Failed: Unexpected media bundle was added %s' % contents) 721 722 def _run(self, *args, **kwargs): 723 """Executes the given command line. 724 725 To support SDK tools, such as adb or aapt, this adds _install_paths 726 to the extra_paths. Before invoking this, ensure _install_files() has 727 been called. 728 """ 729 kwargs['extra_paths'] = ( 730 kwargs.get('extra_paths', []) + self._install_paths) 731 return utils.run(*args, **kwargs) 732 733 def _collect_tradefed_global_log(self, result, destination): 734 """Collects the tradefed global log. 735 736 @param result: The result object from utils.run. 737 @param destination: Autotest result directory (destination of logs). 738 """ 739 match = re.search(r'Saved log to /tmp/(tradefed_global_log_.*\.txt)', 740 result.stdout) 741 if not match: 742 logging.error('no tradefed_global_log file is found') 743 return 744 745 name = match.group(1) 746 dest = os.path.join(destination, 'logs', 'tmp') 747 self._safe_makedirs(dest) 748 shutil.copy(os.path.join('/tmp', name), os.path.join(dest, name)) 749 750 def _parse_result(self, result, waivers=None): 751 """Check the result from the tradefed output. 752 753 This extracts the test pass/fail/executed list from the output of 754 tradefed. It is up to the caller to handle inconsistencies. 755 756 @param result: The result object from utils.run. 757 @param waivers: a set[] of tests which are permitted to fail. 758 """ 759 return parse_tradefed_result(result.stdout, waivers) 760 761 def _get_expected_failures(self, directory, bundle_abi): 762 """Return a list of expected failures or no test module. 763 764 @param directory: A directory with expected no tests or failures files. 765 @param bundle_abi: 'arm' or 'x86' if the test is for the particular ABI. 766 None otherwise (like GTS, built for multi-ABI.) 767 @return: A list of expected failures or no test modules for the current 768 testing device. 769 """ 770 # Load waivers and manual tests so TF doesn't re-run them. 771 expected_fail_files = [] 772 test_board = self._get_board_name() 773 test_arch = self._get_board_arch() 774 sdk_ver = self._get_android_version() 775 expected_fail_dir = os.path.join(self.bindir, directory) 776 if os.path.exists(expected_fail_dir): 777 expected_fail_files += glob.glob(expected_fail_dir + '/*.yaml') 778 779 waivers = cts_expected_failure_parser.ParseKnownCTSFailures( 780 expected_fail_files) 781 return waivers.find_waivers(test_arch, test_board, bundle_abi, sdk_ver) 782 783 def _get_abilist(self): 784 """Return the abilist supported by calling adb command. 785 786 This method should only be called after the android environment is 787 successfully initialized.""" 788 if not self._abilist: 789 self._abilist = self._run_adb_cmd( 790 self._hosts[0], 791 args=('shell', 'getprop', 792 'ro.product.cpu.abilist')).stdout.split(',') 793 return self._abilist 794 795 def _get_release_branch_number(self): 796 """Returns the DUT branch number (z of Rxx-yyyyy.z.w) or 0 on error.""" 797 if not self._release_branch_number: 798 ver = (self._hosts[0].get_release_version() or '').split('.') 799 self._release_branch_number = (int(ver[1]) if len(ver) >= 3 else 0) 800 return self._release_branch_number 801 802 def _get_board_arch(self): 803 """Return target DUT arch name.""" 804 if not self._board_arch: 805 self._board_arch = ('arm' if self._hosts[0].get_cpu_arch() == 'arm' 806 else 'x86') 807 return self._board_arch 808 809 def _get_board_name(self): 810 """Return target DUT board name.""" 811 if not self._board_name: 812 self._board_name = self._hosts[0].get_board().split(':')[1] 813 return self._board_name 814 815 def _get_android_version(self): 816 """Return target DUT Android SDK version""" 817 # TODO(kinaba): factor this out to server/hosts/cros_host.py 818 if not self._android_version: 819 self._android_version = self._hosts[0].run( 820 'grep ANDROID_SDK /etc/lsb-release', 821 ignore_status=True).stdout.rstrip().split('=')[1] 822 return self._android_version 823 824 def _get_max_retry(self, max_retry): 825 """Return the maximum number of retries. 826 827 @param max_retry: max_retry specified in the control file. 828 @return: number of retries for this specific host. 829 """ 830 if max_retry is None: 831 max_retry = self._get_branch_retry(self._BRANCH_DEFAULT_RETRY) 832 candidate = [max_retry] 833 candidate.append(self._get_board_retry()) 834 candidate.append(self._get_branch_retry(self._BRANCH_MAX_RETRY)) 835 return min(x for x in candidate if x is not None) 836 837 def _get_board_retry(self): 838 """Return the maximum number of retries for DUT board name. 839 840 @return: number of max_retry or None. 841 """ 842 board = self._get_board_name() 843 if board in self._BOARD_MAX_RETRY: 844 return self._BOARD_MAX_RETRY[board] 845 logging.info('No board retry specified for board: %s', board) 846 return None 847 848 def _get_branch_retry(self, table): 849 """Returns the retry count for DUT branch number defined in |table|.""" 850 number = self._get_release_branch_number() 851 for lowerbound, retry in reversed(table): 852 if lowerbound <= number: 853 return retry 854 logging.warning('Could not establish channel. Using retry=0.') 855 return 0 856 857 def _run_precondition_scripts(self, commands, steps): 858 """Run precondition scripts on all the hosts.""" 859 for host in self._hosts: 860 for command in commands: 861 # Replace {0} (if any) with the retry count. 862 formatted_command = command.format(steps) 863 logging.info('RUN: %s\n', formatted_command) 864 output = host.run(formatted_command, ignore_status=True) 865 logging.info('END: %s\n', output) 866 867 def _run_and_parse_tradefed(self, commands): 868 """Kick off the tradefed command. 869 870 Assumes that only last entry of |commands| actually runs tests and has 871 interesting output (results, logs) for collection. Ignores all other 872 commands for this purpose. 873 874 @param commands: List of lists of command tokens. 875 @raise TestFail: when a test failure is detected. 876 @return: tuple of (tests, pass, fail, notexecuted) counts. 877 """ 878 target_argument = [] 879 for host in self._hosts: 880 target_argument += ['-s', self._get_adb_target(host)] 881 shard_argument = [] 882 if len(self._hosts) > 1: 883 if self._SHARD_CMD: 884 shard_argument = [self._SHARD_CMD, str(len(self._hosts))] 885 else: 886 logging.warning('cts-tradefed shard command isn\'t defined, ' 887 'falling back to use single device.') 888 commands = [command + target_argument + shard_argument 889 for command in commands] 890 891 try: 892 output = self._run_tradefed(commands) 893 except Exception as e: 894 self._log_java_version() 895 if not isinstance(e, error.CmdTimeoutError): 896 # In case this happened due to file corruptions, try to 897 # force to recreate the cache. 898 logging.error('Failed to run tradefed! Cleaning up now.') 899 self._clean_download_cache_if_needed(force=True) 900 raise 901 902 result_destination = os.path.join(self.resultsdir, 903 self._get_tradefed_base_dir()) 904 # Gather the global log first. Datetime parsing below can abort the test 905 # if tradefed startup had failed. Even then the global log is useful. 906 self._collect_tradefed_global_log(output, result_destination) 907 # Result parsing must come after all other essential operations as test 908 # warnings, errors and failures can be raised here. 909 return self._parse_result(output, waivers=self._waivers) 910 911 def _setup_result_directories(self): 912 """Sets up the results and logs directories for tradefed. 913 914 Tradefed saves the logs and results at: 915 self._repository/results/$datetime/ 916 self._repository/results/$datetime.zip 917 self._repository/logs/$datetime/ 918 Because other tools rely on the currently chosen Google storage paths 919 we need to keep destination_results in: 920 self.resultdir/android-cts/results/$datetime/ 921 self.resultdir/android-cts/results/$datetime.zip 922 self.resultdir/android-cts/results/logs/$datetime/ 923 To bridge between them, create symlinks from the former to the latter. 924 """ 925 logging.info('Setting up tradefed results and logs directories.') 926 927 results_destination = os.path.join(self.resultsdir, 928 self._get_tradefed_base_dir()) 929 logs_destination = os.path.join(results_destination, 'logs') 930 directory_mapping = [ 931 (os.path.join(self._repository, 'results'), results_destination), 932 (os.path.join(self._repository, 'logs'), logs_destination), 933 ] 934 935 for (tradefed_path, final_path) in directory_mapping: 936 if os.path.exists(tradefed_path): 937 shutil.rmtree(tradefed_path) 938 self._safe_makedirs(final_path) 939 os.symlink(final_path, tradefed_path) 940 941 def _install_plan(self, subplan): 942 """Copy test subplan to CTS-TF. 943 944 @param subplan: CTS subplan to be copied into TF. 945 """ 946 logging.info('Install subplan: %s', subplan) 947 subplans_tf_dir = os.path.join(self._repository, 'subplans') 948 if not os.path.exists(subplans_tf_dir): 949 os.makedirs(subplans_tf_dir) 950 test_subplan_file = os.path.join(self.bindir, 'subplans', 951 '%s.xml' % subplan) 952 try: 953 shutil.copy(test_subplan_file, subplans_tf_dir) 954 except (shutil.Error, OSError, IOError) as e: 955 raise error.TestFail( 956 'Error: failed to copy test subplan %s to CTS bundle. %s' % 957 (test_subplan_file, e)) 958 959 def _should_skip_test(self, _bundle): 960 """Some tests are expected to fail and are skipped. 961 962 Subclasses should override with specific details. 963 """ 964 return False 965 966 def _should_reboot(self, steps): 967 """Oracle to decide if DUT should reboot or just restart Chrome. 968 969 For now we will not reboot after the first two iterations, but on all 970 iterations afterward as before. In particular this means that most CTS 971 tests will now not get a "clean" machine, but one on which tests ran 972 before. But we will still reboot after persistent failures, hopefully 973 not causing too many flakes down the line. 974 """ 975 if steps < 3: 976 return False 977 return True 978 979 def _run_tradefed_list_results(self): 980 """Run the `tradefed list results` command. 981 982 @return: tuple of the last (session_id, pass, fail, all_done?). 983 """ 984 output = self._run_tradefed([['list', 'results']]) 985 986 # Parses the last session from the output that looks like: 987 # 988 # Session Pass Fail Modules Complete ... 989 # 0 90 10 1 of 2 990 # 1 199 1 2 of 2 991 # ... 992 lastmatch = None 993 for m in re.finditer(r'^(\d+)\s+(\d+)\s+(\d+)\s+(\d+) of (\d+)', 994 output.stdout, re.MULTILINE): 995 session, passed, failed, done, total = map(int, 996 m.group(1, 2, 3, 4, 5)) 997 lastmatch = (session, passed, failed, done == total) 998 return lastmatch 999 1000 def _tradefed_retry_command(self, template, session_id): 1001 raise NotImplementedError('Subclass should override this function') 1002 1003 def _tradefed_run_command(self, template): 1004 raise NotImplementedError('Subclass should override this function') 1005 1006 def _run_tradefed_with_retries(self, 1007 test_name, 1008 run_template, 1009 retry_template, 1010 timeout, 1011 needs_push_media=False, 1012 target_module=None, 1013 target_plan=None, 1014 bundle=None, 1015 cts_uri=None, 1016 login_precondition_commands=[], 1017 precondition_commands=[], 1018 perf_description=None): 1019 """Run CTS/GTS with retry logic. 1020 1021 We first kick off the specified module. Then rerun just the failures 1022 on the next MAX_RETRY iterations. 1023 """ 1024 # On dev and beta channels timeouts are sharp, lenient on stable. 1025 self._timeout = timeout 1026 if (self._get_release_branch_number() >= 1027 constants.APPROXIMATE_STABLE_BRANCH_NUMBER): 1028 self._timeout += 3600 1029 1030 if self._should_skip_test(bundle): 1031 logging.warning('Skipped test %s', ' '.join(test_name)) 1032 return 1033 1034 steps = -1 # For historic reasons the first iteration is not counted. 1035 self.summary = '' 1036 accurate = [] 1037 board = self._get_board_name() 1038 session_id = None 1039 1040 self._setup_result_directories() 1041 self._prepare_media(cts_uri, needs_push_media) 1042 1043 # This loop retries failures. For this reason please do not raise 1044 # TestFail in this loop if you suspect the failure might be fixed 1045 # in the next loop iteration. 1046 while steps < self._max_retry: 1047 steps += 1 1048 keep_media = needs_push_media and steps >= 1 1049 self._run_precondition_scripts(login_precondition_commands, steps) 1050 with self._login_chrome( 1051 board=board, 1052 reboot=self._should_reboot(steps), 1053 # TODO(rohitbm): Evaluate if power cycle really helps with 1054 # Bluetooth test failures, and then make the implementation 1055 # more strict by first running complete restart and reboot 1056 # retries and then perform power cycle. 1057 hard_reboot_on_failure=(self._hard_reboot_on_failure 1058 and steps == self._max_retry), 1059 dont_override_profile=keep_media) as current_logins: 1060 self._ready_arc() 1061 self._calculate_timeout_factor(bundle) 1062 self._run_precondition_scripts(precondition_commands, steps) 1063 1064 # Run tradefed. 1065 if session_id == None: 1066 if target_plan is not None: 1067 self._install_plan(target_plan) 1068 1069 logging.info('Running %s:', test_name) 1070 commands = [self._tradefed_run_command(run_template)] 1071 else: 1072 logging.info('Retrying failures of %s with session_id %d:', 1073 test_name, session_id) 1074 commands = [self._tradefed_retry_command(retry_template, 1075 session_id)] 1076 1077 # TODO(pwang): Evaluate if it is worth it to get the number of 1078 # not-excecuted, for instance, by collecting all 1079 # tests on startup (very expensive, may take 30 1080 # minutes). 1081 waived_tests, acc = self._run_and_parse_tradefed( 1082 commands) 1083 self._fail_on_unexpected_media_download() 1084 result = self._run_tradefed_list_results() 1085 if not result: 1086 logging.error('Did not find any test results. Retry.') 1087 for current_login in current_logins: 1088 current_login.need_reboot() 1089 continue 1090 1091 waived = len(waived_tests) 1092 last_session_id, passed, failed, all_done = result 1093 # If the result is |acc|urate according to the log, or the 1094 # inaccuracy is recognized by tradefed (not all_done), then 1095 # it is fine. 1096 accurate.append(acc or not all_done) 1097 if failed < waived: 1098 logging.error( 1099 'Error: Internal waiver bookkeeping has become ' 1100 'inconsistent (f=%d, w=%d)', failed, waived) 1101 1102 msg = 'run' if session_id == None else ' retry' 1103 msg += '(p=%s, f=%s, w=%s)' % (passed, failed, waived) 1104 self.summary += msg 1105 logging.info('RESULT: %s %s', msg, result) 1106 1107 # Check for no-test modules. We use the "all_done" indicator 1108 # provided by list_results to decide if there are outstanding 1109 # modules to iterate over (similar to missing tests just on a 1110 # per-module basis). 1111 notest = (passed + failed == 0 and all_done) 1112 if target_module in self._notest_modules: 1113 if notest: 1114 logging.info('Package has no tests as expected.') 1115 return 1116 else: 1117 # We expected no tests, but the new bundle drop must 1118 # have added some for us. Alert us to the situation. 1119 raise error.TestFail( 1120 'Failed: Remove module %s from ' 1121 'notest_modules directory!' % target_module) 1122 elif notest: 1123 logging.error('Did not find any tests in module. Hoping ' 1124 'this is transient. Retry after reboot.') 1125 for current_login in current_logins: 1126 current_login.need_reboot() 1127 continue 1128 1129 session_id = last_session_id 1130 1131 # Check if all the tests passed. 1132 if failed <= waived and all_done: 1133 break 1134 1135 # TODO(b/127908450) Tradefed loses track of not-executed tests 1136 # when the commandline pattern included '*', and retry run for 1137 # them wrongly declares all tests passed. This is misleading. 1138 # Rather, we give up the retry and report the result as FAIL. 1139 if not all_done and '*' in ''.join(run_template): 1140 break 1141 1142 # Tradefed finished normally. Record the failures to perf. 1143 if target_module: 1144 # Only record the failure by module, which exclude 'all', 'collects-tests-only', etc. 1145 self._perf_results.append(dict( 1146 description=perf_description if perf_description else target_module, 1147 value=failed, 1148 graph=bundle 1149 )) 1150 1151 if session_id == None: 1152 raise error.TestFail('Error: Could not find any tests in module.') 1153 1154 if failed <= waived and all_done: 1155 if not all(accurate): 1156 # Tests count inaccurate, remove perf to avoid false alarm. 1157 self._perf_results.pop() 1158 raise error.TestFail( 1159 'Failed: Not all tests were executed. After %d ' 1160 'retries passing %d tests, waived=%d. %s' % ( 1161 steps, passed, waived, self.summary)) 1162 # TODO(ihf): Make this error.TestPass('...') once 1163 # available. 1164 if steps > 0 and self._warn_on_test_retry: 1165 raise error.TestWarn( 1166 'Passed: after %d retries passing %d tests, ' 1167 'waived=%d. %s' % (steps, passed, waived, 1168 self.summary)) 1169 return 1170 1171 raise error.TestFail( 1172 'Failed: after %d retries giving up. ' 1173 'passed=%d, failed=%d, waived=%d%s%s. %s' % 1174 (steps, passed, failed, waived, '' if all_done else ', notexec>=1', 1175 '' if all(accurate) else ', Tests may not be accurate.', 1176 self.summary)) 1177