# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


from recipe_engine import recipe_api
from recipe_engine import recipe_test_api

from . import default
import subprocess  # TODO(borenet): No! Remove this.


"""Android flavor, used for running code on Android."""


class AndroidFlavor(default.DefaultFlavor):
  """Flavor that drives an attached Android device through adb.

  All device interaction goes through the adb binary selected in __init__.
  Any method that talks to the device sets self._ever_ran_adb so that
  cleanup_steps() knows to dump the device log and kill the adb server.
  """

  # NOTE(review): the inline helper programs in this class test
  # `'cannot' in log` (and similar) directly on the value returned by
  # subprocess.check_output. That only works when the value is a str --
  # confirm which interpreter python.inline runs these programs with.

  def __init__(self, m, app_name):
    """Set up adb paths, device directories and per-device tuning tables.

    Args:
      m: the recipe module API object, forwarded to DefaultFlavor.
      app_name: name of the binary to run (e.g. 'nanobench'), forwarded to
          DefaultFlavor.
    """
    super(AndroidFlavor, self).__init__(m, app_name)
    self._ever_ran_adb = False
    self.ADB_BINARY = '/usr/bin/adb.1.0.35'
    self.ADB_PUB_KEY = '/home/chrome-bot/.android/adbkey'
    # Bots whose id does not contain 'skia' use a different adb and key.
    if 'skia' not in self.m.vars.swarming_bot_id:
      self.ADB_BINARY = '/opt/infra-android/tools/adb'
      self.ADB_PUB_KEY = ('/home/chrome-bot/.android/'
                          'chrome_infrastructure_adbkey')

    # Data should go in android_data_dir, which may be preserved across runs.
    android_data_dir = '/sdcard/revenge_of_the_skiabot/'
    self.device_dirs = default.DeviceDirs(
        bin_dir='/data/local/tmp/',
        dm_dir=android_data_dir + 'dm_out',
        perf_data_dir=android_data_dir + 'perf',
        resource_dir=android_data_dir + 'resources',
        images_dir=android_data_dir + 'images',
        lotties_dir=android_data_dir + 'lotties',
        skp_dir=android_data_dir + 'skps',
        svg_dir=android_data_dir + 'svgs',
        mskp_dir=android_data_dir + 'mskp',
        tmp_dir=android_data_dir,
        texttraces_dir=android_data_dir + 'text_blob_traces')

    # A list of devices we can't root.  If rooting fails and a device is not
    # on the list, we fail the task to avoid perf inconsistencies.
    self.cant_root = ['GalaxyS7_G930FD', 'GalaxyS9',
                      'GalaxyS20', 'MotoG4', 'NVIDIA_Shield',
                      'P30', 'Pixel4', 'Pixel4XL', 'Pixel5', 'TecnoSpark3Pro']

    # Maps device type -> CPU ids that should be scaled for nanobench.
    # Many devices have two (or more) different CPUs (e.g. big.LITTLE
    # on Nexus5x). The CPUs listed are the biggest cpus on the device.
    # The CPUs are grouped together, so we only need to scale one of them
    # (the one listed) in order to scale them all.
    # E.g. Nexus5x has cpu0-3 as one chip and cpu4-5 as the other. Thus,
    # if one wants to run a single-threaded application (e.g. nanobench), one
    # can disable cpu0-3 and scale cpu 4 to have only cpu4 and 5 at the same
    # frequency.  See also disable_for_nanobench.
    self.cpus_to_scale = {
      'Nexus5x': [4],
      'Pixel': [2],
      'Pixel2XL': [4]
    }

    # Maps device type -> CPU ids that should be turned off when running
    # single-threaded applications like nanobench. The devices listed have
    # multiple, different CPUs. We notice a lot of noise that seems to be
    # caused by nanobench running on the slow CPU, then the big CPU. By
    # disabling this, we see less of that noise by forcing the same CPU
    # to be used for the performance testing every time.
    self.disable_for_nanobench = {
      'Nexus5x': range(0, 4),
      'Pixel': range(0, 2),
      'Pixel2XL': range(0, 4)
    }

    # Maps device type -> GPU clock value written to kgsl gpuclk when
    # preparing the device for nanobench.
    self.gpu_scaling = {
      "Nexus5": 450000000,
      "Nexus5x": 600000000,
    }

  def _adb(self, title, *cmd, **kwargs):
    """Run an adb command as a retried step.

    Args:
      title: name of the step.
      *cmd: arguments appended after the adb binary.
      **kwargs: forwarded to the step. 'attempts' (default 3) is consumed
          here; 'infra_step' defaults to True when not supplied.

    Returns:
      Whatever self.m.run.with_retry returns for the step.
    """
    # The only non-infra adb steps (dm / nanobench) happen to not use _adb().
    kwargs.setdefault('infra_step', True)

    self._ever_ran_adb = True
    # ADB seems to be occasionally flaky on every device, so always retry.
    attempts = kwargs.pop('attempts', 3)

    def wait_for_device(attempt):
      # Between attempts: restart the adb server, then block until the
      # device shows up again. Neither step is allowed to fail the build.
      self.m.run(self.m.step,
                 'kill adb server after failure of \'%s\' (attempt %d)' % (
                     title, attempt),
                 cmd=[self.ADB_BINARY, 'kill-server'],
                 infra_step=True, timeout=30, abort_on_failure=False,
                 fail_build_on_failure=False)
      self.m.run(self.m.step,
                 'wait for device after failure of \'%s\' (attempt %d)' % (
                     title, attempt),
                 cmd=[self.ADB_BINARY, 'wait-for-device'], infra_step=True,
                 timeout=180, abort_on_failure=False,
                 fail_build_on_failure=False)

    with self.m.context(cwd=self.m.path['start_dir'].join('skia')):
      with self.m.env({'ADB_VENDOR_KEYS': self.ADB_PUB_KEY}):
        return self.m.run.with_retry(self.m.step, title, attempts,
                                     cmd=[self.ADB_BINARY]+list(cmd),
                                     between_attempts_fn=wait_for_device,
                                     **kwargs)

  def _scale_for_dm(self):
    """Re-enable CPUs and pick a governor suitable for running dm.

    Does nothing for devices in self.cant_root or when
    internal_hardware_label is set.
    """
    device = self.m.vars.builder_cfg.get('model')
    if (device in self.cant_root or
        self.m.vars.internal_hardware_label):
      return

    # This is paranoia... any CPUs we disabled while running nanobench
    # ought to be back online now that we've restarted the device.
    for i in self.disable_for_nanobench.get(device, []):
      self._set_cpu_online(i, 1)  # enable

    # Copy the list: appending to the object returned by .get() would
    # permanently modify the entry stored in self.cpus_to_scale.
    scale_up = list(self.cpus_to_scale.get(device, [0]))
    # For big.LITTLE devices, make sure we scale the LITTLE cores up;
    # there is a chance they are still in powersave mode from when
    # swarming slows things down for cooling down and charging.
    if 0 not in scale_up:
      scale_up.append(0)
    for i in scale_up:
      # AndroidOne doesn't support ondemand governor. hotplug is similar.
      if device == 'AndroidOne':
        self._set_governor(i, 'hotplug')
      elif device in ['Pixel3a', 'Pixel4', 'Pixel4a', 'Wembley']:
        # Pixel3a/4/4a have userspace powersave performance schedutil.
        # performance seems like a reasonable choice.
        self._set_governor(i, 'performance')
      else:
        self._set_governor(i, 'ondemand')

  def _scale_for_nanobench(self):
    """Pin CPU (and, where configured, GPU) clocks for nanobench.

    Does nothing for devices in self.cant_root or when
    internal_hardware_label is set.
    """
    device = self.m.vars.builder_cfg.get('model')
    if (device in self.cant_root or
        self.m.vars.internal_hardware_label):
      return

    for i in self.cpus_to_scale.get(device, [0]):
      self._set_governor(i, 'userspace')
      self._scale_cpu(i, 0.6)

    for i in self.disable_for_nanobench.get(device, []):
      self._set_cpu_online(i, 0)  # disable

    if device in self.gpu_scaling:
      #https://developer.qualcomm.com/qfile/28823/lm80-p0436-11_adb_commands.pdf
      # Section 3.2.1 Commands to put the GPU in performance mode
      # Nexus 5 is  320000000 by default
      # Nexus 5x is 180000000 by default
      gpu_freq = self.gpu_scaling[device]
      self.m.run.with_retry(self.m.python.inline,
        "Lock GPU to %d (and other perf tweaks)" % gpu_freq,
        3,  # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
freq = sys.argv[2]
idle_timer = "10000"

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

subprocess.check_output([ADB, 'shell', 'stop', 'thermald'])

subprocess.check_output([ADB, 'shell', 'echo "%s" > '
    '/sys/class/kgsl/kgsl-3d0/gpuclk' % freq])

actual_freq = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/class/kgsl/kgsl-3d0/gpuclk']).strip()
if actual_freq != freq:
  raise Exception('Frequency (actual, expected) (%s, %s)'
                  % (actual_freq, freq))

subprocess.check_output([ADB, 'shell', 'echo "%s" > '
    '/sys/class/kgsl/kgsl-3d0/idle_timer' % idle_timer])

actual_timer = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/class/kgsl/kgsl-3d0/idle_timer']).strip()
if actual_timer != idle_timer:
  raise Exception('idle_timer (actual, expected) (%s, %s)'
                  % (actual_timer, idle_timer))

for s in ['force_bus_on', 'force_rail_on', 'force_clk_on']:
  subprocess.check_output([ADB, 'shell', 'echo "1" > '
      '/sys/class/kgsl/kgsl-3d0/%s' % s])
  actual_set = subprocess.check_output([ADB, 'shell', 'cat '
      '/sys/class/kgsl/kgsl-3d0/%s' % s]).strip()
  if actual_set != "1":
    raise Exception('%s (actual, expected) (%s, 1)'
                    % (s, actual_set))
""",
        args=[self.ADB_BINARY, gpu_freq],
        infra_step=True,
        timeout=30)

  def _set_governor(self, cpu, gov):
    """Write `gov` to the cpufreq scaling_governor of `cpu` and verify it.

    Args:
      cpu: integer CPU id.
      gov: governor name, e.g. 'ondemand' or 'userspace'.
    """
    self._ever_ran_adb = True
    self.m.run.with_retry(self.m.python.inline,
        "Set CPU %d's governor to %s" % (cpu, gov),
        3,  # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
cpu = int(sys.argv[2])
gov = sys.argv[3]

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

subprocess.check_output([ADB, 'shell', 'echo "%s" > '
    '/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor' % (gov, cpu)])
actual_gov = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor' % cpu]).strip()
if actual_gov != gov:
  raise Exception('(actual, expected) (%s, %s)'
                  % (actual_gov, gov))
""",
        args=[self.ADB_BINARY, cpu, gov],
        infra_step=True,
        timeout=30)


  def _set_cpu_online(self, cpu, value):
    """Set /sys/devices/system/cpu/cpu{N}/online to value (0 or 1)."""
    self._ever_ran_adb = True
    msg = 'Enabling' if value else 'Disabling'
    self.m.run.with_retry(self.m.python.inline,
        '%s CPU %d' % (msg, cpu),
        3,  # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
cpu = int(sys.argv[2])
value = int(sys.argv[3])

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

# If we try to echo 1 to an already online cpu, adb returns exit code 1.
# So, check the value before trying to write it.
prior_status = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/devices/system/cpu/cpu%d/online' % cpu]).strip()
if prior_status == str(value):
  print('CPU %d online already %d' % (cpu, value))
  sys.exit()

subprocess.check_output([ADB, 'shell', 'echo %s > '
    '/sys/devices/system/cpu/cpu%d/online' % (value, cpu)])
actual_status = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/devices/system/cpu/cpu%d/online' % cpu]).strip()
if actual_status != str(value):
  raise Exception('(actual, expected) (%s, %d)'
                  % (actual_status, value))
""",
        args=[self.ADB_BINARY, cpu, value],
        infra_step=True,
        timeout=30)


  def _scale_cpu(self, cpu, target_percent):
    """Lock `cpu` to an available frequency near target_percent of its max.

    The inline program picks the highest available frequency that does not
    exceed max * target_percent (falling back to the max frequency when no
    available frequency is that low), writes it, and verifies
    scaling_cur_freq.

    Args:
      cpu: integer CPU id.
      target_percent: fraction of the maximum frequency, e.g. 0.6.
    """
    self._ever_ran_adb = True
    self.m.run.with_retry(self.m.python.inline,
        'Scale CPU %d to %f' % (cpu, target_percent),
        3,  # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
target_percent = float(sys.argv[2])
cpu = int(sys.argv[3])
log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

root = '/sys/devices/system/cpu/cpu%d/cpufreq' %cpu

# All devices we test on give a list of their available frequencies.
available_freqs = subprocess.check_output([ADB, 'shell',
    'cat %s/scaling_available_frequencies' % root])

# Check for message like '/system/bin/sh: file not found'
if available_freqs and '/system/bin/sh' not in available_freqs:
  available_freqs = sorted(
      int(i) for i in available_freqs.strip().split())
else:
  raise Exception('Could not get list of available frequencies: %s' %
                  available_freqs)

maxfreq = available_freqs[-1]
target = int(round(maxfreq * target_percent))
freq = maxfreq
for f in reversed(available_freqs):
  if f <= target:
    freq = f
    break

print('Setting frequency to %d' % freq)

# If scaling_max_freq is lower than our attempted setting, it won't take.
# We must set min first, because if we try to set max to be less than min
# (which sometimes happens after certain devices reboot) it returns a
# perplexing permissions error.
subprocess.check_output([ADB, 'shell', 'echo 0 > '
    '%s/scaling_min_freq' % root])
subprocess.check_output([ADB, 'shell', 'echo %d > '
    '%s/scaling_max_freq' % (freq, root)])
subprocess.check_output([ADB, 'shell', 'echo %d > '
    '%s/scaling_setspeed' % (freq, root)])
time.sleep(5)
actual_freq = subprocess.check_output([ADB, 'shell', 'cat '
    '%s/scaling_cur_freq' % root]).strip()
if actual_freq != str(freq):
  raise Exception('(actual, expected) (%s, %d)'
                  % (actual_freq, freq))
""",
        args=[self.ADB_BINARY, str(target_percent), cpu],
        infra_step=True,
        timeout=30)


  def _asan_setup_path(self):
    """Return the path of the NDK's asan_device_setup script in the workdir."""
    return self.m.vars.workdir.join(
        'android_ndk_linux', 'toolchains', 'llvm', 'prebuilt', 'linux-x86_64',
        'lib64', 'clang', '9.0.8', 'bin', 'asan_device_setup')


  def install(self):
    """Prepare the device: create dirs, set up ASAN, scale CPUs, push the app."""
    self._adb('mkdir ' + self.device_dirs.resource_dir,
              'shell', 'mkdir', '-p', self.device_dirs.resource_dir)
    if self.m.vars.builder_cfg.get('model') in ['GalaxyS20', 'GalaxyS9']:
      # See skia:10184, should be moot once upgraded to Android 11?
      self._adb('cp libGLES_mali.so to ' + self.device_dirs.bin_dir,
                'shell', 'cp',
                '/vendor/lib64/egl/libGLES_mali.so',
                self.device_dirs.bin_dir + 'libvulkan.so')
    if 'ASAN' in self.m.vars.extra_tokens:
      self._ever_ran_adb = True
      self.m.run(self.m.python.inline, 'Setting up device to run ASAN',
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
ASAN_SETUP = sys.argv[2]

def wait_for_device():
  while True:
    time.sleep(5)
    print('Waiting for device')
    subprocess.check_output([ADB, 'wait-for-device'])
    bit1 = subprocess.check_output([ADB, 'shell', 'getprop',
                                   'dev.bootcomplete'])
    bit2 = subprocess.check_output([ADB, 'shell', 'getprop',
                                   'sys.boot_completed'])
    if '1' in bit1 and '1' in bit2:
      print('Device detected')
      break

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

output = subprocess.check_output([ADB, 'disable-verity'])
print(output)

if 'already disabled' not in output:
  print('Rebooting device')
  subprocess.check_output([ADB, 'reboot'])
  wait_for_device()

def installASAN(revert=False):
  # ASAN setup script is idempotent, either it installs it or
  # says it's installed. Returns True on success, false otherwise.
  out = subprocess.check_output([ADB, 'wait-for-device'])
  print(out)
  cmd = [ASAN_SETUP]
  if revert:
    cmd = [ASAN_SETUP, '--revert']
  process = subprocess.Popen(cmd, env={'ADB': ADB},
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)

  # this also blocks until command finishes
  (stdout, stderr) = process.communicate()
  print(stdout)
  print('Stderr: %s' % stderr)
  return process.returncode == 0

if not installASAN():
  print('Trying to revert the ASAN install and then re-install')
  # ASAN script sometimes has issues if it was interrupted or partially applied
  # Try reverting it, then re-enabling it
  if not installASAN(revert=True):
    raise Exception('reverting ASAN install failed')

  # Sleep because device does not reboot instantly
  time.sleep(10)

  if not installASAN():
    raise Exception('Tried twice to setup ASAN and failed.')

# Sleep because device does not reboot instantly
time.sleep(10)
wait_for_device()
# Sleep again to hopefully avoid error "secure_mkdirs failed: No such file or
# directory" when pushing resources to the device.
time.sleep(60)
""",
        args=[self.ADB_BINARY, self._asan_setup_path()],
        infra_step=True,
        timeout=300,
        abort_on_failure=True)
    if self.app_name:
      if self.app_name == 'nanobench':
        self._scale_for_nanobench()
      else:
        self._scale_for_dm()
      app_path = self.host_dirs.bin_dir.join(self.app_name)
      self._adb('push %s' % self.app_name,
                'push', app_path, self.device_dirs.bin_dir)



  def cleanup_steps(self):
    """Undo ASAN setup, dump the device log, and quarantine on infra failure."""
    if 'ASAN' in self.m.vars.extra_tokens:
      self._ever_ran_adb = True
      # Remove ASAN.
      self.m.run(self.m.step,
                 'wait for device before uninstalling ASAN',
                 cmd=[self.ADB_BINARY, 'wait-for-device'], infra_step=True,
                 timeout=180, abort_on_failure=False,
                 fail_build_on_failure=False)
      self.m.run(self.m.step, 'uninstall ASAN',
                 cmd=[self._asan_setup_path(), '--revert'],
                 infra_step=True, timeout=300,
                 abort_on_failure=False, fail_build_on_failure=False)

    if self._ever_ran_adb:
      # Print logcat, annotating 11-token fatal ('F') lines that carry a
      # 'pc' address with addr2line output when a matching local binary
      # exists in the host bin dir.
      self.m.run(self.m.python.inline, 'dump log', program="""
          import os
          import subprocess
          import sys
          out = sys.argv[1]
          log = subprocess.check_output(['%s', 'logcat', '-d'])
          for line in log.split('\\n'):
            tokens = line.split()
            if len(tokens) == 11 and tokens[-7] == 'F' and tokens[-3] == 'pc':
              addr, path = tokens[-2:]
              local = os.path.join(out, os.path.basename(path))
              if os.path.exists(local):
                try:
                  sym = subprocess.check_output(['addr2line', '-Cfpe', local, addr])
                  line = line.replace(addr, addr + ' ' + sym.strip())
                except subprocess.CalledProcessError:
                  pass
            print(line)
          """ % self.ADB_BINARY,
          args=[self.host_dirs.bin_dir],
          infra_step=True,
          timeout=300,
          abort_on_failure=False)

    # Only quarantine the bot if the first failed step
    # is an infra step. If, instead, we did this for any infra failures, we
    # would do this too much. For example, if a Nexus 10 died during dm
    # and the following pull step would also fail "device not found" - causing
    # us to run the shutdown command when the device was probably not in a
    # broken state; it was just rebooting.
    if (self.m.run.failed_steps and
        isinstance(self.m.run.failed_steps[0], recipe_api.InfraFailure)):
      bot_id = self.m.vars.swarming_bot_id
      self.m.file.write_text('Quarantining Bot',
                             '/home/chrome-bot/%s.force_quarantine' % bot_id,
                             ' ')

    if self._ever_ran_adb:
      self._adb('kill adb server', 'kill-server')

  def step(self, name, cmd):
    """Run `cmd` on the device via a pushed shell script.

    The script records the command's exit status in a file named 'rc' in the
    device bin dir; the inline program then exits with that status.

    Args:
      name: unused here; the step is named after cmd[0].
      cmd: list whose first element is the binary name in the device bin dir
          and whose remaining elements are its arguments.
    """
    sh = '%s.sh' % cmd[0]
    self.m.run.writefile(self.m.vars.tmp_dir.join(sh),
        'set -x; LD_LIBRARY_PATH=%s %s%s; echo $? >%src' % (
            self.device_dirs.bin_dir,
            self.device_dirs.bin_dir, subprocess.list2cmdline(map(str, cmd)),
            self.device_dirs.bin_dir))
    self._adb('push %s' % sh,
              'push', self.m.vars.tmp_dir.join(sh), self.device_dirs.bin_dir)

    self._adb('clear log', 'logcat', '-c')
    self.m.python.inline('%s' % cmd[0], """
    import subprocess
    import sys
    bin_dir = sys.argv[1]
    sh = sys.argv[2]
    subprocess.check_call(['%s', 'shell', 'sh', bin_dir + sh])
    try:
      sys.exit(int(subprocess.check_output(['%s', 'shell', 'cat',
                                            bin_dir + 'rc'])))
    except ValueError:
      print("Couldn't read the return code. Probably killed for OOM.")
      sys.exit(1)
    """ % (self.ADB_BINARY, self.ADB_BINARY),
      args=[self.device_dirs.bin_dir, sh])

  def copy_file_to_device(self, host, device):
    """Push the single file `host` to the path `device` on the device."""
    self._adb('push %s %s' % (host, device), 'push', host, device)

  def copy_directory_contents_to_device(self, host, device):
    """Push every entry matched by `host`/* to the `device` directory."""
    contents = self.m.file.glob_paths('ls %s/*' % host,
                                      host, '*',
                                      test_data=['foo.png', 'bar.jpg'])
    args = contents + [device]
    self._adb('push %s/* %s' % (host, device), 'push', *args)

  def copy_directory_contents_to_host(self, device, host):
    """Pull the `device` directory and copy its entries into `host`."""
    # TODO(borenet): When all of our devices are on Android 6.0 and up, we can
    # switch to using tar to zip up the results before pulling.
    with self.m.step.nest('adb pull'):
      tmp = self.m.path.mkdtemp('adb_pull')
      self._adb('pull %s' % device, 'pull', device, tmp)
      paths = self.m.file.glob_paths(
          'list pulled files',
          tmp,
          self.m.path.basename(device) + self.m.path.sep + '*',
          test_data=['%d.png' % i for i in (1, 2)])
      for p in paths:
        self.m.file.copy('copy %s' % self.m.path.basename(p), p, host)

  def read_file_on_device(self, path, **kwargs):
    """Return the right-stripped UTF-8 contents of `path` on the device.

    Args:
      path: file path on the device.
      **kwargs: forwarded to _adb.

    Returns:
      The decoded contents, or None when the step produced no result or no
      stdout.
    """
    rv = self._adb('read %s' % path,
                   'shell', 'cat', path, stdout=self.m.raw_io.output(),
                   **kwargs)
    return rv.stdout.decode('utf-8').rstrip() if rv and rv.stdout else None

  def remove_file_on_device(self, path):
    """Remove `path` on the device (rm -rf) and verify that it is gone."""
    self.m.run.with_retry(self.m.python.inline, 'rm %s' % path, 3, program="""
        import subprocess
        import sys

        # Remove the path.
        adb = sys.argv[1]
        path = sys.argv[2]
        print('Removing %s' % path)
        cmd = [adb, 'shell', 'rm', '-rf', path]
        print(' '.join(cmd))
        subprocess.check_call(cmd)

        # Verify that the path was deleted.
        print('Checking for existence of %s' % path)
        cmd = [adb, 'shell', 'ls', path]
        print(' '.join(cmd))
        try:
          output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
          output = e.output
        print('Output was:')
        print('======')
        print(output)
        print('======')
        if 'No such file or directory' not in output:
          raise Exception('%s exists despite being deleted' % path)
        """,
        args=[self.ADB_BINARY, path],
        infra_step=True)

  def create_clean_device_dir(self, path):
    """Remove `path` on the device, then recreate it as an empty directory."""
    self.remove_file_on_device(path)
    self._adb('mkdir %s' % path, 'shell', 'mkdir', '-p', path)