# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


from recipe_engine import recipe_api
from recipe_engine import recipe_test_api

from . import default
import subprocess  # TODO(borenet): No! Remove this.


"""Android flavor, used for running code on Android."""


class AndroidFlavor(default.DefaultFlavor):
  """Flavor used for running code on an Android device through adb."""

  def __init__(self, m, app_name):
    """Set up adb paths, on-device directories and per-model tuning tables.

    Args:
      m: recipe module API object; forwarded to DefaultFlavor and used here
          to read vars.swarming_bot_id.
      app_name: name of the app to run (e.g. 'nanobench'); forwarded to
          DefaultFlavor.
    """
    super(AndroidFlavor, self).__init__(m, app_name)
    self._ever_ran_adb = False
    self.ADB_BINARY = '/usr/bin/adb.1.0.35'
    self.ADB_PUB_KEY = '/home/chrome-bot/.android/adbkey'
    # Bots whose id does not contain 'skia' use a different adb and key.
    if 'skia' not in self.m.vars.swarming_bot_id:
      self.ADB_BINARY = '/opt/infra-android/tools/adb'
      self.ADB_PUB_KEY = ('/home/chrome-bot/.android/'
                          'chrome_infrastructure_adbkey')

    # Data should go in android_data_dir, which may be preserved across runs.
    android_data_dir = '/sdcard/revenge_of_the_skiabot/'
    self.device_dirs = default.DeviceDirs(
        bin_dir        = '/data/local/tmp/',
        dm_dir         = android_data_dir + 'dm_out',
        perf_data_dir  = android_data_dir + 'perf',
        resource_dir   = android_data_dir + 'resources',
        fonts_dir      = 'NOT_SUPPORTED',
        images_dir     = android_data_dir + 'images',
        lotties_dir    = android_data_dir + 'lotties',
        skp_dir        = android_data_dir + 'skps',
        svg_dir        = android_data_dir + 'svgs',
        tmp_dir        = android_data_dir,
        texttraces_dir = android_data_dir + 'text_blob_traces')

    # A list of devices we can't root.  If rooting fails and a device is not
    # on the list, we fail the task to avoid perf inconsistencies.
    self.cant_root = ['GalaxyS7_G930FD',
                      'GalaxyS20', 'MotoG4',
                      'P30', 'Pixel4','Pixel4XL', 'Pixel5', 'TecnoSpark3Pro',
                      'JioNext',
                      'GalaxyS24', 'MotoG73']

    self.use_performance_governor_for_dm = [
      'Pixel3a',
      'Pixel4',
      'Pixel4a',
      'Wembley',
      'Pixel6',
      'Pixel7',
      'Pixel9',
    ]

    self.use_powersave_governor_for_nanobench = [
      'Pixel6',
      'Pixel7',
      'Pixel9',
    ]

    # Maps device type -> CPU ids that should be scaled for nanobench.
    # Many devices have two (or more) different CPUs (e.g. big.LITTLE
    # on Nexus5x). The CPUs listed are the biggest cpus on the device.
    # The CPUs are grouped together, so we only need to scale one of them
    # (the one listed) in order to scale them all.
    # E.g. Nexus5x has cpu0-3 as one chip and cpu4-5 as the other. Thus,
    # if one wants to run a single-threaded application (e.g. nanobench), one
    # can disable cpu0-3 and scale cpu 4 to have only cpu4 and 5 at the same
    # frequency.  See also disable_for_nanobench.
    self.cpus_to_scale = {
      'Nexus5x': [4],
      'Pixel': [2],
    }

    # Maps device type -> CPU ids that should be turned off when running
    # single-threaded applications like nanobench. The devices listed have
    # multiple, different CPUs. We notice a lot of noise that seems to be
    # caused by nanobench running on the slow CPU, then the big CPU. By
    # disabling this, we see less of that noise by forcing the same CPU
    # to be used for the performance testing every time.
    self.disable_for_nanobench = {
      'Nexus5x': range(0, 4),
      'Pixel': range(0, 2),
      'Pixel6': range(4,8), # Only use the 4 small cores.
      'Pixel7': range(4,8),
      'Pixel9': range(4,8),
    }

    # Maps device type -> GPU frequency passed to set_gpu_scaling.py.
    self.gpu_scaling = {
      "Nexus5":   450000000,
      "Nexus5x":  600000000,
    }

  def _wait_for_device(self, title, attempt):
    """Try to bring the device back after a failed adb step.

    Runs, in order: adb kill-server, wait-for-device, devices -l, reboot,
    a wait for sys.boot_completed, and (unless the model is in cant_root)
    adb root.  Every step is run with abort_on_failure=False and
    fail_build_on_failure=False.

    Args:
      title: name of the step that failed; embedded in the step names.
      attempt: attempt number; embedded in the step names.
    """
    self.m.run(self.m.step,
               'adb kill-server after failure of \'%s\' (attempt %d)' % (
                   title, attempt),
               cmd=[self.ADB_BINARY, 'kill-server'],
               infra_step=True, timeout=30, abort_on_failure=False,
               fail_build_on_failure=False)
    self.m.run(self.m.step,
               'wait for device after failure of \'%s\' (attempt %d)' % (
                   title, attempt),
               cmd=[self.ADB_BINARY, 'wait-for-device'], infra_step=True,
               timeout=180, abort_on_failure=False,
               fail_build_on_failure=False)
    self.m.run(self.m.step,
               'adb devices -l after failure of \'%s\' (attempt %d)' % (
                   title, attempt),
               cmd=[self.ADB_BINARY, 'devices', '-l'],
               infra_step=True, timeout=30, abort_on_failure=False,
               fail_build_on_failure=False)
    self.m.run(self.m.step,
               'adb reboot device after failure of \'%s\' (attempt %d)' % (
                   title, attempt),
               cmd=[self.ADB_BINARY, 'reboot'],
               infra_step=True, timeout=30, abort_on_failure=False,
               fail_build_on_failure=False)
    self.m.run(self.m.step,
               'wait for device after failure of \'%s\' (attempt %d)' % (
                   title, attempt),
               cmd=[
                   self.ADB_BINARY, 'wait-for-device', 'shell',
                   # Wait until the boot is actually complete.
                   # https://android.stackexchange.com/a/164050
                   'while [[ -z $(getprop sys.boot_completed) ]]; do sleep 1; done',
               ],
               timeout=180, abort_on_failure=False,
               fail_build_on_failure=False)
    device = self.m.vars.builder_cfg.get('model')
    if (device in self.cant_root):  # pragma: nocover
      return
    self.m.run(self.m.step,
               'adb root',
               cmd=[
                   self.ADB_BINARY, 'root'
               ],
               timeout=180, abort_on_failure=False,
               fail_build_on_failure=False)

  def _adb(self, title, *cmd, **kwargs):
    """Run an adb command as a step, retrying on failure.

    Args:
      title: step name.
      *cmd: arguments placed after the adb binary on the command line.
      **kwargs: forwarded to run.with_retry.  'attempts' (default 3) is
          popped and used as the retry count; 'infra_step' defaults to True
          when not given.

    Returns:
      Whatever run.with_retry returns.
    """
    # The only non-infra adb steps (dm / nanobench) happen to not use _adb().
    if 'infra_step' not in kwargs:
      kwargs['infra_step'] = True

    self._ever_ran_adb = True
    # ADB seems to be occasionally flaky on every device, so always retry.
    attempts = kwargs.pop('attempts', 3)

    def wait_for_device(attempt):
      return self._wait_for_device(title, attempt)

    with self.m.context(cwd=self.m.path.start_dir.joinpath('skia')):
      with self.m.env({'ADB_VENDOR_KEYS': self.ADB_PUB_KEY}):
        return self.m.run.with_retry(self.m.step, title, attempts,
                                     cmd=[self.ADB_BINARY]+list(cmd),
                                     between_attempts_fn=wait_for_device,
                                     **kwargs)

  def _scale_for_dm(self):
    """Re-enable CPUs and set the CPU governors used when running dm.

    Does nothing for models in cant_root or when
    vars.internal_hardware_label is set.
    """
    device = self.m.vars.builder_cfg.get('model')
    if (device in self.cant_root or
        self.m.vars.internal_hardware_label):
      return

    # This is paranoia... any CPUs we disabled while running nanobench
    # ought to be back online now that we've restarted the device.
    for i in self.disable_for_nanobench.get(device, []):
      self._set_cpu_online(i, 1) # enable

    # Copy the list: appending below must not modify the entry stored in
    # self.cpus_to_scale.
    scale_up = list(self.cpus_to_scale.get(device, [0]))
    # For big.LITTLE devices, make sure we scale the LITTLE cores up;
    # there is a chance they are still in powersave mode from when
    # swarming slows things down for cooling down and charging.
    if 0 not in scale_up:
      scale_up.append(0)
    for i in scale_up:
      # AndroidOne doesn't support ondemand governor. hotplug is similar.
      if device == 'AndroidOne':
        self._set_governor(i, 'hotplug')
      elif device in self.use_performance_governor_for_dm:
        # Pixel3a/4/4a have userspace powersave performance schedutil.
        # performance seems like a reasonable choice.
        self._set_governor(i, 'performance')
      else:
        self._set_governor(i, 'ondemand')

  def _scale_for_nanobench(self):
    """Set CPU governors/frequency, disable CPUs and lock the GPU clock.

    Does nothing for models in cant_root or when
    vars.internal_hardware_label is set.
    """
    device = self.m.vars.builder_cfg.get('model')
    if (device in self.cant_root or
        self.m.vars.internal_hardware_label):
      return

    for i in self.cpus_to_scale.get(device, [0]):
      if device in self.use_powersave_governor_for_nanobench:
        self._set_governor(i, 'powersave')
      else:
        # Devices in cant_root already returned above, so every remaining
        # device gets the userspace governor and a fixed CPU scale.
        self._set_governor(i, 'userspace')
        self._scale_cpu(i, 0.6)

    for i in self.disable_for_nanobench.get(device, []):
      self._set_cpu_online(i, 0) # disable

    if device in self.gpu_scaling:
      #https://developer.qualcomm.com/qfile/28823/lm80-p0436-11_adb_commands.pdf
      # Section 3.2.1 Commands to put the GPU in performance mode
      # Nexus 5 is  320000000 by default
      # Nexus 5x is 180000000 by default
      gpu_freq = self.gpu_scaling[device]
      script = self.module.resource('set_gpu_scaling.py')
      self.m.run.with_retry(self.m.step,
        "Lock GPU to %d (and other perf tweaks)" % gpu_freq,
        3, # attempts
        cmd=['python3', script, self.ADB_BINARY, gpu_freq],
        infra_step=True,
        timeout=30)

  def _set_governor(self, cpu, gov):
    """Run set_cpu_scaling_governor.py for one CPU (up to 3 attempts).

    Args:
      cpu: CPU id (int).
      gov: governor name, e.g. 'performance'.
    """
    self._ever_ran_adb = True
    script = self.module.resource('set_cpu_scaling_governor.py')
    self.m.run.with_retry(self.m.step,
        "Set CPU %d's governor to %s" % (cpu, gov),
        3, # attempts
        cmd=['python3', script, self.ADB_BINARY, cpu, gov],
        infra_step=True,
        timeout=30)

  def _set_cpu_online(self, cpu, value):
    """Set /sys/devices/system/cpu/cpu{N}/online to value (0 or 1)."""
    self._ever_ran_adb = True
    msg = 'Disabling'
    if value:
      msg = 'Enabling'

    def wait_for_device(attempt):
      return self._wait_for_device("set cpu online", attempt) # pragma: nocover

    script = self.module.resource('set_cpu_online.py')
    self.m.run.with_retry(self.m.step,
        '%s CPU %d' % (msg, cpu),
        3, # attempts
        cmd=['python3', script, self.ADB_BINARY, cpu, value],
        infra_step=True,
        between_attempts_fn=wait_for_device,
        timeout=30)

  def _scale_cpu(self, cpu, target_percent):
    """Run scale_cpu.py for one CPU (up to 3 attempts).

    Args:
      cpu: CPU id (int).
      target_percent: value passed (as a string) to scale_cpu.py; callers
          in this file pass 0.6.
    """
    self._ever_ran_adb = True

    def wait_for_device(attempt):
      return self._wait_for_device("scale cpu", attempt)

    script = self.module.resource('scale_cpu.py')
    self.m.run.with_retry(self.m.step,
        'Scale CPU %d to %f' % (cpu, target_percent),
        3, # attempts
        cmd=['python3', script, self.ADB_BINARY, str(target_percent), cpu],
        infra_step=True,
        between_attempts_fn=wait_for_device,
        timeout=30)

  def _asan_setup_path(self):
    """Return the path of asan_device_setup under vars.workdir."""
    return self.m.vars.workdir.joinpath(
        'android_ndk_linux', 'toolchains', 'llvm', 'prebuilt', 'linux-x86_64',
        'lib', 'clang', '17', 'bin', 'asan_device_setup')

  def install(self):
    """Prepare the device: create dirs, set up ASAN, scale CPUs, push app."""
    self._adb('mkdir ' + self.device_dirs.resource_dir,
              'shell', 'mkdir', '-p', self.device_dirs.resource_dir)
    if self.m.vars.builder_cfg.get('model') == 'GalaxyS20':
      # See skia:10184, should be moot once upgraded to Android 11?
      self._adb('cp libGLES_mali.so to ' + self.device_dirs.bin_dir,
                'shell', 'cp',
                '/vendor/lib64/egl/libGLES_mali.so',
                self.device_dirs.bin_dir + 'libvulkan.so')
    if 'ASAN' in self.m.vars.extra_tokens:
      self._ever_ran_adb = True
      script = self.module.resource('setup_device_for_asan.py')
      self.m.run(
          self.m.step, 'Setting up device to run ASAN',
          cmd=['python3', script, self.ADB_BINARY, self._asan_setup_path()],
          infra_step=True,
          timeout=300,
          abort_on_failure=True)
    if self.app_name:
      if (self.app_name == 'nanobench'):
        self._scale_for_nanobench()
      else:
        self._scale_for_dm()
      app_path = self.host_dirs.bin_dir.joinpath(self.app_name)
      self._adb('push %s' % self.app_name,
                'push', app_path, self.device_dirs.bin_dir)

  def cleanup_steps(self):
    """Reboot the device, revert ASAN, dump the adb log, maybe quarantine."""
    self.m.run(self.m.step,
               'adb reboot device',
               cmd=[self.ADB_BINARY, 'reboot'],
               infra_step=True, timeout=30, abort_on_failure=False,
               fail_build_on_failure=False)
    self.m.run(self.m.step,
               'wait for device after rebooting',
               cmd=[
                   self.ADB_BINARY, 'wait-for-device', 'shell',
                   # Wait until the boot is actually complete.
                   # https://android.stackexchange.com/a/164050
                   'while [[ -z $(getprop sys.boot_completed) ]]; do sleep 1; done',
               ],
               timeout=180, abort_on_failure=False,
               fail_build_on_failure=False)

    if 'ASAN' in self.m.vars.extra_tokens:
      self._ever_ran_adb = True
      # Remove ASAN.
      self.m.run(self.m.step,
                 'wait for device before uninstalling ASAN',
                 cmd=[self.ADB_BINARY, 'wait-for-device', 'shell',
                      # Wait until the boot is actually complete.
                      # https://android.stackexchange.com/a/164050
                      'while [[ -z $(getprop sys.boot_completed) ]]; do sleep 1; done',
                     ], infra_step=True,
                 timeout=180, abort_on_failure=False,
                 fail_build_on_failure=False)
      self.m.run(self.m.step, 'uninstall ASAN',
                 cmd=[self._asan_setup_path(), '--revert'],
                 infra_step=True, timeout=300,
                 abort_on_failure=False, fail_build_on_failure=False)

    if self._ever_ran_adb:
      script = self.module.resource('dump_adb_log.py')
      self.m.run(self.m.step, 'dump log',
          cmd=['python3', script, self.host_dirs.bin_dir, self.ADB_BINARY],
          infra_step=True,
          timeout=300,
          abort_on_failure=False)

    # Only quarantine the bot if the first failed step
    # is an infra step. If, instead, we did this for any infra failures, we
    # would do this too much. For example, if a Nexus 10 died during dm
    # and the following pull step would also fail "device not found" - causing
    # us to run the shutdown command when the device was probably not in a
    # broken state; it was just rebooting.
    if (self.m.run.failed_steps and
        isinstance(self.m.run.failed_steps[0], recipe_api.InfraFailure)):
      bot_id = self.m.vars.swarming_bot_id
      self.m.file.write_text('Quarantining Bot',
                             '/home/chrome-bot/%s.force_quarantine' % bot_id,
                             ' ')

    # if self._ever_ran_adb:
    #   self._adb('kill adb server', 'kill-server')

  def step(self, name, cmd):
    """Run cmd on the device via a generated shell script.

    Writes '<cmd[0]>.sh' into vars.tmp_dir, pushes it to the device bin
    dir, clears logcat, then runs it through run_sh.py.  The script stores
    the command's exit status in a file named 'rc' in the device bin dir.

    Args:
      name: not used by this method.
      cmd: sequence whose first element is the binary name (relative to the
          device bin dir) and whose remaining elements are its arguments.
    """
    sh = '%s.sh' % cmd[0]
    self.m.run.writefile(self.m.vars.tmp_dir.joinpath(sh),
        'set -x; LD_LIBRARY_PATH=%s %s%s; echo $? >%src' % (
            self.device_dirs.bin_dir,
            self.device_dirs.bin_dir, subprocess.list2cmdline(map(str, cmd)),
            self.device_dirs.bin_dir))
    self._adb('push %s' % sh,
              'push', self.m.vars.tmp_dir.joinpath(sh), self.device_dirs.bin_dir)

    self._adb('clear log', 'logcat', '-c')
    script = self.module.resource('run_sh.py')
    self.m.step('%s' % cmd[0],
        cmd=['python3', script, self.device_dirs.bin_dir, sh, self.ADB_BINARY])

  def copy_file_to_device(self, host, device):
    """adb push a single host path to the given device path."""
    self._adb('push %s %s' % (host, device), 'push', host, device)

  def copy_directory_contents_to_device(self, host, device):
    """adb push every entry matching '*' in host into device."""
    contents = self.m.file.glob_paths('ls %s/*' % host,
                                      host, '*',
                                      test_data=['foo.png', 'bar.jpg'])
    args = contents + [device]
    self._adb('push %s/* %s' % (host, device), 'push', *args)

  def copy_directory_contents_to_host(self, device, host):
    """adb pull the device directory to a temp dir, then copy into host."""
    # TODO(borenet): When all of our devices are on Android 6.0 and up, we can
    # switch to using tar to zip up the results before pulling.
    with self.m.step.nest('adb pull'):
      tmp = self.m.path.mkdtemp('adb_pull')
      self._adb('pull %s' % device, 'pull', device, tmp)
      paths = self.m.file.glob_paths(
          'list pulled files',
          tmp,
          self.m.path.basename(device) + self.m.path.sep + '*',
          test_data=['%d.png' % i for i in (1, 2)])
      for p in paths:
        self.m.file.copy('copy %s' % self.m.path.basename(p), p, host)

  def read_file_on_device(self, path, **kwargs):
    """Return the contents of a file on the device, or None.

    First runs a single-attempt, non-fatal 'test -f' step; if that step's
    result is falsy, returns None without reading.

    Args:
      path: file path on the device.
      **kwargs: forwarded to the _adb call that runs 'cat'.

    Returns:
      The file's stdout decoded as UTF-8 with trailing whitespace removed,
      or None when there is no result or no stdout.
    """
    testKwargs = {
      'attempts': 1,
      'abort_on_failure': False,
      'fail_build_on_failure': False,
    }
    rv = self._adb('check if %s exists' % path,
                   'shell', 'test', '-f', path, **testKwargs)
    if not rv: # pragma: nocover
      return None

    rv = self._adb('read %s' % path,
                   'shell', 'cat', path, stdout=self.m.raw_io.output(),
                   **kwargs)
    return rv.stdout.decode('utf-8').rstrip() if rv and rv.stdout else None

  def remove_file_on_device(self, path):
    """Run remove_file_on_device.py for path (up to 3 attempts)."""
    script = self.module.resource('remove_file_on_device.py')
    self.m.run.with_retry(self.m.step, 'rm %s' % path, 3,
                          cmd=['python3', script, self.ADB_BINARY, path],
                          infra_step=True)

  def create_clean_device_dir(self, path):
    """Remove path on the device, then recreate it with 'mkdir -p'."""
    self.remove_file_on_device(path)
    self._adb('mkdir %s' % path, 'shell', 'mkdir', '-p', path)