• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5
6from recipe_engine import recipe_api
7from recipe_engine import recipe_test_api
8
9from . import default
10import subprocess  # TODO(borenet): No! Remove this.
11
12
13"""Android flavor, used for running code on Android."""
14
15
16class AndroidFlavor(default.DefaultFlavor):
17  def __init__(self, m, app_name):
18    super(AndroidFlavor, self).__init__(m, app_name)
19    self._ever_ran_adb = False
20    self.ADB_BINARY = '/usr/bin/adb.1.0.35'
21    self.ADB_PUB_KEY = '/home/chrome-bot/.android/adbkey'
22    if 'skia' not in self.m.vars.swarming_bot_id:
23      self.ADB_BINARY = '/opt/infra-android/tools/adb'
24      self.ADB_PUB_KEY = ('/home/chrome-bot/.android/'
25                          'chrome_infrastructure_adbkey')
26
27    # Data should go in android_data_dir, which may be preserved across runs.
28    android_data_dir = '/sdcard/revenge_of_the_skiabot/'
29    self.device_dirs = default.DeviceDirs(
30        bin_dir        = '/data/local/tmp/',
31        dm_dir         = android_data_dir + 'dm_out',
32        perf_data_dir  = android_data_dir + 'perf',
33        resource_dir   = android_data_dir + 'resources',
34        images_dir     = android_data_dir + 'images',
35        lotties_dir    = android_data_dir + 'lotties',
36        skp_dir        = android_data_dir + 'skps',
37        svg_dir        = android_data_dir + 'svgs',
38        mskp_dir       = android_data_dir + 'mskp',
39        tmp_dir        = android_data_dir,
40        texttraces_dir = android_data_dir + 'text_blob_traces')
41
42    # A list of devices we can't root.  If rooting fails and a device is not
43    # on the list, we fail the task to avoid perf inconsistencies.
44    self.cant_root = ['GalaxyS7_G930FD', 'GalaxyS9',
45                      'GalaxyS20', 'MotoG4', 'NVIDIA_Shield',
46                      'P30', 'Pixel4','Pixel4XL', 'Pixel5', 'TecnoSpark3Pro']
47
48    # Maps device type -> CPU ids that should be scaled for nanobench.
49    # Many devices have two (or more) different CPUs (e.g. big.LITTLE
50    # on Nexus5x). The CPUs listed are the biggest cpus on the device.
51    # The CPUs are grouped together, so we only need to scale one of them
52    # (the one listed) in order to scale them all.
53    # E.g. Nexus5x has cpu0-3 as one chip and cpu4-5 as the other. Thus,
54    # if one wants to run a single-threaded application (e.g. nanobench), one
55    # can disable cpu0-3 and scale cpu 4 to have only cpu4 and 5 at the same
56    # frequency.  See also disable_for_nanobench.
57    self.cpus_to_scale = {
58      'Nexus5x': [4],
59      'Pixel': [2],
60      'Pixel2XL': [4]
61    }
62
63    # Maps device type -> CPU ids that should be turned off when running
64    # single-threaded applications like nanobench. The devices listed have
65    # multiple, differnt CPUs. We notice a lot of noise that seems to be
66    # caused by nanobench running on the slow CPU, then the big CPU. By
67    # disabling this, we see less of that noise by forcing the same CPU
68    # to be used for the performance testing every time.
69    self.disable_for_nanobench = {
70      'Nexus5x': range(0, 4),
71      'Pixel': range(0, 2),
72      'Pixel2XL': range(0, 4)
73    }
74
75    self.gpu_scaling = {
76      "Nexus5":  450000000,
77      "Nexus5x": 600000000,
78    }
79
80  def _adb(self, title, *cmd, **kwargs):
81    # The only non-infra adb steps (dm / nanobench) happen to not use _adb().
82    if 'infra_step' not in kwargs:
83      kwargs['infra_step'] = True
84
85    self._ever_ran_adb = True
86    # ADB seems to be occasionally flaky on every device, so always retry.
87    attempts = kwargs.pop('attempts', 3)
88
89    def wait_for_device(attempt):
90      self.m.run(self.m.step,
91                 'kill adb server after failure of \'%s\' (attempt %d)' % (
92                     title, attempt),
93                 cmd=[self.ADB_BINARY, 'kill-server'],
94                 infra_step=True, timeout=30, abort_on_failure=False,
95                 fail_build_on_failure=False)
96      self.m.run(self.m.step,
97                 'wait for device after failure of \'%s\' (attempt %d)' % (
98                     title, attempt),
99                 cmd=[self.ADB_BINARY, 'wait-for-device'], infra_step=True,
100                 timeout=180, abort_on_failure=False,
101                 fail_build_on_failure=False)
102
103    with self.m.context(cwd=self.m.path['start_dir'].join('skia')):
104      with self.m.env({'ADB_VENDOR_KEYS': self.ADB_PUB_KEY}):
105        return self.m.run.with_retry(self.m.step, title, attempts,
106                                     cmd=[self.ADB_BINARY]+list(cmd),
107                                     between_attempts_fn=wait_for_device,
108                                     **kwargs)
109
110  def _scale_for_dm(self):
111    device = self.m.vars.builder_cfg.get('model')
112    if (device in self.cant_root or
113        self.m.vars.internal_hardware_label):
114      return
115
116    # This is paranoia... any CPUs we disabled while running nanobench
117    # ought to be back online now that we've restarted the device.
118    for i in self.disable_for_nanobench.get(device, []):
119      self._set_cpu_online(i, 1) # enable
120
121    scale_up = self.cpus_to_scale.get(device, [0])
122    # For big.LITTLE devices, make sure we scale the LITTLE cores up;
123    # there is a chance they are still in powersave mode from when
124    # swarming slows things down for cooling down and charging.
125    if 0 not in scale_up:
126      scale_up.append(0)
127    for i in scale_up:
128      # AndroidOne doesn't support ondemand governor. hotplug is similar.
129      if device == 'AndroidOne':
130        self._set_governor(i, 'hotplug')
131      elif device in ['Pixel3a', 'Pixel4', 'Pixel4a', 'Wembley']:
132        # Pixel3a/4/4a have userspace powersave performance schedutil.
133        # performance seems like a reasonable choice.
134        self._set_governor(i, 'performance')
135      else:
136        self._set_governor(i, 'ondemand')
137
  def _scale_for_nanobench(self):
    """Pin CPU (and, for some models, GPU) clocks before running nanobench.

    Does nothing when the model is listed in self.cant_root or when
    self.m.vars.internal_hardware_label is truthy.  Otherwise:
      * each CPU in self.cpus_to_scale for the model (CPU 0 when the model
        has no entry) gets the 'userspace' governor and is scaled with a
        target of 0.6 via _scale_cpu;
      * each CPU in self.disable_for_nanobench for the model is taken
        offline;
      * for models in self.gpu_scaling, an inline script writes the GPU
        clock, idle timer and force_* switches and verifies each by reading
        the value back.
    """
    device = self.m.vars.builder_cfg.get('model')
    if (device in self.cant_root or
      self.m.vars.internal_hardware_label):
      return

    for i in self.cpus_to_scale.get(device, [0]):
      self._set_governor(i, 'userspace')
      self._scale_cpu(i, 0.6)

    for i in self.disable_for_nanobench.get(device, []):
      self._set_cpu_online(i, 0) # disable

    if device in self.gpu_scaling:
      #https://developer.qualcomm.com/qfile/28823/lm80-p0436-11_adb_commands.pdf
      # Section 3.2.1 Commands to put the GPU in performance mode
      # Nexus 5 is  320000000 by default
      # Nexus 5x is 180000000 by default
      gpu_freq = self.gpu_scaling[device]
      # The script receives the adb binary as argv[1] and the frequency as
      # argv[2]; it raises (failing the attempt) if 'adb root' reports
      # 'cannot' or if any value read back differs from the value written.
      # NOTE(review): the script compares check_output() results with str
      # values; under Python 3 those results are bytes -- confirm which
      # interpreter python.inline uses.
      self.m.run.with_retry(self.m.python.inline,
        "Lock GPU to %d (and other perf tweaks)" % gpu_freq,
        3, # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
freq = sys.argv[2]
idle_timer = "10000"

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

subprocess.check_output([ADB, 'shell', 'stop', 'thermald'])

subprocess.check_output([ADB, 'shell', 'echo "%s" > '
    '/sys/class/kgsl/kgsl-3d0/gpuclk' % freq])

actual_freq = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/class/kgsl/kgsl-3d0/gpuclk']).strip()
if actual_freq != freq:
  raise Exception('Frequency (actual, expected) (%s, %s)'
                  % (actual_freq, freq))

subprocess.check_output([ADB, 'shell', 'echo "%s" > '
    '/sys/class/kgsl/kgsl-3d0/idle_timer' % idle_timer])

actual_timer = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/class/kgsl/kgsl-3d0/idle_timer']).strip()
if actual_timer != idle_timer:
  raise Exception('idle_timer (actual, expected) (%s, %s)'
                  % (actual_timer, idle_timer))

for s in ['force_bus_on', 'force_rail_on', 'force_clk_on']:
  subprocess.check_output([ADB, 'shell', 'echo "1" > '
      '/sys/class/kgsl/kgsl-3d0/%s' % s])
  actual_set = subprocess.check_output([ADB, 'shell', 'cat '
      '/sys/class/kgsl/kgsl-3d0/%s' % s]).strip()
  if actual_set != "1":
    raise Exception('%s (actual, expected) (%s, 1)'
                    % (s, actual_set))
""",
        args = [self.ADB_BINARY, gpu_freq],
        infra_step=True,
        timeout=30)
207
  def _set_governor(self, cpu, gov):
    """Write a cpufreq scaling governor for one CPU and verify it took.

    Args:
      cpu: CPU index; formatted with %d into the step name and the sysfs
          path /sys/devices/system/cpu/cpu<N>/cpufreq/scaling_governor.
      gov: governor name to write.

    The work is done by an inline script, tried up to 3 times, that raises
    if 'adb root' reports 'cannot' or if the governor read back differs
    from gov.
    """
    self._ever_ran_adb = True
    # Script arguments: argv[1] adb binary, argv[2] cpu, argv[3] governor.
    # NOTE(review): the script compares check_output() results with str
    # values; under Python 3 those results are bytes -- confirm which
    # interpreter python.inline uses.
    self.m.run.with_retry(self.m.python.inline,
        "Set CPU %d's governor to %s" % (cpu, gov),
        3, # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
cpu = int(sys.argv[2])
gov = sys.argv[3]

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

subprocess.check_output([ADB, 'shell', 'echo "%s" > '
    '/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor' % (gov, cpu)])
actual_gov = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor' % cpu]).strip()
if actual_gov != gov:
  raise Exception('(actual, expected) (%s, %s)'
                  % (actual_gov, gov))
""",
        args = [self.ADB_BINARY, cpu, gov],
        infra_step=True,
        timeout=30)
239
240
  def _set_cpu_online(self, cpu, value):
    """Set /sys/devices/system/cpu/cpu{N}/online to value (0 or 1).

    Args:
      cpu: CPU index N.
      value: 1 to enable the CPU, 0 to disable it; any truthy value makes
          the step name read 'Enabling', otherwise 'Disabling'.

    The inline script, tried up to 3 times, exits early when the file
    already holds the requested value, and raises if 'adb root' reports
    'cannot' or if the value read back after writing differs from value.
    """
    self._ever_ran_adb = True
    msg = 'Disabling'
    if value:
      msg = 'Enabling'
    # Script arguments: argv[1] adb binary, argv[2] cpu, argv[3] value.
    # NOTE(review): the script compares check_output() results with str
    # values; under Python 3 those results are bytes -- confirm which
    # interpreter python.inline uses.
    self.m.run.with_retry(self.m.python.inline,
        '%s CPU %d' % (msg, cpu),
        3, # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
cpu = int(sys.argv[2])
value = int(sys.argv[3])

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

# If we try to echo 1 to an already online cpu, adb returns exit code 1.
# So, check the value before trying to write it.
prior_status = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/devices/system/cpu/cpu%d/online' % cpu]).strip()
if prior_status == str(value):
  print('CPU %d online already %d' % (cpu, value))
  sys.exit()

subprocess.check_output([ADB, 'shell', 'echo %s > '
    '/sys/devices/system/cpu/cpu%d/online' % (value, cpu)])
actual_status = subprocess.check_output([ADB, 'shell', 'cat '
    '/sys/devices/system/cpu/cpu%d/online' % cpu]).strip()
if actual_status != str(value):
  raise Exception('(actual, expected) (%s, %d)'
                  % (actual_status, value))
""",
        args = [self.ADB_BINARY, cpu, value],
        infra_step=True,
        timeout=30)
284
285
  def _scale_cpu(self, cpu, target_percent):
    """Lock one CPU's frequency to roughly target_percent of its maximum.

    Args:
      cpu: CPU index; selects /sys/devices/system/cpu/cpu<N>/cpufreq.
      target_percent: fraction of the highest available frequency to aim
          for (the caller in this class passes 0.6).

    The inline script, tried up to 3 times, reads
    scaling_available_frequencies, picks the highest listed frequency that
    is <= round(max * target_percent) (falling back to the maximum if none
    qualifies), writes scaling_min_freq, scaling_max_freq and
    scaling_setspeed, sleeps 5 seconds and raises if scaling_cur_freq does
    not match the chosen frequency.
    """
    self._ever_ran_adb = True
    # Script arguments: argv[1] adb binary, argv[2] target_percent,
    # argv[3] cpu -- note this is the reverse of the method's own
    # parameter order.
    # NOTE(review): the script compares check_output() results with str
    # values; under Python 3 those results are bytes -- confirm which
    # interpreter python.inline uses.
    self.m.run.with_retry(self.m.python.inline,
        'Scale CPU %d to %f' % (cpu, target_percent),
        3, # attempts
        program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
target_percent = float(sys.argv[2])
cpu = int(sys.argv[3])
log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

root = '/sys/devices/system/cpu/cpu%d/cpufreq' %cpu

# All devices we test on give a list of their available frequencies.
available_freqs = subprocess.check_output([ADB, 'shell',
    'cat %s/scaling_available_frequencies' % root])

# Check for message like '/system/bin/sh: file not found'
if available_freqs and '/system/bin/sh' not in available_freqs:
  available_freqs = sorted(
      int(i) for i in available_freqs.strip().split())
else:
  raise Exception('Could not get list of available frequencies: %s' %
                  available_freqs)

maxfreq = available_freqs[-1]
target = int(round(maxfreq * target_percent))
freq = maxfreq
for f in reversed(available_freqs):
  if f <= target:
    freq = f
    break

print('Setting frequency to %d' % freq)

# If scaling_max_freq is lower than our attempted setting, it won't take.
# We must set min first, because if we try to set max to be less than min
# (which sometimes happens after certain devices reboot) it returns a
# perplexing permissions error.
subprocess.check_output([ADB, 'shell', 'echo 0 > '
    '%s/scaling_min_freq' % root])
subprocess.check_output([ADB, 'shell', 'echo %d > '
    '%s/scaling_max_freq' % (freq, root)])
subprocess.check_output([ADB, 'shell', 'echo %d > '
    '%s/scaling_setspeed' % (freq, root)])
time.sleep(5)
actual_freq = subprocess.check_output([ADB, 'shell', 'cat '
    '%s/scaling_cur_freq' % root]).strip()
if actual_freq != str(freq):
  raise Exception('(actual, expected) (%s, %d)'
                  % (actual_freq, freq))
""",
        args = [self.ADB_BINARY, str(target_percent), cpu],
        infra_step=True,
        timeout=30)
349
350
351  def _asan_setup_path(self):
352    return self.m.vars.workdir.join(
353        'android_ndk_linux', 'toolchains', 'llvm', 'prebuilt', 'linux-x86_64',
354        'lib64', 'clang', '9.0.8', 'bin', 'asan_device_setup')
355
356
  def install(self):
    """Prepare the device and push the app binary to it.

    Steps, in order:
      1. Create the device resource directory.
      2. On GalaxyS20 / GalaxyS9, copy the vendor libGLES_mali.so into
         bin_dir under the name libvulkan.so.
      3. When 'ASAN' is among the extra tokens, run an inline script that
         roots adb, disables verity (rebooting if it was not already
         disabled), runs the asan_device_setup script (reverting and
         retrying once on failure) and waits for the device to come back.
      4. When self.app_name is set, scale CPUs for nanobench or dm and
         push the app from the host bin_dir to the device bin_dir.
    """
    self._adb('mkdir ' + self.device_dirs.resource_dir,
              'shell', 'mkdir', '-p', self.device_dirs.resource_dir)
    if self.m.vars.builder_cfg.get('model') in ['GalaxyS20', 'GalaxyS9']:
      # See skia:10184, should be moot once upgraded to Android 11?
      self._adb('cp libGLES_mali.so to ' + self.device_dirs.bin_dir,
                 'shell', 'cp',
                '/vendor/lib64/egl/libGLES_mali.so',
                self.device_dirs.bin_dir + 'libvulkan.so')
    if 'ASAN' in self.m.vars.extra_tokens:
      # This step calls adb directly rather than through _adb(), so record
      # that adb was used.
      self._ever_ran_adb = True
      # Script arguments: argv[1] adb binary, argv[2] ASAN setup script.
      # The setup script is launched with an environment containing only
      # ADB.
      # NOTE(review): the script tests check_output() results with str
      # operands ('1' in bit1, 'cannot' in log); under Python 3 those
      # results are bytes -- confirm which interpreter python.inline uses.
      self.m.run(self.m.python.inline, 'Setting up device to run ASAN',
                 program="""
import os
import subprocess
import sys
import time
ADB = sys.argv[1]
ASAN_SETUP = sys.argv[2]

def wait_for_device():
  while True:
    time.sleep(5)
    print('Waiting for device')
    subprocess.check_output([ADB, 'wait-for-device'])
    bit1 = subprocess.check_output([ADB, 'shell', 'getprop',
                                   'dev.bootcomplete'])
    bit2 = subprocess.check_output([ADB, 'shell', 'getprop',
                                   'sys.boot_completed'])
    if '1' in bit1 and '1' in bit2:
      print('Device detected')
      break

log = subprocess.check_output([ADB, 'root'])
# check for message like 'adbd cannot run as root in production builds'
print(log)
if 'cannot' in log:
  raise Exception('adb root failed')

output = subprocess.check_output([ADB, 'disable-verity'])
print(output)

if 'already disabled' not in output:
  print('Rebooting device')
  subprocess.check_output([ADB, 'reboot'])
  wait_for_device()

def installASAN(revert=False):
  # ASAN setup script is idempotent, either it installs it or
  # says it's installed.  Returns True on success, false otherwise.
  out = subprocess.check_output([ADB, 'wait-for-device'])
  print(out)
  cmd = [ASAN_SETUP]
  if revert:
    cmd = [ASAN_SETUP, '--revert']
  process = subprocess.Popen(cmd, env={'ADB': ADB},
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)

  # this also blocks until command finishes
  (stdout, stderr) = process.communicate()
  print(stdout)
  print('Stderr: %s' % stderr)
  return process.returncode == 0

if not installASAN():
  print('Trying to revert the ASAN install and then re-install')
  # ASAN script sometimes has issues if it was interrupted or partially applied
  # Try reverting it, then re-enabling it
  if not installASAN(revert=True):
    raise Exception('reverting ASAN install failed')

  # Sleep because device does not reboot instantly
  time.sleep(10)

  if not installASAN():
    raise Exception('Tried twice to setup ASAN and failed.')

# Sleep because device does not reboot instantly
time.sleep(10)
wait_for_device()
# Sleep again to hopefully avoid error "secure_mkdirs failed: No such file or
# directory" when pushing resources to the device.
time.sleep(60)
""",
                 args = [self.ADB_BINARY, self._asan_setup_path()],
                 infra_step=True,
                 timeout=300,
                 abort_on_failure=True)
    if self.app_name:
      # nanobench gets fixed clocks; every other app gets the dm scaling.
      if (self.app_name == 'nanobench'):
        self._scale_for_nanobench()
      else:
        self._scale_for_dm()
      app_path = self.host_dirs.bin_dir.join(self.app_name)
      self._adb('push %s' % self.app_name,
                'push', app_path, self.device_dirs.bin_dir)
453
454
455
  def cleanup_steps(self):
    """Run post-task cleanup: revert ASAN, dump logcat, maybe quarantine.

    Steps, in order:
      1. When 'ASAN' is among the extra tokens, wait for the device and run
         the ASAN setup script with --revert; neither step aborts or fails
         the build on failure.
      2. If adb was ever used, run an inline script that prints
         'logcat -d', appending addr2line output to lines that look like
         native crash frames when a matching binary exists in the host
         bin_dir.
      3. If the first failed step is an InfraFailure, write a
         force_quarantine file named after the swarming bot id.
      4. If adb was ever used, kill the adb server.
    """
    if 'ASAN' in self.m.vars.extra_tokens:
      self._ever_ran_adb = True
      # Remove ASAN.
      self.m.run(self.m.step,
                 'wait for device before uninstalling ASAN',
                 cmd=[self.ADB_BINARY, 'wait-for-device'], infra_step=True,
                 timeout=180, abort_on_failure=False,
                 fail_build_on_failure=False)
      self.m.run(self.m.step, 'uninstall ASAN',
                 cmd=[self._asan_setup_path(), '--revert'],
                 infra_step=True, timeout=300,
                 abort_on_failure=False, fail_build_on_failure=False)

    if self._ever_ran_adb:
      # The adb binary path is substituted into the program text with %;
      # the host bin_dir is passed as argv[1].  '\\n' below becomes the
      # two-character escape \n inside the generated script.
      # NOTE(review): log.split('\n') on a check_output() result assumes
      # str output; under Python 3 that result is bytes -- confirm which
      # interpreter python.inline uses.
      self.m.run(self.m.python.inline, 'dump log', program="""
          import os
          import subprocess
          import sys
          out = sys.argv[1]
          log = subprocess.check_output(['%s', 'logcat', '-d'])
          for line in log.split('\\n'):
            tokens = line.split()
            if len(tokens) == 11 and tokens[-7] == 'F' and tokens[-3] == 'pc':
              addr, path = tokens[-2:]
              local = os.path.join(out, os.path.basename(path))
              if os.path.exists(local):
                try:
                  sym = subprocess.check_output(['addr2line', '-Cfpe', local, addr])
                  line = line.replace(addr, addr + ' ' + sym.strip())
                except subprocess.CalledProcessError:
                  pass
            print(line)
          """ % self.ADB_BINARY,
          args=[self.host_dirs.bin_dir],
          infra_step=True,
          timeout=300,
          abort_on_failure=False)

    # Only quarantine the bot if the first failed step
    # is an infra step. If, instead, we did this for any infra failures, we
    # would do this too much. For example, if a Nexus 10 died during dm
    # and the following pull step would also fail "device not found" - causing
    # us to run the shutdown command when the device was probably not in a
    # broken state; it was just rebooting.
    if (self.m.run.failed_steps and
        isinstance(self.m.run.failed_steps[0], recipe_api.InfraFailure)):
      bot_id = self.m.vars.swarming_bot_id
      self.m.file.write_text('Quarantining Bot',
                             '/home/chrome-bot/%s.force_quarantine' % bot_id,
                             ' ')

    if self._ever_ran_adb:
      self._adb('kill adb server', 'kill-server')
510
511  def step(self, name, cmd):
512    sh = '%s.sh' % cmd[0]
513    self.m.run.writefile(self.m.vars.tmp_dir.join(sh),
514        'set -x; LD_LIBRARY_PATH=%s %s%s; echo $? >%src' % (
515            self.device_dirs.bin_dir,
516            self.device_dirs.bin_dir, subprocess.list2cmdline(map(str, cmd)),
517            self.device_dirs.bin_dir))
518    self._adb('push %s' % sh,
519              'push', self.m.vars.tmp_dir.join(sh), self.device_dirs.bin_dir)
520
521    self._adb('clear log', 'logcat', '-c')
522    self.m.python.inline('%s' % cmd[0], """
523    import subprocess
524    import sys
525    bin_dir = sys.argv[1]
526    sh      = sys.argv[2]
527    subprocess.check_call(['%s', 'shell', 'sh', bin_dir + sh])
528    try:
529      sys.exit(int(subprocess.check_output(['%s', 'shell', 'cat',
530                                            bin_dir + 'rc'])))
531    except ValueError:
532      print("Couldn't read the return code.  Probably killed for OOM.")
533      sys.exit(1)
534    """ % (self.ADB_BINARY, self.ADB_BINARY),
535      args=[self.device_dirs.bin_dir, sh])
536
537  def copy_file_to_device(self, host, device):
538    self._adb('push %s %s' % (host, device), 'push', host, device)
539
540  def copy_directory_contents_to_device(self, host, device):
541    contents = self.m.file.glob_paths('ls %s/*' % host,
542                                      host, '*',
543                                      test_data=['foo.png', 'bar.jpg'])
544    args = contents + [device]
545    self._adb('push %s/* %s' % (host, device), 'push', *args)
546
547  def copy_directory_contents_to_host(self, device, host):
548    # TODO(borenet): When all of our devices are on Android 6.0 and up, we can
549    # switch to using tar to zip up the results before pulling.
550    with self.m.step.nest('adb pull'):
551      tmp = self.m.path.mkdtemp('adb_pull')
552      self._adb('pull %s' % device, 'pull', device, tmp)
553      paths = self.m.file.glob_paths(
554          'list pulled files',
555          tmp,
556          self.m.path.basename(device) + self.m.path.sep + '*',
557          test_data=['%d.png' % i for i in (1, 2)])
558      for p in paths:
559        self.m.file.copy('copy %s' % self.m.path.basename(p), p, host)
560
561  def read_file_on_device(self, path, **kwargs):
562    rv = self._adb('read %s' % path,
563                   'shell', 'cat', path, stdout=self.m.raw_io.output(),
564                   **kwargs)
565    return rv.stdout.decode('utf-8').rstrip() if rv and rv.stdout else None
566
  def remove_file_on_device(self, path):
    """Delete a path on the device with 'rm -rf' and verify it is gone.

    Args:
      path: file or directory on the device.

    The inline script, tried up to 3 times, removes the path and then runs
    'ls' on it; it raises unless the ls output contains
    'No such file or directory'.
    """
    # Script arguments: argv[1] adb binary, argv[2] path.
    # NOTE(review): the final check tests a str against check_output()
    # output; under Python 3 that output is bytes -- confirm which
    # interpreter python.inline uses.
    # NOTE(review): unlike the other adb-driving methods in this class,
    # this one does not set self._ever_ran_adb -- confirm whether that is
    # intentional.
    self.m.run.with_retry(self.m.python.inline, 'rm %s' % path, 3, program="""
        import subprocess
        import sys

        # Remove the path.
        adb = sys.argv[1]
        path = sys.argv[2]
        print('Removing %s' % path)
        cmd = [adb, 'shell', 'rm', '-rf', path]
        print(' '.join(cmd))
        subprocess.check_call(cmd)

        # Verify that the path was deleted.
        print('Checking for existence of %s' % path)
        cmd = [adb, 'shell', 'ls', path]
        print(' '.join(cmd))
        try:
          output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
          output = e.output
        print('Output was:')
        print('======')
        print(output)
        print('======')
        if 'No such file or directory' not in output:
          raise Exception('%s exists despite being deleted' % path)
        """,
        args=[self.ADB_BINARY, path],
        infra_step=True)
597
598  def create_clean_device_dir(self, path):
599    self.remove_file_on_device(path)
600    self._adb('mkdir %s' % path, 'shell', 'mkdir', '-p', path)
601