• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Archives or replays webpages and creates SKPs in a Google Storage location.
7
8To archive webpages and store SKP files (archives should be rarely updated):
9
10cd skia
11python tools/skp/webpages_playback.py --data_store=gs://rmistry --record \
12--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
13--browser_executable=/tmp/chromium/out/Release/chrome
14
15The above command uses Google Storage bucket 'rmistry' to download needed files.
16
17To replay archived webpages and re-generate SKP files (should be run whenever
18SkPicture.PICTURE_VERSION changes):
19
20cd skia
21python tools/skp/webpages_playback.py --data_store=gs://rmistry \
22--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
23--browser_executable=/tmp/chromium/out/Release/chrome
24
25
26Specify the --page_sets flag (default value is 'all') to pick a list of which
27webpages should be archived and/or replayed. Eg:
28
29--page_sets=tools/skp/page_sets/skia_yahooanswers_desktop.py,\
30tools/skp/page_sets/skia_googlecalendar_nexus10.py
31
32The --browser_executable flag should point to the browser binary you want to use
33to capture archives and/or capture SKP files. Majority of the time it should be
34a newly built chrome binary.
35
36The --data_store flag controls where the needed artifacts are downloaded from.
37It also controls where the generated artifacts, such as recorded webpages and
38resulting skp renderings, are uploaded to. URLs with scheme 'gs://' use Google
39Storage. Otherwise use local filesystem.
40
41The --upload flag (it takes no value) means generated artifacts will be
42uploaded or copied to the location specified by --data_store. (default value is
43False if not specified).
44
45The --non-interactive flag controls whether the script will prompt the user
46(default value is False if not specified).
47
48The --skia_tools flag if specified will allow this script to run
49debugger, render_pictures, and render_pdfs on the captured
50SKP(s). The tools are run after all SKPs are successfully captured to make sure
51they can be added to the buildbots with no breakages.
52"""
53
54import glob
55import optparse
56import os
57import posixpath
58import shutil
59import subprocess
60import sys
61import tempfile
62import time
63import traceback
64
65
# Names of the directories used both locally and in the data store.
ROOT_PLAYBACK_DIR_NAME = 'playback'
SKPICTURES_DIR_NAME = 'skps'

# URL scheme that selects Google Storage as the data store.
GS_PREFIX = 'gs://'

# Bucket that receives the SKPs when --upload_to_partner_bucket is given.
PARTNERS_GS_BUCKET = 'gs://chrome-partner-telemetry'

# Local archive and SKP directories.
LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR = os.path.join(
    os.path.abspath(os.path.dirname(__file__)), 'page_sets', 'data')
# NOTE: the temporary directory is created as a side effect of importing this
# module; run_benchmark writes its raw SKP output there.
TMP_SKP_DIR = tempfile.mkdtemp()

# Name of the SKP benchmark
SKP_BENCHMARK = 'skpicture_printer'

# The max base name length of Skp files.
MAX_SKP_BASE_NAME_LEN = 31

# Dictionary of device to platform prefixes for SKP files.
DEVICE_TO_PLATFORM_PREFIX = {
    'desktop': 'desk',
    'galaxynexus': 'mobi',
    'nexus10': 'tabl',
}

# How many times the record_wpr binary should be retried.
RETRY_RECORD_WPR_COUNT = 5
# How many times the run_benchmark binary should be retried.
RETRY_RUN_MEASUREMENT_COUNT = 3

# X11 display handed to the telemetry binaries; falls back to ':0' when the
# DISPLAY environment variable is not set.
X11_DISPLAY = os.getenv('DISPLAY', ':0')

# Path to Chromium's page sets.
CHROMIUM_PAGE_SETS_PATH = os.path.join('tools', 'perf', 'page_sets')

# Dictionary of supported Chromium page sets to their file prefixes.
CHROMIUM_PAGE_SETS_TO_PREFIX = {
    'key_mobile_sites_smooth.py': 'keymobi',
    'top_25_smooth.py': 'top25desk',
}

# Page set file name -> pattern appended to run_benchmark's
# --story-filter-exclude flag. The surrounding double quotes are part of the
# value because the command is executed through a shell.
PAGE_SETS_TO_EXCLUSIONS = {
    # See skbug.com/7348
    'key_mobile_sites_smooth.py': '"(digg|worldjournal|twitter|espn)"',
    # See skbug.com/7421
    # Raw string: '\.' is not a valid escape sequence in a normal literal.
    'top_25_smooth.py': r'"(mail\.google\.com)"',
}
113
114
def remove_prefix(s, prefix):
  """Returns s without its leading prefix; s is returned unchanged otherwise."""
  return s[len(prefix):] if s.startswith(prefix) else s
119
120
class SkPicturePlayback(object):
  """Class that archives or replays webpages and creates SKPs."""

  def __init__(self, parse_options):
    """Constructs a SkPicturePlayback BuildStep instance.

    Args:
      parse_options: The values object produced by this script's option
          parser; each attribute read below corresponds to one flag.
    """
    assert parse_options.browser_executable, 'Must specify --browser_executable'
    self._browser_executable = parse_options.browser_executable
    self._browser_args = '--disable-setuid-sandbox'
    if parse_options.browser_extra_args:
      self._browser_args = '%s %s' % (
          self._browser_args, parse_options.browser_extra_args)

    self._chrome_page_sets_path = os.path.join(parse_options.chrome_src_path,
                                               CHROMIUM_PAGE_SETS_PATH)
    # Must be set before _ParsePageSets, which reads it.
    self._all_page_sets_specified = parse_options.page_sets == 'all'
    self._page_sets = self._ParsePageSets(parse_options.page_sets)

    self._record = parse_options.record
    self._skia_tools = parse_options.skia_tools
    self._non_interactive = parse_options.non_interactive
    self._upload = parse_options.upload
    self._skp_prefix = parse_options.skp_prefix
    data_store_location = parse_options.data_store
    # 'gs://' URLs use Google Storage; anything else is a local directory.
    if data_store_location.startswith(GS_PREFIX):
      self.gs = GoogleStorageDataStore(data_store_location)
    else:
      self.gs = LocalFileSystemDataStore(data_store_location)
    self._upload_to_partner_bucket = parse_options.upload_to_partner_bucket
    self._alternate_upload_dir = parse_options.alternate_upload_dir
    self._telemetry_binaries_dir = os.path.join(parse_options.chrome_src_path,
                                                'tools', 'perf')
    self._catapult_dir = os.path.join(parse_options.chrome_src_path,
                                      'third_party', 'catapult')

    self._local_skp_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, SKPICTURES_DIR_NAME)
    self._local_record_webpages_archive_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, 'webpages_archive')

    # List of SKP files generated by this script.
    self._skp_files = []

  def _ParsePageSets(self, page_sets):
    """Expands the --page_sets value into a sorted list of page set paths.

    Args:
      page_sets: 'all', a glob pattern containing '*', or a comma separated
          list of page set paths.

    Returns:
      The sorted list of page set paths.

    Raises:
      ValueError: if page_sets is empty.
    """
    if not page_sets:
      raise ValueError('Must specify at least one page_set!')
    elif self._all_page_sets_specified:
      # Get everything from the page_sets directory.
      page_sets_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                   'page_sets')
      ps = [os.path.join(page_sets_dir, page_set)
            for page_set in os.listdir(page_sets_dir)
            if not os.path.isdir(os.path.join(page_sets_dir, page_set)) and
               page_set.endswith('.py')]
      chromium_ps = [
          os.path.join(self._chrome_page_sets_path, cr_page_set)
          for cr_page_set in CHROMIUM_PAGE_SETS_TO_PREFIX]
      ps.extend(chromium_ps)
    elif '*' in page_sets:
      # Explode and return the glob.
      ps = glob.glob(page_sets)
    else:
      ps = page_sets.split(',')
    ps.sort()
    return ps

  def _IsChromiumPageSet(self, page_set):
    """Returns true if the specified page set is a Chromium page set."""
    return page_set.startswith(self._chrome_page_sets_path)

  def Run(self):
    """Run the SkPicturePlayback BuildStep.

    Returns:
      0 once every page set has been captured and the optional tool runs and
      uploads have completed.

    Raises:
      Exception: if record_wpr or run_benchmark still fails for a page set
          after all retries, or if a needed archive is missing from the data
          store.
    """

    # Delete any left over data files in the data directory.
    for archive_file in glob.glob(
        os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, 'skia_*')):
      os.remove(archive_file)

    # Create the required local storage directories.
    self._CreateLocalStorageDirs()

    # Start the timer.
    start_time = time.time()

    # Loop through all page_sets.
    for page_set in self._page_sets:

      page_set_basename = os.path.basename(page_set).split('.')[0]
      page_set_json_name = page_set_basename + '.json'
      wpr_data_file = (
          page_set.split(os.path.sep)[-1].split('.')[0] + '_000.wprgo')
      page_set_dir = os.path.dirname(page_set)

      if self._IsChromiumPageSet(page_set):
        print('Using Chromium\'s captured archives for Chromium\'s page sets.')
      elif self._record:
        # Create an archive of the specified webpages if --record is
        # specified.
        self._RecordWebpagesArchive(page_set, page_set_basename, page_set_dir,
                                    wpr_data_file, page_set_json_name)
      else:
        # Get the webpages archive so that it can be replayed.
        self._DownloadWebpagesArchive(wpr_data_file, page_set_json_name)

      self._CaptureSkps(page_set, page_set_basename, page_set_dir)

    print('\n\n=======Capturing SKP files took %s seconds=======\n\n' % (
        time.time() - start_time))

    if self._skia_tools:
      self._RunSkiaTools()

    print('\n\n')

    if self._upload:
      print('\n\n=======Uploading to %s=======\n\n' % self.gs.target_type())
      # Copy the directory structure in the root directory into Google Storage.
      dest_dir_name = ROOT_PLAYBACK_DIR_NAME
      if self._alternate_upload_dir:
        dest_dir_name = self._alternate_upload_dir

      self.gs.upload_dir_contents(
          self._local_skp_dir, dest_dir=dest_dir_name)

      print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
          posixpath.join(self.gs.target_name(), dest_dir_name,
                         SKPICTURES_DIR_NAME)))

    else:
      print('\n\n=======Not Uploading to %s=======\n\n' % (
          self.gs.target_type()))
      print('Generated resources are available in %s\n\n' % (
          self._local_skp_dir))

    if self._upload_to_partner_bucket:
      print('\n\n=======Uploading to Partner bucket %s =======\n\n' % (
          PARTNERS_GS_BUCKET))
      partner_gs = GoogleStorageDataStore(PARTNERS_GS_BUCKET)
      partner_gs.delete_path(SKPICTURES_DIR_NAME)
      print('Uploading %s to %s' % (self._local_skp_dir, SKPICTURES_DIR_NAME))
      partner_gs.upload_dir_contents(self._local_skp_dir, SKPICTURES_DIR_NAME)
      print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
          posixpath.join(partner_gs.target_name(), SKPICTURES_DIR_NAME)))

    return 0

  def _RecordWebpagesArchive(self, page_set, page_set_basename, page_set_dir,
                             wpr_data_file, page_set_json_name):
    """Runs record_wpr for one page set and keeps a copy of its archive.

    The command is retried up to RETRY_RECORD_WPR_COUNT times; any exception
    raised by the command or by the copies counts as a failed attempt.

    Raises:
      Exception: if no attempt succeeded.
    """
    record_wpr_cmd = (
      'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
      'DISPLAY=%s' % X11_DISPLAY,
      os.path.join(self._telemetry_binaries_dir, 'record_wpr'),
      '--extra-browser-args="%s"' % self._browser_args,
      '--browser=exact',
      '--browser-executable=%s' % self._browser_executable,
      '%s_page_set' % page_set_basename,
      '--page-set-base-dir=%s' % page_set_dir
    )
    for _ in range(RETRY_RECORD_WPR_COUNT):
      try:
        # The command starts with environment assignments, so it has to go
        # through a shell.
        subprocess.check_call(' '.join(record_wpr_cmd), shell=True)

        # Copy over the created archive into the local webpages archive
        # directory.
        shutil.copy(
          os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, wpr_data_file),
          self._local_record_webpages_archive_dir)
        shutil.copy(
          os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                       page_set_json_name),
          self._local_record_webpages_archive_dir)

        # Stop retrying since there were no errors.
        return
      except Exception:
        # There was a failure continue with the loop.
        traceback.print_exc()

    # If we get here then record_wpr never succeeded.
    raise Exception('record_wpr failed for page_set: %s' % page_set)

  def _CaptureSkps(self, page_set, page_set_basename, page_set_dir):
    """Runs the SKP benchmark for one page set and renames its output.

    The benchmark is retried up to RETRY_RUN_MEASUREMENT_COUNT times, waiting
    10 seconds after each failed attempt.

    Raises:
      Exception: if no attempt succeeded.
    """
    run_benchmark_cmd = [
        'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
        'DISPLAY=%s' % X11_DISPLAY,
        'timeout', '1800',
        os.path.join(self._telemetry_binaries_dir, 'run_benchmark'),
        '--extra-browser-args="%s"' % self._browser_args,
        '--browser=exact',
        '--browser-executable=%s' % self._browser_executable,
        SKP_BENCHMARK,
        '--page-set-name=%s' % page_set_basename,
        '--page-set-base-dir=%s' % page_set_dir,
        '--skp-outdir=%s' % TMP_SKP_DIR,
        '--also-run-disabled-tests',
    ]

    exclusions = PAGE_SETS_TO_EXCLUSIONS.get(os.path.basename(page_set))
    if exclusions:
      run_benchmark_cmd.append('--story-filter-exclude=' + exclusions)

    for _ in range(RETRY_RUN_MEASUREMENT_COUNT):
      try:
        print('\n\n=======Capturing SKP of %s=======\n\n' % page_set)
        subprocess.check_call(' '.join(run_benchmark_cmd), shell=True)
      except subprocess.CalledProcessError:
        # There was a failure continue with the loop.
        traceback.print_exc()
        print('\n\n=======Retrying %s=======\n\n' % page_set)
        time.sleep(10)
        continue

      # Rename generated SKP files into more descriptive names.
      self._RenameSkpFiles(page_set)
      # Stop retrying since there were no errors.
      return

    # If we get here then run_benchmark never succeeded.
    raise Exception('run_benchmark failed for page_set: %s' % page_set)

  def _RunSkiaTools(self):
    """Runs the Skia tools from --skia_tools over the captured SKPs."""
    render_pictures_cmd = [
        os.path.join(self._skia_tools, 'render_pictures'),
        '-r', self._local_skp_dir
    ]
    render_pdfs_cmd = [
        os.path.join(self._skia_tools, 'render_pdfs'),
        '-r', self._local_skp_dir
    ]

    for tools_cmd in (render_pictures_cmd, render_pdfs_cmd):
      print('\n\n=======Running %s=======' % ' '.join(tools_cmd))
      subprocess.check_call(tools_cmd)

    # The debugger is only launched when --non-interactive was not given.
    if not self._non_interactive:
      print('\n\n=======Running debugger=======')
      os.system('%s %s' % (os.path.join(self._skia_tools, 'debugger'),
                           self._local_skp_dir))

  def _GetSkiaSkpFileName(self, page_set):
    """Returns the SKP file name for Skia page sets."""
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop
    ps_basename, _ = os.path.splitext(ps_filename)
    # skia_yahooanswers_desktop -> skia, yahooanswers, desktop
    # (the name must consist of exactly three '_' separated parts)
    _, page_name, device = ps_basename.split('_')
    basename = '%s_%s' % (DEVICE_TO_PLATFORM_PREFIX[device], page_name)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _GetChromiumSkpFileName(self, page_set, site):
    """Returns the SKP file name for Chromium page sets."""
    # /path/to/http___mobile_news_sandbox_pt0 -> http___mobile_news_sandbox_pt0
    _, webpage = os.path.split(site)
    # http___mobile_news_sandbox_pt0 -> mobile_news_sandbox_pt0
    for prefix in ('http___', 'https___', 'www_'):
      if webpage.startswith(prefix):
        webpage = webpage[len(prefix):]
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # http___mobile_news_sandbox -> pagesetprefix_http___mobile_news_sandbox
    basename = '%s_%s' % (CHROMIUM_PAGE_SETS_TO_PREFIX[ps_filename], webpage)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _RenameSkpFiles(self, page_set):
    """Rename generated SKP files into more descriptive names.

    Look into the subdirectory of TMP_SKP_DIR and find the most interesting
    .skp in there to be this page_set's representative .skp.
    """
    subdirs = glob.glob(os.path.join(TMP_SKP_DIR, '*'))
    for site in subdirs:
      if self._IsChromiumPageSet(page_set):
        filename = self._GetChromiumSkpFileName(page_set, site)
      else:
        filename = self._GetSkiaSkpFileName(page_set)
      filename = filename.lower()

      if self._skp_prefix:
        filename = '%s%s' % (self._skp_prefix, filename)

      # We choose the largest .skp as the most likely to be interesting.
      largest_skp = max(glob.glob(os.path.join(site, '*.skp')),
                        key=lambda path: os.stat(path).st_size)
      dest = os.path.join(self._local_skp_dir, filename)
      print('Moving %s to %s' % (largest_skp, dest))
      shutil.move(largest_skp, dest)
      self._skp_files.append(filename)
      # Drop the remaining output so the next page set starts clean.
      shutil.rmtree(site)

  def _CreateLocalStorageDirs(self):
    """Creates required local storage directories for this script."""
    for d in (self._local_record_webpages_archive_dir,
              self._local_skp_dir):
      # Start from an empty directory on every run.
      if os.path.exists(d):
        shutil.rmtree(d)
      os.makedirs(d)

  def _DownloadWebpagesArchive(self, wpr_data_file, page_set_json_name):
    """Downloads the webpages archive and its required page set from GS.

    Args:
      wpr_data_file: File name of the recorded archive.
      page_set_json_name: File name of the page set's JSON file.

    Raises:
      Exception: if either file is missing from the data store.
    """
    wpr_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME, 'webpages_archive',
                                wpr_data_file)
    page_set_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME,
                                     'webpages_archive',
                                     page_set_json_name)
    gs = self.gs
    if (gs.does_storage_object_exist(wpr_source) and
        gs.does_storage_object_exist(page_set_source)):
      gs.download_file(wpr_source,
                       os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                    wpr_data_file))
      gs.download_file(page_set_source,
                       os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                    page_set_json_name))
    else:
      # Argument order follows the message: the two files, then the store.
      raise Exception('%s and %s do not exist in %s!' % (
          wpr_source, page_set_source, gs.target_type()))
428
class DataStore(object):
  """An abstract base class for uploading recordings to a data storage.

  The interface emulates the google storage api. Every method raises
  NotImplementedError here; subclasses provide the behavior.
  """

  def target_name(self):
    """Returns the location of this store (used to build displayed paths)."""
    raise NotImplementedError()

  def target_type(self):
    """Returns the description of this store used in progress messages."""
    raise NotImplementedError()

  def does_storage_object_exist(self, name):
    """Returns whether the object called name exists in this store."""
    raise NotImplementedError()

  def delete_path(self, path):
    """Recursively deletes path from this store."""
    raise NotImplementedError()

  def download_file(self, name, local_path):
    """Copies the object called name from this store to local_path."""
    raise NotImplementedError()

  def upload_dir_contents(self, source_dir, dest_dir):
    """Copies the local directory source_dir into dest_dir of this store."""
    raise NotImplementedError()
442
443
class GoogleStorageDataStore(DataStore):
  """DataStore implementation that shells out to gsutil."""

  def __init__(self, data_store_url):
    # Trailing slashes are dropped so paths can be appended with a single '/'.
    self._url = data_store_url.rstrip('/')

  def _remote(self, relative_path):
    """Returns the full URL of relative_path inside this store."""
    return '/'.join((self._url, relative_path))

  def target_name(self):
    return self._url

  def target_type(self):
    return 'Google Storage'

  def does_storage_object_exist(self, name):
    """Returns True only when 'gsutil ls' prints exactly one line for name."""
    listing_cmd = ['gsutil', 'ls', self._remote(name)]
    try:
      listing = subprocess.check_output(listing_cmd)
    except subprocess.CalledProcessError:
      return False
    return len(listing.splitlines()) == 1

  def delete_path(self, path):
    """Recursively removes path from the store; raises if gsutil fails."""
    subprocess.check_call(['gsutil', 'rm', '-r', self._remote(path)])

  def download_file(self, name, local_path):
    """Copies the stored object name to local_path; raises if gsutil fails."""
    subprocess.check_call(['gsutil', 'cp', self._remote(name), local_path])

  def upload_dir_contents(self, source_dir, dest_dir):
    """Recursively copies source_dir to dest_dir; raises if gsutil fails."""
    subprocess.check_call(
        ['gsutil', 'cp', '-r', source_dir, self._remote(dest_dir)])
474
475
class LocalFileSystemDataStore(DataStore):
  """DataStore implementation that keeps everything under a local directory.

  All names and paths passed to the methods are resolved against the base
  directory given to the constructor.
  """

  def __init__(self, data_store_location):
    self._base_dir = data_store_location

  def target_name(self):
    return self._base_dir

  def target_type(self):
    # The base directory doubles as the description shown in messages.
    return self._base_dir

  def does_storage_object_exist(self, name):
    """Returns whether name is an existing file under the base directory."""
    return os.path.isfile(os.path.join(self._base_dir, name))

  def delete_path(self, path):
    """Recursively deletes path, resolved against the base directory.

    An absolute path is used as given because os.path.join discards the base
    directory in that case.
    """
    shutil.rmtree(os.path.join(self._base_dir, path))

  def download_file(self, name, local_path):
    """Copies the stored file name to local_path."""
    shutil.copyfile(os.path.join(self._base_dir, name), local_path)

  def upload_dir_contents(self, source_dir, dest_dir):
    """Recursively copies the contents of source_dir into dest_dir.

    Existing destination directories are reused and files with the same name
    are overwritten.
    """
    def copytree(source_dir, dest_dir):
      if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
      for item in os.listdir(source_dir):
        source = os.path.join(source_dir, item)
        dest = os.path.join(dest_dir, item)
        if os.path.isdir(source):
          copytree(source, dest)
        else:
          shutil.copy2(source, dest)
    copytree(source_dir, os.path.join(self._base_dir, dest_dir))
501
# Script entry point: parse the command line flags and run the playback.
if '__main__' == __name__:
  option_parser = optparse.OptionParser()
  option_parser.add_option(
      '', '--page_sets',
      help='Specifies the page sets to use to archive. Supports globs.',
      default='all')
  option_parser.add_option(
      '', '--record', action='store_true',
      help='Specifies whether a new website archive should be created.',
      default=False)
  option_parser.add_option(
      '', '--skia_tools',
      help=('Path to compiled Skia executable tools. '
            'render_pictures/render_pdfs is run on the set '
            'after all SKPs are captured. If the script is run without '
            '--non-interactive then the debugger is also run at the end. Debug '
            'builds are recommended because they seem to catch more failures '
            'than Release builds.'),
      default=None)
  option_parser.add_option(
      '', '--upload', action='store_true',
      help=('Uploads to Google Storage or copies to local filesystem storage '
            ' if this is True.'),
      default=False)
  option_parser.add_option(
      '', '--upload_to_partner_bucket', action='store_true',
      help=('Uploads SKPs to the chrome-partner-telemetry Google Storage '
            'bucket if true.'),
      default=False)
  option_parser.add_option(
      '', '--data_store',
      help=('The location of the file storage to use to download and upload '
            'files. Can be \'gs://<bucket>\' for Google Storage, or '
            'a directory for local filesystem storage'),
      default='gs://skia-skps')
  option_parser.add_option(
      '', '--alternate_upload_dir',
      help=('Uploads to a different directory in Google Storage or local '
            'storage if this flag is specified'),
      default=None)
  option_parser.add_option(
      '', '--output_dir',
      help=('Temporary directory where SKPs and webpage archives will be '
            'outputted to.'),
      default=tempfile.gettempdir())
  option_parser.add_option(
      '', '--browser_executable',
      help='The exact browser executable to run.',
      default=None)
  option_parser.add_option(
      '', '--browser_extra_args',
      help='Additional arguments to pass to the browser.',
      default=None)
  option_parser.add_option(
      '', '--chrome_src_path',
      help='Path to the chromium src directory.',
      default=None)
  option_parser.add_option(
      '', '--non-interactive', action='store_true',
      help='Runs the script without any prompts. If this flag is specified and '
           '--skia_tools is specified then the debugger is not run.',
      default=False)
  option_parser.add_option(
      '', '--skp_prefix',
      help='Prefix to add to the names of generated SKPs.',
      default=None)
  options, unused_args = option_parser.parse_args()

  # SkPicturePlayback joins chrome_src_path into several paths, so a missing
  # value would otherwise surface as a traceback from os.path.join. Report it
  # as a usage error instead.
  if not options.chrome_src_path:
    option_parser.error('Must specify --chrome_src_path')

  playback = SkPicturePlayback(options)
  sys.exit(playback.Run())
572