# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import csv
import inspect
import os

from telemetry.page import page as page_module
from telemetry.page import page_set_archive_info
from telemetry.util import cloud_storage

PUBLIC_BUCKET = cloud_storage.PUBLIC_BUCKET
PARTNER_BUCKET = cloud_storage.PARTNER_BUCKET
INTERNAL_BUCKET = cloud_storage.INTERNAL_BUCKET


class PageSetError(Exception):
  """Page-set specific exception type."""
  pass


class PageSet(object):
  """An ordered collection of pages plus the settings shared by them.

  The instance behaves like a sequence of its pages: it supports iteration,
  len(), and item get/set, all of which delegate to the `pages` list.
  """

  def __init__(self, file_path=None, archive_data_file='',
               credentials_path=None, user_agent_type=None,
               make_javascript_deterministic=True, startup_url='',
               serving_dirs=None, bucket=None):
    """Initializes an empty page set.

    Args:
      file_path: path of the file (or directory) this page set is anchored
          to. Defaults to the source file that defines the instance's class.
      archive_data_file: path, relative to base_dir, that wpr_archive_info
          is loaded from on first access. An empty value disables loading.
      credentials_path: stored as-is on the instance.
      user_agent_type: stored as-is on the instance.
      make_javascript_deterministic: stored as-is on the instance.
      startup_url: stored as-is on the instance.
      serving_dirs: optional iterable of directories. Relative entries are
          resolved against base_dir; every entry is stored as a real path.
      bucket: None/empty, or one of PUBLIC_BUCKET, PARTNER_BUCKET,
          INTERNAL_BUCKET.

    Raises:
      ValueError: if bucket is non-empty and not one of the known buckets.
    """
    # The default value of file_path is the location of the file that defines
    # this page set instance's class.
    if file_path is None:
      file_path = inspect.getfile(self.__class__)
      # Turn a .pyc path into the matching .py path when that file exists.
      if file_path.endswith('.pyc') and os.path.exists(file_path[:-1]):
        file_path = file_path[:-1]

    self.file_path = file_path
    # These attributes can be set dynamically by the page set.
    self.archive_data_file = archive_data_file
    self.credentials_path = credentials_path
    self.user_agent_type = user_agent_type
    self.make_javascript_deterministic = make_javascript_deterministic
    self._wpr_archive_info = None
    self.startup_url = startup_url
    self.pages = []
    self.serving_dirs = set()
    serving_dirs = [] if serving_dirs is None else serving_dirs
    # Make sure that the page set's serving_dirs are absolute real paths.
    # base_dir depends on self.file_path, which is already set above.
    for sd in serving_dirs:
      if not os.path.isabs(sd):
        sd = os.path.join(self.base_dir, sd)
      self.serving_dirs.add(os.path.realpath(sd))
    if not self._IsValidPrivacyBucket(bucket):
      raise ValueError("Pageset privacy bucket %s is invalid" % bucket)
    self._bucket = bucket

  @classmethod
  def Name(cls):
    """Returns the last component of the defining module's dotted name."""
    return cls.__module__.split('.')[-1]

  @classmethod
  def Description(cls):
    """Returns the first line of the class docstring, or '' if there is none."""
    if cls.__doc__:
      return cls.__doc__.splitlines()[0]
    return ''

  def AddPage(self, page):
    """Appends page to this page set.

    The page must already reference this page set through its page_set
    attribute.
    """
    assert page.page_set is self
    self.pages.append(page)

  def AddPageWithDefaultRunNavigate(self, page_url):
    """Adds a simple page whose url equals page_url.

    The page is constructed with only the url, this page set and base_dir, so
    it carries just the default RunNavigateSteps.
    """
    self.AddPage(page_module.Page(page_url, self, self.base_dir))

  @staticmethod
  def _IsValidPrivacyBucket(bucket_name):
    """Returns True for an empty bucket name or one of the known buckets."""
    return (not bucket_name or
            bucket_name in [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET])

  @property
  def base_dir(self):
    """Directory containing file_path, or file_path itself if not a file."""
    if os.path.isfile(self.file_path):
      return os.path.dirname(self.file_path)
    return self.file_path

  @property
  def wpr_archive_info(self):  # pylint: disable=E0202
    """Lazily constructs wpr_archive_info if it's not set and returns it.

    Returns None when no archive info has been set and archive_data_file is
    empty.
    """
    if self.archive_data_file and not self._wpr_archive_info:
      archive_path = os.path.join(self.base_dir, self.archive_data_file)
      self._wpr_archive_info = (
          page_set_archive_info.PageSetArchiveInfo.FromFile(archive_path))
    return self._wpr_archive_info

  @wpr_archive_info.setter
  def wpr_archive_info(self, value):  # pylint: disable=E0202
    self._wpr_archive_info = value

  @property
  def bucket(self):
    """The privacy bucket validated and stored at construction time."""
    return self._bucket

  def ContainsOnlyFileURLs(self):
    """Returns True if every page is a file page (also True when empty)."""
    return all(page.is_file for page in self.pages)

  def ReorderPageSet(self, results_file):
    """Returns this page set's pages ordered as in the results of a past run.

    Args:
      results_file: path to a CSV file. Its first row is a header that must
          contain a 'url' column; each following row names one page by url.

    Returns:
      A new list holding pages from self.pages, in the order their urls
      appear in results_file. self.pages itself is not modified.

    Raises:
      Exception: if the file is empty, has no 'url' column, or lists a url
          that does not belong to any page of this page set.
    """
    pages_by_url = dict((page.url, page) for page in self.pages)

    pages = []
    # Binary mode is what the Python 2 csv module expects for its input file.
    with open(results_file, 'rb') as csv_file:
      csv_reader = csv.reader(csv_file)
      # An empty file has no header row; treat it like any other unusable
      # results file instead of leaking StopIteration to the caller.
      csv_header = next(csv_reader, None)
      if csv_header is None or 'url' not in csv_header:
        raise Exception('Unusable results_file.')

      url_index = csv_header.index('url')

      for csv_row in csv_reader:
        url = csv_row[url_index]
        if url not in pages_by_url:
          raise Exception('Unusable results_file.')
        # Collect into the returned list. Re-adding to self.pages here would
        # duplicate pages in the set and leave the result empty.
        pages.append(pages_by_url[url])

    return pages

  def WprFilePathForPage(self, page):
    """Returns the archive's WPR file path for page, or None if no archive."""
    if not self.wpr_archive_info:
      return None
    return self.wpr_archive_info.WprFilePathForPage(page)

  def __iter__(self):
    return iter(self.pages)

  def __len__(self):
    return len(self.pages)

  def __getitem__(self, key):
    return self.pages[key]

  def __setitem__(self, key, value):
    self.pages[key] = value