# Copyright 2012 Google Inc. All Rights Reserved.
# Author: mrdmnd@ (Matt Redmond)
"""A client to pull data from Bartlett.

Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py

The server houses perf.data.gz, board, chrome version for each upload.
This script first authenticates with a proper @google.com account, then
downloads each sample (if it's not already cached) and unzips perf.data.

  Authenticate(): Gets login info and returns an auth token
  DownloadSamples(): Download and unzip samples.
  _GetServePage(): Pulls /serve page from the app engine server
  _DownloadSampleFromServer(): Downloads a local compressed copy of a sample
  _UncompressSample(): Decompresses a sample, deleting the compressed version.
  _DeleteSampleFromServer(): Asks the server to delete a sample.
"""
import getpass
import gzip
import optparse
import os
import shutil
import sys

try:
  # Python 2 module layout.
  import cookielib
  import urllib
  import urllib2
  _urlencode = urllib.urlencode
  _URLError = urllib2.URLError
except ImportError:
  # Python 3 exposes the same APIs under http.* and urllib.*.
  import http.cookiejar as cookielib
  import urllib.error
  import urllib.parse
  import urllib.request as urllib2
  _urlencode = urllib.parse.urlencode
  _URLError = urllib.error.URLError

try:
  _ReadLine = raw_input  # Python 2
except NameError:
  _ReadLine = input  # Python 3

# NOTE(review): the auth token is sent to this plain-http URL; confirm whether
# the server can be reached over https instead.
SERVER_NAME = 'http://chromeoswideprofiling.appspot.com'
APP_NAME = 'chromeoswideprofiling'
DELIMITER = '~'


def _ToBytes(text):
  """Returns text as a byte string (a no-op for Python 2 str)."""
  if isinstance(text, bytes):
    return text
  return text.encode('utf-8')


def _ToText(data):
  """Returns data as a native str (a no-op for Python 2 str)."""
  if isinstance(data, str):
    return data
  return data.decode('utf-8')


def _ParseAuthToken(body):
  """Extracts the Auth field from a ClientLogin response body.

  Args:
    body: (string) Response text made of NAME=value lines.
  Returns:
    The value of the Auth field, or None if the field is absent.
  """
  # Split on the first '=' only: the values themselves may contain '='.
  fields = dict(line.split('=', 1) for line in body.splitlines()
                if '=' in line)
  return fields.get('Auth')


def _OpenWithAuth(server_name, target_uri, authtoken):
  """Opens target_uri by way of the server's /_ah/login redirect.

  Args:
    server_name: (string) URL that the app engine code is living on.
    target_uri: (string) Page to continue to once the login succeeds.
    authtoken: (string) Authorization token.
  Returns:
    The open response object; the caller is responsible for closing it.
  """
  serv_args = {'continue': target_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % _urlencode(serv_args)
  return urllib2.urlopen(urllib2.Request(full_serv_uri))


def Authenticate(server_name):
  """Gets credentials from user and attempts to retrieve auth token.

  TODO: Accept OAuth2 instead of password.

  Installs a process-wide cookie-aware urllib opener as a side effect, so
  later requests carry the login cookie.

  Args:
    server_name: (string) URL that the app engine code is living on.
                 Currently unused: the login endpoint is fixed.
  Returns:
    authtoken: (string) The authorization token that can be used
               to grab other pages, or None if the login failed.
  """
  # Grab username and password from user through stdin.
  username = _ReadLine('Email (must be @google.com account): ')
  password = getpass.getpass('Password: ')
  # Use a cookie to authenticate with GAE.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)
  # Get an AuthToken from Google accounts service.
  auth_uri = 'https://www.google.com/accounts/ClientLogin'
  authreq_data = _urlencode({'Email': username,
                             'Passwd': password,
                             'service': 'ah',
                             'source': APP_NAME,
                             'accountType': 'HOSTED_OR_GOOGLE'})
  auth_req = urllib2.Request(auth_uri, data=_ToBytes(authreq_data))
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except _URLError:
    print('Error logging in to Google accounts service.')
    return None
  try:
    body = _ToText(auth_resp.read())
  finally:
    auth_resp.close()
  # Auth response contains several fields.
  # We care about the part after Auth=
  authtoken = _ParseAuthToken(body)
  if authtoken is None:
    print('Login response did not contain an Auth token.')
  return authtoken


def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Download every selected sample and write its unzipped version.

  The /serve page is split on '</br>' and the entries in [start:stop] are
  fetched. Each entry is expected to hold five DELIMITER-separated fields:
  key, upload time, md5, board and version. Entries that do not match are
  reported and skipped.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    output_dir (string) Filepath to write output to.
    start: (int) Index to start downloading from, starting at top.
    stop: (int) Index to stop downloading, non-inclusive. -1 for end.
  Returns:
    None
  """
  server_name = server_name.rstrip('/')

  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print('Error getting /serve page.')
    return

  sample_list = serve_page_string.split('</br>')
  print('Will download:')
  # NOTE(review): this is a plain slice, so stop=-1 drops the final list
  # element. That only means "to the end" if the page finishes with a
  # trailing '</br>' (leaving an empty last entry) -- confirm with the server.
  sample_list_subset = sample_list[start:stop]
  for sample in sample_list_subset:
    print(sample)
  for sample in sample_list_subset:
    sample_info = [s.strip() for s in sample.split(DELIMITER)]
    # Validate explicitly instead of with assert, which -O strips.
    if len(sample_info) < 5 or not sample_info[0]:
      print('Skipping malformed sample entry: %r' % sample)
      continue
    key = sample_info[0]
    upload_time = sample_info[1].replace(' ', '_')  # No space in filenames.
    # sample_info[2] is the sample's md5; it is currently unused.
    board = sample_info[3]
    version = sample_info[4]

    # Put a compressed copy of the samples in output directory.
    _DownloadSampleFromServer(server_name, authtoken, key, upload_time, board,
                              version, output_dir)
    _UncompressSample(key, upload_time, board, version, output_dir)


def _BuildFilenameFromParams(key, time, board, version):
  """Return the filename for our sample.

  Args:
    key: (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
  Returns:
    filename (string): the four fields joined by DELIMITER.
  """
  return DELIMITER.join([key, time, board, version])


def _DownloadSampleFromServer(server_name, authtoken, key, time, board, version,
                              output_dir):
  """Downloads the compressed sample into output_dir as <filename>.gz.

  Does nothing if the uncompressed sample is already in output_dir.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to write to output to.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  compressed_path = os.path.join(output_dir, filename + '.gz')

  if os.path.exists(os.path.join(output_dir, filename)):
    print('Already downloaded %s, skipping.' % filename)
    return

  serv_resp = _OpenWithAuth(server_name, server_name + '/serve/' + key,
                            authtoken)
  try:
    # The payload is gzip data, so it must be written in binary mode.
    with open(compressed_path, 'wb') as compressed_file:
      shutil.copyfileobj(serv_resp, compressed_file)
  finally:
    serv_resp.close()


def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  Does nothing if the uncompressed sample is already in output_dir.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.
  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  out_path = os.path.join(output_dir, filename)
  compressed_path = out_path + '.gz'

  if os.path.exists(out_path):
    print('Already decompressed %s, skipping.' % filename)
    return

  # Open the input first so a missing .gz does not leave an empty output.
  in_file = gzip.open(compressed_path, 'rb')
  finished = False
  try:
    with open(out_path, 'wb') as out_file:
      shutil.copyfileobj(in_file, out_file)
    finished = True
  finally:
    in_file.close()
    # A partial output would be reported as "already decompressed" on the
    # next run, so remove it when decompression did not complete.
    if not finished and os.path.exists(out_path):
      os.remove(out_path)
  os.remove(compressed_path)


def _DeleteSampleFromServer(server_name, authtoken, key):
  """Opens the /del page with the specified key
  to delete the sample off the datastore.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key to delete.
  Returns:
    None
  """
  serv_resp = _OpenWithAuth(server_name, server_name + '/del/' + key,
                            authtoken)
  serv_resp.close()


def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken: (string) Authorization token.
  Returns:
    The text of the /serve page (including HTML tags)
  """
  serv_resp = _OpenWithAuth(server_name, server_name + '/serve', authtoken)
  try:
    return _ToText(serv_resp.read())
  finally:
    serv_resp.close()


def main(argv=None):
  """Parses flags, logs in, and downloads the requested samples.

  Args:
    argv: (list of strings) Command-line arguments without the program name.
          Defaults to sys.argv[1:] when None.
  Returns:
    Process exit status: 0 on success, 1 on a usage or login failure.
  """
  parser = optparse.OptionParser()
  parser.add_option('--output_dir',
                    dest='output_dir',
                    action='store',
                    help='Path to output perf data files.')
  # type='int' matters: without it user-supplied values stay strings and
  # cannot be used as slice indices.
  parser.add_option('--start',
                    dest='start_ind',
                    action='store',
                    type='int',
                    default=0,
                    help='Start index.')
  parser.add_option('--stop',
                    dest='stop_ind',
                    action='store',
                    type='int',
                    default=-1,
                    help='Stop index.')
  options = parser.parse_args(argv)[0]
  if not options.output_dir:
    print('Must specify --output_dir.')
    return 1
  if not os.path.exists(options.output_dir):
    print('Specified output_dir does not exist.')
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print('Could not obtain authtoken, exiting.')
    return 1
  DownloadSamples(SERVER_NAME, authtoken, options.output_dir, options.start_ind,
                  options.stop_ind)
  print('Downloaded samples.')
  return 0


if __name__ == '__main__':
  sys.exit(main())