#!/usr/bin/env python

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This module is used to upload csv files generated by performance related tests
to cns. More details about the implementation can be found in crbug.com/598504.

The overall work flow is as follows.
1. Query tko_test_attributes table for perf_csv_folder attributes. The
attribute contains a path to csv files that need to be uploaded to cns.
2. Keep only the perf_csv_folder attributes of test jobs that finished at
least an hour ago. This is to make sure the results have already been uploaded
to GS.
3. Locate the csv files in GS, and upload them to the desired cns location.

After every run, the script saves the minimum test attribute id to query next
to a local file, and repeats the workflow.

"""

import argparse
import datetime
import logging
import os
import shutil
import tempfile
import time

import common
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import logging_config
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.tko import models as tko_models


# Number of hours that a test has to be finished for the script to process.
# This allows gs_offloader to have enough time to upload the results to GS.
CUTOFF_TIME_HOURS = 1

# Default wait time in seconds after each run.
DEFAULT_INTERVAL_SEC = 60

# Timeout in minutes for upload attempts for a given folder.
UPLOAD_TIMEOUT_MINS = 5


class CsvNonexistenceException(Exception):
    """Exception raised when csv files not found in GS."""


class CsvFolder(object):
    """A class contains the information of a folder storing csv files to be
    uploaded, and logic to upload the csv files.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results. Set from the --gs_path option in get_options().
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    # Set from the --cns_path option in get_options().
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the folder storing the job's results in GS.

        The url is gs_path joined with the job tag of the test view.
        For example:
        gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url of the job's result folder.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download the csv files of the job to the given dest_dir.

        All files ending in .csv found recursively under the job's GS url
        are copied directly into dest_dir.

        @param dest_dir: A directory to store the downloaded csv files.

        @return: A list of strings, each is a path to a file present in
                dest_dir after the download.
        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # Find all csv files in given GS url recursively. The exit status is
        # ignored so that an empty listing is reported below as
        # CsvNonexistenceException rather than as a command failure.
        files = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' %
                          gs_url, ignore_status=True).stdout.strip().split('\n')
        if not files or files == ['']:
            # Interpolate the url; passing it as a second argument would
            # leave the '%s' placeholder unformatted in the message.
            raise CsvNonexistenceException('No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir.
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))

        return [os.path.join(dest_dir, name) for name in os.listdir(dest_dir)]


    @retry.retry(Exception, blacklist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the folder to cns.

        The csv files are first downloaded from GS to a temporary directory,
        which is always removed afterwards, then copied to cns with fileutil.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            local_files = self._download(temp_dir)
            finished_time = self.test_view.job_finished_time
            # File in cns is stored under folder with format of:
            # <test_name>/<host_name>/YYYY/mm/dd/hh/mm
            path_in_cns = os.path.join(
                    self.cns_path,
                    self.test_view.test_name, self.test_view.hostname,
                    str(finished_time.year),
                    str(finished_time.month).zfill(2),
                    str(finished_time.day).zfill(2),
                    str(finished_time.hour).zfill(2),
                    str(finished_time.minute).zfill(2))
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for local_file in local_files:
                utils.run('fileutil copytodir -f %s %s' %
                          (local_file, path_in_cns))
        finally:
            shutil.rmtree(temp_dir)


class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Only attributes whose id is at least min_test_attribute_id and whose
        job finished more than CUTOFF_TIME_HOURS ago are returned.

        @return: A list of CsvFolder objects for each new entry of
                perf_csv_folder attribute in tko_test_attributes table.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder', id__gte=cls.min_test_attribute_id)
        folders = []

        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))
        for attribute in attributes:
            test_views = tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id)
            # Fetch the view once and reuse it, instead of indexing the
            # query result for every access.
            try:
                test_view = test_views[0]
            except IndexError:
                logging.warning('No test view found for test attribute %s '
                                '(test_id=%s), skipping.',
                                attribute.id, attribute.test_id)
                continue
            finished_time = test_view.job_finished_time
            # A missing finished time is treated as "not finished yet".
            # NOTE(review): assumes job_finished_time can be NULL for
            # unfinished jobs -- confirm against the tko schema.
            if finished_time is None or finished_time > cutoff_time:
                # NOTE(review): main() may advance min_test_attribute_id past
                # an attribute skipped here if a later one is uploaded in the
                # same run -- confirm whether that is acceptable.
                continue
            folders.append(CsvFolder(attribute.id, attribute.value,
                                     test_view))
        return folders


def setup_logging(log_dir):
    """Setup logging information.

    A DEBUG level file handler writing to perf_csv_uploader.log inside
    log_dir is added to the logging config.

    @param log_dir: Path to the directory storing logs of this script.
    """
    config = logging_config.LoggingConfig()
    logfile = os.path.join(os.path.abspath(log_dir), 'perf_csv_uploader.log')
    config.add_file_handler(file_path=logfile, level=logging.DEBUG)


def save_min_test_attribute_id(test_attribute_id_file):
    """Save the minimum test attribute id to a cached file.

    The value written is the current DBScanner.min_test_attribute_id.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.
    """
    with open(test_attribute_id_file, 'w') as f:
        f.write(str(DBScanner.min_test_attribute_id))


def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The cached id, or -1 if the file can not be read or does not
            contain an integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            return int(f.read())
    except (IOError, ValueError):
        # min_test_attribute_id has not been set, or the cached file is empty
        # or corrupted (e.g. an interrupted write). Default to -1.
        return -1


def get_options():
    """Get the command line options.

    As a side effect, CsvFolder.gs_path and CsvFolder.cns_path are set from
    the parsed options.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    # All options are required: without them the script can only fail later
    # with an obscure error when the paths are joined.
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path', required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options


def main():
    """Main process to repeat the workflow of searching/uploading csv files.

    The minimum test attribute id to query is restored from a file in the log
    directory at startup and saved back whenever it changes.
    """
    options = get_options()
    setup_logging(options.log_dir)
    test_attribute_id_file = os.path.join(options.log_dir,
                                          'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(
            test_attribute_id_file)

    while True:
        folders = DBScanner.get_perf_csv_folders()
        if not folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_folders = []
        for folder in folders:
            try:
                logging.info('Uploading folder: %s', folder)
                folder.upload()
            except CsvNonexistenceException:
                # Ignore the failure if CSV files are not found in GS.
                pass
            except Exception as e:
                # Keep the loop alive; the folder is retried in the next run.
                failed_folders.append(folder)
                logging.error('Failed to upload folder %s, error: %s',
                              folder, e)
        if failed_folders:
            # Set the min_test_attribute_id to be the smallest one that failed
            # to upload, so the next query picks it up again.
            min_test_attribute_id = min(folder.test_attribute_id
                                        for folder in failed_folders)
        else:
            # Everything was handled, continue after the largest id seen.
            min_test_attribute_id = max(folder.test_attribute_id
                                        for folder in folders) + 1
        if DBScanner.min_test_attribute_id != min_test_attribute_id:
            DBScanner.min_test_attribute_id = min_test_attribute_id
            save_min_test_attribute_id(test_attribute_id_file)


if __name__ == '__main__':
    main()