#!/usr/bin/env python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Report whether DUTs are working or broken.

usage: dut_status [ <options> ] [hostname ...]

Reports on the history and status of selected DUT hosts, to
determine whether they're "working" or "broken". For purposes of
the script, "broken" means "the DUT requires manual intervention
before it can be used for further testing", and "working" means "not
broken". The status determination is based on the history of
completed jobs for the DUT in a given time interval; still-running
jobs are not considered.

Time Interval Selection
~~~~~~~~~~~~~~~~~~~~~~~
A DUT's reported status is based on the DUT's job history in a time
interval determined by command line options. The interval is
specified with up to two of three options:
  --until/-u DATE/TIME - Specifies an end time for the search
      range. (default: now)
  --since/-s DATE/TIME - Specifies a start time for the search
      range. (no default)
  --duration/-d HOURS - Specifies the length of the search interval
      in hours. (default: 24 hours)

Any two time options completely specify the time interval. If only
one option is provided, these defaults are used:
  --until - Use the given end time with the default duration.
  --since - Use the given start time with the default end time.
  --duration - Use the given duration with the default end time.

If no time options are given, use the default end time and duration.

DATE/TIME values are of the form '2014-11-06 17:21:34'.

DUT Selection
~~~~~~~~~~~~~
By default, information is reported for DUTs named as command-line
arguments. Options are also available for selecting groups of
hosts:
  --board/-b BOARD - Only include hosts with the given board.
  --pool/-p POOL - Only include hosts in the given pool. The user
      might be interested in the following pools: bvt, cq,
      continuous, cts, or suites.


The selected hosts may also be filtered based on status:
  -w/--working - Only include hosts in a working state.
  -n/--broken - Only include hosts in a non-working state. Hosts
      with no job history are considered non-working.

Output Formats
~~~~~~~~~~~~~~
There are four available output formats:
  * A simple list of host names.
  * A status summary showing one line per host.
  * A detailed job history for all selected DUTs, sorted by
    time of execution.
  * A job history for all selected DUTs showing only the history
    surrounding the DUT's last change from working to broken,
    or vice versa.

The default format depends on whether hosts are filtered by
status:
  * With the --working or --broken options, the list of host names
    is the default format.
  * Without those options, the default format is the one-line status
    summary.

These options override the default formats:
  -o/--oneline - Use the one-line summary with the --working or
      --broken options.
  -f/--full_history - Print detailed per-host job history.
  -g/--diagnosis - Print the job history surrounding a status
      change.

Examples
~~~~~~~~
    $ dut_status chromeos2-row4-rack2-host12
    hostname S last checked URL
    chromeos2-row4-rack2-host12 NO 2014-11-06 15:25:29 http://...

'NO' means the DUT is broken. That diagnosis is based on a job that
failed: 'last checked' is the time of the failed job, and the URL
points to the job's logs.

    $ dut_status.py -u '2014-11-06 15:30:00' -d 1 -f chromeos2-row4-rack2-host12
    chromeos2-row4-rack2-host12
     2014-11-06 15:25:29 NO http://...
     2014-11-06 14:44:07 -- http://...
     2014-11-06 14:42:56 OK http://...

The times are the start times of the jobs; the URL points to the
job's logs. The status indicates the working or broken status after
the job:
  'NO' Indicates that the DUT was believed broken after the job.
  'OK' Indicates that the DUT was believed working after the job.
  '--' Indicates that the job probably didn't change the DUT's
       status.
Typically, logs of the actual failure will be found at the last job
to report 'OK', or the first job to report '--'.

"""

# Keeps the print() calls below working identically on Python 2 and 3.
from __future__ import print_function

import argparse
import sys
import time

import common
from autotest_lib.client.common_lib import time_utils
from autotest_lib.server import frontend
from autotest_lib.server.lib import status_history
from autotest_lib.site_utils import lab_inventory

# The fully qualified name makes for lines that are too long, so
# shorten it locally.
HostJobHistory = status_history.HostJobHistory

# _DIAGNOSIS_IDS -
#     Dictionary to map the known diagnosis codes to string values.

_DIAGNOSIS_IDS = {
    status_history.UNUSED: '??',
    status_history.UNKNOWN: '--',
    status_history.WORKING: 'OK',
    status_history.BROKEN: 'NO'
}


# Default time interval for the --duration option when a value isn't
# specified on the command line.
_DEFAULT_DURATION = 24


def _include_status(status, arguments):
    """Determine whether a host with the given status should be printed.

    Checks the given `status` against the command line options in
    `arguments`.  Only `status_history.WORKING` is matched against
    the --working option; every other status is matched against the
    --broken option.

    @param status     Status of a host to be printed or skipped.
    @param arguments  Parsed arguments object as returned by
                      ArgumentParser.parse_args().

    @return Returns `True` if the command-line options call for
            printing hosts with the status, or `False` otherwise.

    """
    if status == status_history.WORKING:
        return arguments.working
    else:
        return arguments.broken


def _print_host_summaries(history_list, arguments):
    """Print one-line summaries of host history.

    This function handles the output format of the --oneline option.
    A header line is always printed; hosts excluded by the
    --working/--broken options are skipped.

    @param history_list  A list of HostJobHistory objects to be
                         printed.
    @param arguments     Parsed arguments object as returned by
                         ArgumentParser.parse_args().

    """
    fmt = '%-30s %-2s %-19s %s'
    print(fmt % ('hostname', 'S', 'last checked', 'URL'))
    for history in history_list:
        status, event = history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        # Placeholders shown when there is no event behind the
        # diagnosis.
        datestr = '---'
        url = '---'
        if event is not None:
            datestr = time_utils.epoch_time_to_date_string(
                    event.start_time)
            url = event.job_url

        print(fmt % (history.hostname,
                     _DIAGNOSIS_IDS[status],
                     datestr,
                     url))


def _print_event_summary(event):
    """Print a one-line summary of a job or special task.

    The line shows the event's start time, its diagnosis code, and
    the URL of its logs.

    @param event  Event object providing `start_time`, `diagnosis`
                  and `job_url` attributes.

    """
    start_time = time_utils.epoch_time_to_date_string(
            event.start_time)
    # The leading blank sets event lines off from the host name line
    # printed by the caller.
    print(' %s %s %s' % (
            start_time,
            _DIAGNOSIS_IDS[event.diagnosis],
            event.job_url))


def _print_hosts(history_list, arguments):
    """Print hosts, optionally with a job history.

    This function handles both the default format for --working
    and --broken options, as well as the output for the
    --full_history and --diagnosis options.  The `arguments`
    parameter determines the format to use.

    @param history_list  A list of HostJobHistory objects to be
                         printed.
    @param arguments     Parsed arguments object as returned by
                         ArgumentParser.parse_args().

    """
    for history in history_list:
        status, _ = history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        print(history.hostname)
        if arguments.full_history:
            for event in history:
                _print_event_summary(event)
        elif arguments.diagnosis:
            for event in history.diagnosis_interval():
                _print_event_summary(event)


def _validate_time_range(arguments):
    """Validate the time range requested on the command line.

    Enforces the rules for the --until, --since, and --duration
    options, and calculates defaults:
      * It isn't allowed to supply all three options.
      * If only two options are supplied, they completely determine
        the time interval.
      * If only one option is supplied, or no options, then apply
        specified defaults to the arguments object.

    On return, both `arguments.since` and `arguments.until` are set.
    Exits the program with status 1 if all three options were given.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    if (arguments.duration is not None and
            arguments.since is not None and arguments.until is not None):
        print('FATAL: Can specify at most two of '
              '--since, --until, and --duration',
              file=sys.stderr)
        sys.exit(1)
    # The end time defaults to "now" unless it was given explicitly
    # or is fully determined by --since plus --duration.
    if (arguments.until is None and (arguments.since is None or
                                     arguments.duration is None)):
        arguments.until = int(time.time())
    if arguments.since is None:
        if arguments.duration is None:
            arguments.duration = _DEFAULT_DURATION
        # --duration is in hours; the time values are in seconds.
        arguments.since = (arguments.until -
                           arguments.duration * 60 * 60)
    elif arguments.until is None:
        arguments.until = (arguments.since +
                           arguments.duration * 60 * 60)


def _get_host_histories(afe, arguments):
    """Return HostJobHistory objects for the requested hosts.

    Checks that individual hosts specified on the command line are
    valid.  Invalid hosts generate a warning message, and are
    omitted from further processing.

    The return value is a list of HostJobHistory objects for the
    valid requested hostnames, using the time range supplied on the
    command line.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    histories = []
    saw_error = False
    for hostname in arguments.hostnames:
        try:
            history = HostJobHistory.get_host_history(
                    afe, hostname, arguments.since, arguments.until)
        except Exception:
            # Deliberately best-effort: any lookup failure is reported
            # as an unknown host and the remaining hosts are still
            # processed.  `Exception` rather than a bare `except:` so
            # that KeyboardInterrupt and SystemExit still propagate.
            print('WARNING: Ignoring unknown host %s' % hostname,
                  file=sys.stderr)
            saw_error = True
        else:
            histories.append(history)
    if saw_error:
        # Create separation from the output that follows
        print(file=sys.stderr)
    return histories


def _validate_host_list(afe, arguments):
    """Validate the user-specified list of hosts.

    Hosts may be specified implicitly with --board or --pool, or
    explicitly as command line arguments.  This enforces these
    rules:
      * If --board or --pool, or both are specified, individual
        hosts may not be specified.
      * However specified, there must be at least one host.

    The return value is a list of HostJobHistory objects for the
    requested hosts, using the time range supplied on the command
    line.  Exits the program with status 1 if either rule is
    violated.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    if arguments.board or arguments.pool:
        if arguments.hostnames:
            print('FATAL: Hostname arguments provided '
                  'with --board or --pool',
                  file=sys.stderr)
            sys.exit(1)
        histories = HostJobHistory.get_multiple_histories(
                afe, arguments.since, arguments.until,
                board=arguments.board, pool=arguments.pool)
    else:
        histories = _get_host_histories(afe, arguments)
    if not histories:
        print('FATAL: no valid hosts found', file=sys.stderr)
        sys.exit(1)
    return histories


def _validate_format_options(arguments):
    """Check the options for what output format to use.

    Enforce these rules:
      * If no format option was given and neither --broken nor
        --working was used, then --oneline becomes the selected
        format.
      * If neither --broken nor --working was used, include both
        working and broken DUTs.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    if (not arguments.oneline and not arguments.diagnosis and
            not arguments.full_history):
        arguments.oneline = (not arguments.working and
                             not arguments.broken)
    # This must come after the format selection above, which depends
    # on whether the user asked for a status filter.
    if not arguments.working and not arguments.broken:
        arguments.working = True
        arguments.broken = True


def _validate_command(afe, arguments):
    """Check that the command's arguments are valid.

    This performs command line checking to enforce command line
    rules that ArgumentParser can't handle.  Additionally, this
    handles calculation of default arguments/options when a simple
    constant default won't do.

    Areas checked:
      * Check that a valid time range was provided, supplying
        defaults as necessary.
      * Select the output format and status filters, supplying
        defaults as necessary.
      * Identify invalid host names.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    # The time range must be settled first; the host lookup uses it.
    _validate_time_range(arguments)
    _validate_format_options(arguments)
    return _validate_host_list(afe, arguments)


def _parse_command(argv):
    """Parse the command line arguments.

    Create an argument parser for this command's syntax, parse the
    command line, and return the result of the ArgumentParser
    parse_args() method.

    @param argv Standard command line argument vector; argv[0] is
                assumed to be the command name.
    @return Result returned by ArgumentParser.parse_args().

    """
    parser = argparse.ArgumentParser(
            prog=argv[0],
            description='Report DUT status and execution history',
            epilog='You can specify one or two of --since, --until, '
                   'and --duration, but not all three.')
    parser.add_argument('-s', '--since', type=status_history.parse_time,
                        metavar='DATE/TIME',
                        help=('Starting time for history display. '
                              'Format: "YYYY-MM-DD HH:MM:SS"'))
    parser.add_argument('-u', '--until', type=status_history.parse_time,
                        metavar='DATE/TIME',
                        help=('Ending time for history display. '
                              'Format: "YYYY-MM-DD HH:MM:SS" '
                              'Default: now'))
    parser.add_argument('-d', '--duration', type=int,
                        metavar='HOURS',
                        help='Number of hours of history to display'
                             ' (default: %d)' % _DEFAULT_DURATION)

    # At most one output format may be chosen explicitly.
    format_group = parser.add_mutually_exclusive_group()
    format_group.add_argument('-f', '--full_history', action='store_true',
                              help='Display host history from most '
                                   'to least recent for each DUT')
    format_group.add_argument('-g', '--diagnosis', action='store_true',
                              help='Display host history for the '
                                   'most recent DUT status change')
    format_group.add_argument('-o', '--oneline', action='store_true',
                              help='Display host status summary')

    parser.add_argument('-w', '--working', action='store_true',
                        help='List working devices by name only')
    parser.add_argument('-n', '--broken', action='store_true',
                        help='List non-working devices by name only')

    parser.add_argument('-b', '--board',
                        help='Display history for all DUTs '
                             'of the given board')
    parser.add_argument('-p', '--pool',
                        help='Display history for all DUTs '
                             'in the given pool. You might '
                             'be interested in the following pools: '
                             + ', '.join(lab_inventory.MANAGED_POOLS[:-1])
                             +', or '+ lab_inventory.MANAGED_POOLS[-1] +'.')
    parser.add_argument('hostnames',
                        nargs='*',
                        help='Host names of DUTs to report on')
    parser.add_argument('--web',
                        help='Master autotest frontend hostname. If no value '
                             'is given, the one in global config will be used.',
                        default=None)
    arguments = parser.parse_args(argv[1:])
    return arguments


def main(argv):
    """Standard main() for command line processing.

    Parses and validates the command line, then prints either the
    one-line summaries or the host list (optionally with history),
    depending on the selected output format.

    @param argv Command line arguments (normally sys.argv).

    """
    arguments = _parse_command(argv)
    afe = frontend.AFE(server=arguments.web)
    history_list = _validate_command(afe, arguments)
    if arguments.oneline:
        _print_host_summaries(history_list, arguments)
    else:
        _print_hosts(history_list, arguments)


if __name__ == '__main__':
    main(sys.argv)