#!/usr/bin/env python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Report whether DUTs are working or broken.

usage: dut_status [ <options> ] [hostname ...]

Reports on the history and status of selected DUT hosts, to
determine whether they're "working" or "broken".  For purposes of
the script, "broken" means "the DUT requires manual intervention
before it can be used for further testing", and "working" means "not
broken".  The status determination is based on the history of
completed jobs for the DUT in a given time interval; still-running
jobs are not considered.

Time Interval Selection
~~~~~~~~~~~~~~~~~~~~~~~
A DUT's reported status is based on the DUT's job history in a time
interval determined by command line options.  The interval is
specified with up to two of three options:
  --until/-u DATE/TIME - Specifies an end time for the search
      range.  (default: now)
  --since/-s DATE/TIME - Specifies a start time for the search
      range. (no default)
  --duration/-d HOURS - Specifies the length of the search interval
      in hours. (default: 24 hours)

Any two time options completely specify the time interval.  If only
one option is provided, these defaults are used:
  --until - Use the given end time with the default duration.
  --since - Use the given start time with the default end time.
  --duration - Use the given duration with the default end time.

If no time options are given, use the default end time and duration.

DATE/TIME values are of the form '2014-11-06 17:21:34'.

DUT Selection
~~~~~~~~~~~~~
By default, information is reported for DUTs named as command-line
arguments.  Options are also available for selecting groups of
hosts:
  --board/-b BOARD - Only include hosts with the given board.
  --model/-m MODEL - Only include hosts with the given model.
  --pool/-p POOL - Only include hosts in the given pool. The user
      might be interested in the following pools: bvt, cq,
      continuous, cts, or suites.

Host names may not be combined with --board, --model, or --pool.

The selected hosts may also be filtered based on status:
  -w/--working - Only include hosts in a working state.
  -n/--broken - Only include hosts in a non-working state.  Hosts
      with no job history are considered non-working.

Output Formats
~~~~~~~~~~~~~~
There are four available output formats:
  * A simple list of host names.
  * A status summary showing one line per host.
  * A detailed job history for all selected DUTs, sorted by
    time of execution.
  * A job history for all selected DUTs showing only the history
    surrounding the DUT's last change from working to broken,
    or vice versa.

The default format depends on whether hosts are filtered by
status:
  * With the --working or --broken options, the list of host names
    is the default format.
  * Without those options, the default format is the one-line status
    summary.

These options override the default formats:
  -o/--oneline - Use the one-line summary with the --working or
      --broken options.
  -f/--full_history - Print detailed per-host job history.
  -g/--diagnosis - Print the job history surrounding a status
      change.

Examples
~~~~~~~~
    $ dut_status chromeos2-row4-rack2-host12
    hostname                     S   last checked         URL
    chromeos2-row4-rack2-host12  NO  2014-11-06 15:25:29  http://...

'NO' means the DUT is broken.  That diagnosis is based on a job that
failed:  'last checked' is the time of the failed job, and the URL
points to the job's logs.

    $ dut_status.py -u '2014-11-06 15:30:00' -d 1 -f chromeos2-row4-rack2-host12
    chromeos2-row4-rack2-host12
        2014-11-06 15:25:29  NO http://...
        2014-11-06 14:44:07  -- http://...
        2014-11-06 14:42:56  OK http://...

The times are the start times of the jobs; the URL points to the
job's logs.  The status indicates the working or broken status after
the job:
  'NO' Indicates that the DUT was believed broken after the job.
  'OK' Indicates that the DUT was believed working after the job.
  '--' Indicates that the job probably didn't change the DUT's
       status.
Typically, logs of the actual failure will be found at the last job
to report 'OK', or the first job to report '--'.

"""

import argparse
import sys
import time

import common
from autotest_lib.client.common_lib import time_utils
from autotest_lib.server import constants
from autotest_lib.server import frontend
from autotest_lib.server.lib import status_history
from autotest_lib.utils import labellib

# The fully qualified name makes for lines that are too long, so
# shorten it locally.
HostJobHistory = status_history.HostJobHistory

# _DIAGNOSIS_IDS -
#     Dictionary to map the known diagnosis codes to string values.

_DIAGNOSIS_IDS = {
    status_history.UNUSED: '??',
    status_history.UNKNOWN: '--',
    status_history.WORKING: 'OK',
    status_history.BROKEN: 'NO'
}


# Default time interval for the --duration option when a value isn't
# specified on the command line.
_DEFAULT_DURATION = 24


def _include_status(status, arguments):
    """Determine whether a host with the given status should be printed.

    Checks the given `status` against the command line options in
    `arguments`.  Any status other than `status_history.WORKING` is
    treated as broken for the purposes of filtering.

    @param status     Status of a host to be printed or skipped.
    @param arguments  Parsed arguments object as returned by
                      ArgumentParser.parse_args().

    @return Returns `True` if the command-line options call for
            printing hosts with the status, or `False` otherwise.

    """
    if status == status_history.WORKING:
        return arguments.working
    else:
        return arguments.broken


def _print_host_summaries(history_list, arguments):
    """Print one-line summaries of host history.

    This function handles the output format of the --oneline option.
    A header line is always printed, followed by one line for each
    host whose status passes the --working/--broken filter.

    @param history_list A list of HostJobHistory objects to be printed.
    @param arguments    Parsed arguments object as returned by
                        ArgumentParser.parse_args().

    """
    fmt = '%-30s %-2s %-19s %s'
    # print() with a single parenthesized expression behaves the same
    # under the Python 2 print statement and the Python 3 function.
    print(fmt % ('hostname', 'S', 'last checked', 'URL'))
    for history in history_list:
        status, event = history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        # Placeholders used when there is no event to report for the
        # host.
        datestr = '---'
        url = '---'
        if event is not None:
            datestr = time_utils.epoch_time_to_date_string(
                    event.start_time)
            url = event.job_url

        print(fmt % (history.hostname,
                     _DIAGNOSIS_IDS[status],
                     datestr,
                     url))


def _print_event_summary(event):
    """Print a one-line summary of a job or special task.

    @param event  The job or special task to summarize; its
                  `start_time`, `diagnosis`, and `job_url` attributes
                  are printed.

    """
    start_time = time_utils.epoch_time_to_date_string(
            event.start_time)
    print(' %s %s %s' % (
            start_time,
            _DIAGNOSIS_IDS[event.diagnosis],
            event.job_url))


def _print_hosts(history_list, arguments):
    """Print hosts, optionally with a job history.

    This function handles both the default format for --working
    and --broken options, as well as the output for the
    --full_history and --diagnosis options.  The `arguments`
    parameter determines the format to use.

    @param history_list A list of HostJobHistory objects to be printed.
    @param arguments    Parsed arguments object as returned by
                        ArgumentParser.parse_args().

    """
    for history in history_list:
        status, _ = history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        print(history.hostname)
        if arguments.full_history:
            for event in history:
                _print_event_summary(event)
        elif arguments.diagnosis:
            for event in history.diagnosis_interval():
                _print_event_summary(event)


def _validate_time_range(arguments):
    """Validate the time range requested on the command line.

    Enforces that the rules for the --until, --since, and --duration
    options are followed, and calculates defaults:
      * It isn't allowed to supply all three options.
      * If only two options are supplied, they completely determine
        the time interval.
      * If only one option is supplied, or no options, then apply
        specified defaults to the arguments object.

    On return, both `arguments.since` and `arguments.until` are set.
    Exits the program with status 1 if all three options were given.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    if (arguments.duration is not None and
            arguments.since is not None and
            arguments.until is not None):
        sys.stderr.write('FATAL: Can specify at most two of '
                         '--since, --until, and --duration\n')
        sys.exit(1)
    # The end time defaults to "now" unless it was given explicitly
    # or can be derived from --since plus --duration.
    if (arguments.until is None and (arguments.since is None or
                                     arguments.duration is None)):
        arguments.until = int(time.time())
    if arguments.since is None:
        if arguments.duration is None:
            arguments.duration = _DEFAULT_DURATION
        # The duration is given in hours; times are in seconds.
        arguments.since = (arguments.until -
                           arguments.duration * 60 * 60)
    elif arguments.until is None:
        arguments.until = (arguments.since +
                           arguments.duration * 60 * 60)


def _get_host_histories(afe, arguments):
    """Return HostJobHistory objects for the requested hosts.

    Checks that individual hosts specified on the command line are
    valid.  Invalid hosts generate a warning message, and are
    omitted from further processing.

    The return value is a list of HostJobHistory objects for the
    valid requested hostnames, using the time range supplied on the
    command line.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    histories = []
    saw_error = False
    for hostname in arguments.hostnames:
        try:
            h = HostJobHistory.get_host_history(
                    afe, hostname, arguments.since, arguments.until)
            histories.append(h)
        except Exception:
            # Any lookup failure is reported as an unknown host.
            # Catch Exception rather than everything so that Ctrl-C
            # and sys.exit() are not silently swallowed here.
            sys.stderr.write('WARNING: Ignoring unknown host %s\n' %
                             hostname)
            saw_error = True
    if saw_error:
        # Create separation from the output that follows
        sys.stderr.write('\n')
    return histories


def _validate_host_list(afe, arguments):
    """Validate the user-specified list of hosts.

    Hosts may be specified implicitly with --board, --model, or
    --pool, or explicitly as command line arguments.  This enforces
    these rules:
      * If any of --board, --model, or --pool are specified,
        individual hosts may not be specified.
      * However specified, there must be at least one host.

    Exits the program with status 1 if either rule is violated.

    The return value is a list of HostJobHistory objects for the
    requested hosts, using the time range supplied on the command
    line.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    if arguments.board or arguments.pool or arguments.model:
        if arguments.hostnames:
            sys.stderr.write('FATAL: Hostname arguments provided '
                             'with --board, --model, or --pool\n')
            sys.exit(1)

        labels = labellib.LabelsMapping()
        # Only add labels for options that were actually supplied.
        # NOTE(review): presumably an unset (None) option would
        # otherwise end up as a bogus label in the query - confirm
        # against labellib.LabelsMapping.
        for key in ('board', 'pool', 'model'):
            value = getattr(arguments, key)
            if value is not None:
                labels[key] = value
        histories = HostJobHistory.get_multiple_histories(
                afe, arguments.since, arguments.until,
                labels.getlabels())
    else:
        histories = _get_host_histories(afe, arguments)
    if not histories:
        sys.stderr.write('FATAL: no valid hosts found\n')
        sys.exit(1)
    return histories


def _validate_format_options(arguments):
    """Check the options for what output format to use.

    Enforce these rules:
      * If no format option (--oneline, --diagnosis, --full_history)
        was given and neither --broken nor --working was used, then
        --oneline becomes the selected format.
      * If neither --broken nor --working was used, include both
        working and broken DUTs.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    if (not arguments.oneline and not arguments.diagnosis and
            not arguments.full_history):
        arguments.oneline = (not arguments.working and
                             not arguments.broken)
    if not arguments.working and not arguments.broken:
        arguments.working = True
        arguments.broken = True


def _validate_command(afe, arguments):
    """Check that the command's arguments are valid.

    This performs command line checking to enforce command line
    rules that ArgumentParser can't handle.  Additionally, this
    handles calculation of default arguments/options when a simple
    constant default won't do.

    Areas checked:
      * Check that a valid time range was provided, supplying
        defaults as necessary.
      * Select the output format and status filter defaults.
      * Identify invalid host names.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    # The time range must be settled first; the host lookup below
    # reads arguments.since and arguments.until.
    _validate_time_range(arguments)
    _validate_format_options(arguments)
    return _validate_host_list(afe, arguments)


def _parse_command(argv):
    """Parse the command line arguments.

    Create an argument parser for this command's syntax, parse the
    command line, and return the result of the ArgumentParser
    parse_args() method.

    @param argv Standard command line argument vector; argv[0] is
                assumed to be the command name.
    @return Result returned by ArgumentParser.parse_args().

    """
    parser = argparse.ArgumentParser(
            prog=argv[0],
            description='Report DUT status and execution history',
            epilog='You can specify one or two of --since, --until, '
                   'and --duration, but not all three.')
    parser.add_argument('-s', '--since', type=status_history.parse_time,
                        metavar='DATE/TIME',
                        help=('Starting time for history display. '
                              'Format: "YYYY-MM-DD HH:MM:SS"'))
    parser.add_argument('-u', '--until', type=status_history.parse_time,
                        metavar='DATE/TIME',
                        help=('Ending time for history display. '
                              'Format: "YYYY-MM-DD HH:MM:SS" '
                              'Default: now'))
    parser.add_argument('-d', '--duration', type=int,
                        metavar='HOURS',
                        help='Number of hours of history to display'
                             ' (default: %d)' % _DEFAULT_DURATION)

    # At most one explicit output format may be requested.
    format_group = parser.add_mutually_exclusive_group()
    format_group.add_argument('-f', '--full_history', action='store_true',
                              help='Display host history from most '
                                   'to least recent for each DUT')
    format_group.add_argument('-g', '--diagnosis', action='store_true',
                              help='Display host history for the '
                                   'most recent DUT status change')
    format_group.add_argument('-o', '--oneline', action='store_true',
                              help='Display host status summary')

    parser.add_argument('-w', '--working', action='store_true',
                        help='List working devices by name only')
    parser.add_argument('-n', '--broken', action='store_true',
                        help='List non-working devices by name only')

    parser.add_argument('-b', '--board',
                        help='Display history for all DUTs '
                             'of the given board')
    parser.add_argument('-m', '--model',
                        help='Display history for all DUTs of the given model.')
    parser.add_argument('-p', '--pool',
                        help='Display history for all DUTs '
                             'in the given pool. You might '
                             'be interested in the following pools: '
                             + ', '.join(constants.Pools.MANAGED_POOLS[:-1])
                             + ', or ' + constants.Pools.MANAGED_POOLS[-1]
                             + '.')
    parser.add_argument('hostnames',
                        nargs='*',
                        help='Host names of DUTs to report on')
    parser.add_argument('--web',
                        help='Master autotest frontend hostname. If no value '
                             'is given, the one in global config will be used.',
                        default=None)
    arguments = parser.parse_args(argv[1:])
    return arguments


def main(argv):
    """Standard main() for command line processing.

    Parses the command line, looks up the requested host histories,
    and prints them in the selected output format.

    @param argv Command line arguments (normally sys.argv).

    """
    arguments = _parse_command(argv)
    afe = frontend.AFE(server=arguments.web)
    history_list = _validate_command(afe, arguments)
    if arguments.oneline:
        _print_host_summaries(history_list, arguments)
    else:
        _print_hosts(history_list, arguments)


if __name__ == '__main__':
    main(sys.argv)