# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bz2
import glob
import json
import logging
import os
import re
import shutil
import tempfile
import time
import xml.etree.ElementTree as et
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import cros_logging, service_stopper
from autotest_lib.client.cros.graphics import graphics_utils

RERUN_RATIO = 0.02  # Ratio to rerun failing test for hasty mode


class graphics_dEQP(graphics_utils.GraphicsTest):
    """Run the drawElements Quality Program test suite."""
    version = 1
    _services = None
    _hasty = False
    _hasty_batch_size = 100  # Batch size in hasty mode.
    _shard_number = 0
    _shard_count = 1
    _board = None
    _cpu_type = None
    _gpu_type = None
    _surface = None
    _filter = None
    _width = 256  # Use smallest width for which all tests run/pass.
    _height = 256  # Use smallest height for which all tests run/pass.
    _timeout = 70  # Larger than twice the dEQP watchdog timeout at 30s.
    _test_names = None
    _test_names_file = None
    _log_path = None  # Location for detailed test output logs (in /tmp/).
    _debug = False  # Analyze kernel messages.
    _log_reader = None  # Reader to analyze (kernel) messages log.
    _log_filter = re.compile('.* .* kernel:')  # kernel messages filter.
    _env = None  # environment for test processes
    DEQP_MODULES = {
        'dEQP-EGL': 'egl',
        'dEQP-GLES2': 'gles2',
        'dEQP-GLES3': 'gles3',
        'dEQP-GLES31': 'gles31',
        'dEQP-VK': 'vk',
    }
    # We do not consider these results as failures.
    TEST_RESULT_FILTER = [
        'pass', 'notsupported', 'internalerror', 'qualitywarning',
        'compatibilitywarning', 'skipped'
    ]

    def initialize(self):
        """Query board/CPU/GPU info, set up the environment and stop UI."""
        super(graphics_dEQP, self).initialize()
        self._api_helper = graphics_utils.GraphicsApiHelper()
        self._board = utils.get_board()
        self._cpu_type = utils.get_cpu_soc_family()
        self._gpu_type = utils.get_gpu_family()

        # deqp may depend on libraries that are present only on test images.
        # Those libraries are installed in /usr/local.
        self._env = os.environ.copy()
        old_ld_path = self._env.get('LD_LIBRARY_PATH', '')
        if old_ld_path:
            self._env[
                'LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64:' + old_ld_path
        else:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64'

        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])
        # Valid choices are fbo and pbuffer. The latter avoids dEQP assumptions.
        self._surface = 'pbuffer'
        self._services.stop_services()

    def cleanup(self):
        """Restore the services stopped in initialize()."""
        if self._services:
            self._services.restore_services()
        super(graphics_dEQP, self).cleanup()

    def _archive_test_results(self, result_filename):
        """Reduce space usage.

        The default /tmp result file location is memory backed and capped at 1/2
        of main memory. We have experienced out of storage situations. Avoid
        this for instance by using compression.

        @param result_filename: path of the log file to compress in place.
        """
        # NOTE(review): result_filename is shell-interpolated; dEQP case names
        # are expected to be plain dotted identifiers — confirm if that changes.
        os.system('gzip %s' % result_filename)

    def _parse_test_results(self,
                            result_filename,
                            test_results=None,
                            failing_test=None):
        """Handles result files with one or more test results.

        @param result_filename: log file to parse.
        @param test_results: Result parsed will be appended to it.
        @param failing_test: Tests considered failed will append to it.

        @return: dictionary of parsed test results.
        """
        xml = ''
        xml_start = False
        xml_complete = False
        xml_bad = False
        result = 'ParseTestResultFail'
        # Guard against '#terminateTestCaseResult' appearing before any
        # '#beginTestCaseResult' ever set a test case name.
        test_case = 'Unknown'

        if test_results is None:
            test_results = {}

        if not os.path.isfile(result_filename):
            logging.error('Did not find file %s', result_filename)
            return test_results

        with open(result_filename) as result_file:
            for line in result_file:
                # If the test terminates early, the XML will be incomplete
                # and should not be parsed.
                if line.startswith('#terminateTestCaseResult'):
                    result = line.strip().split()[1]
                    xml_bad = True
                # Will only see #endTestCaseResult if the test does not
                # terminate early.
                elif line.startswith('#endTestCaseResult'):
                    xml_complete = True
                elif xml_start:
                    xml += line
                elif line.startswith('#beginTestCaseResult'):
                    # If we see another begin before an end then something is
                    # wrong.
                    if xml_start:
                        xml_bad = True
                    else:
                        xml_start = True
                        test_case = line.split(' ')[1]

                if xml_complete or xml_bad:
                    if xml_complete:
                        myparser = et.XMLParser(encoding='ISO-8859-1')
                        root = et.fromstring(xml, parser=myparser)
                        test_case = root.attrib['CasePath']
                        result = root.find('Result').get('StatusCode').strip()
                        xml_complete = False
                    test_results[result] = test_results.get(result, 0) + 1
                    if (result.lower() not in self.TEST_RESULT_FILTER and
                            failing_test is not None):
                        failing_test.append(test_case)
                    xml_bad = False
                    xml_start = False
                    result = 'ParseTestResultFail'
                    xml = ''

        return test_results

    def _load_not_passing_cases(self, test_filter):
        """Load all test cases that are in non-'Pass' expectations.

        @param test_filter: string like 'dEQP-GLES2.info'.

        @return: sorted list of test case names from non-Pass expectations.
        """
        not_passing_cases = []
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against extra hasty failures only in hasty mode.
            if (not '.Pass.bz2' in subset_file and
                    (self._hasty or '.hasty.' not in subset_file)):
                # Close the compressed file promptly to avoid handle leaks.
                with bz2.BZ2File(subset_file) as f:
                    not_passing_cases.extend(f.read().splitlines())
        not_passing_cases.sort()
        return not_passing_cases

    def _translate_name_to_api(self, name):
        """Translate test_names or test_filter to api.

        @param name: test name or filter, e.g. 'dEQP-GLES2.info'.

        @return: short api string, e.g. 'gles2'.

        @raises error.TestFail if the prefix is not a known dEQP module.
        """
        test_prefix = name.split('.')[0]
        if test_prefix in self.DEQP_MODULES:
            api = self.DEQP_MODULES[test_prefix]
        else:
            raise error.TestFail('Failed: Invalid test name: %s' % name)
        return api

    def _get_executable(self, api):
        """Return the executable path of the api."""
        return self._api_helper.get_deqp_executable(api)

    def _can_run(self, api):
        """Check if specific api is supported in this board."""
        return api in self._api_helper.get_supported_apis()

    def _bootstrap_new_test_cases(self, test_filter):
        """Ask dEQP for all test cases and removes non-Pass'ing ones.

        This function will query dEQP for test cases and remove all cases that
        are not in 'Pass'ing expectations from the list. This can be used
        incrementally updating failing/hangin tests over several runs.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.

        @return: List of dEQP tests to run.
        """
        test_cases = []
        api = self._translate_name_to_api(test_filter)
        if self._can_run(api):
            executable = self._get_executable(api)
        else:
            return test_cases

        # Must be in the executable directory when running for it to find it's
        # test data files!
        os.chdir(os.path.dirname(executable))

        not_passing_cases = self._load_not_passing_cases(test_filter)
        # We did not find passing cases in expectations. Assume everything else
        # that is there should not be run this time.
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against hasty failures only in hasty mode.
            if self._hasty or '.hasty.' not in subset_file:
                with bz2.BZ2File(subset_file) as f:
                    not_passing_cases.extend(f.read().splitlines())

        # Now ask dEQP executable nicely for whole list of tests. Needs to be
        # run in executable directory. Output file is plain text file named
        # e.g. 'dEQP-GLES2-cases.txt'.
        command = ('%s '
                   '--deqp-runmode=txt-caselist '
                   '--deqp-surface-type=%s '
                   '--deqp-gl-config-name=rgba8888d24s8ms0 ' %
                   (executable, self._surface))
        logging.info('Running command %s', command)
        utils.run(
            command,
            env=self._env,
            timeout=60,
            stderr_is_expected=False,
            ignore_status=False,
            stdin=None)

        # Now read this caselist file.
        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
        if not os.path.isfile(caselist_file):
            raise error.TestFail(
                'Failed: No caselist file at %s!' % caselist_file)

        # And remove non-Pass'ing expectations from caselist.
        with open(caselist_file) as f:
            caselist = f.read().splitlines()
        # Contains lines like "TEST: dEQP-GLES2.capability"
        test_cases = []
        match = 'TEST: %s' % test_filter
        logging.info('Bootstrapping test cases matching "%s".', match)
        for case in caselist:
            if case.startswith(match):
                case = case.split('TEST: ')[1]
                test_cases.append(case)

        test_cases = list(set(test_cases) - set(not_passing_cases))
        if not test_cases:
            raise error.TestFail(
                'Failed: Unable to bootstrap %s!' % test_filter)

        test_cases.sort()
        return test_cases

    def _get_test_cases_from_names_file(self):
        """Read test case names from self._test_names_file.

        @return: list of non-blank test names; [] if the file does not exist.
        """
        if os.path.exists(self._test_names_file):
            file_path = self._test_names_file
            test_cases = [line.rstrip('\n') for line in open(file_path)]
            return [test for test in test_cases if test and not test.isspace()]
        return []

    def _get_test_cases(self, test_filter, subset):
        """Gets the test cases for 'Pass', 'Fail' etc.

        expectations.

        This function supports bootstrapping of new GPU families and dEQP
        binaries. In particular if there are not 'Pass' expectations found for
        this GPU family it will query the dEQP executable for a list of all
        available tests. It will then remove known non-'Pass'ing tests from
        this list to avoid getting into hangs/crashes etc.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.

        @return: List of dEQP tests to run.
        """
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_name = '%s.%s.bz2' % (test_filter, subset)
        subset_path = os.path.join(expectations_dir, subset_name)
        if not os.path.isfile(subset_path):
            if subset == 'NotPass':
                # TODO(ihf): Running hasty and NotPass together is an invitation
                # for trouble (stability). Decide if it should be disallowed.
                return self._load_not_passing_cases(test_filter)
            if subset != 'Pass':
                raise error.TestFail(
                    'Failed: No subset file found for %s!' % subset_path)
            # Ask dEQP for all cases and remove the failing ones.
            return self._bootstrap_new_test_cases(test_filter)

        with bz2.BZ2File(subset_path) as f:
            test_cases = f.read().splitlines()
        if not test_cases:
            raise error.TestFail(
                'Failed: No test cases found in subset file %s!' % subset_path)
        return test_cases

    def _run_tests_individually(self, test_cases, failing_test=None):
        """Runs tests as isolated from each other, but slowly.

        This function runs each test case separately as a command.
        This means a new context for each test etc. Failures will be more
        isolated, but runtime quite high due to overhead.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of test results.
        """
        test_results = {}
        width = self._width
        height = self._height

        i = 0
        for test_case in test_cases:
            i += 1
            logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case)
            result_prefix = os.path.join(self._log_path, test_case)
            log_file = '%s.log' % result_prefix
            debug_file = '%s.debug' % result_prefix
            api = self._translate_name_to_api(test_case)
            if not self._can_run(api):
                result = 'Skipped'
                logging.info('Skipping on %s: %s', self._gpu_type, test_case)
            else:
                executable = self._get_executable(api)
                command = ('%s '
                           '--deqp-case=%s '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' %
                           (executable, test_case, self._surface, width, height,
                            log_file))
                logging.debug('Running single: %s', command)

                # Must be in the executable directory when running for it to find it's
                # test data files!
                os.chdir(os.path.dirname(executable))

                # Must initialize because some errors don't repopulate
                # run_result, leaving old results.
                run_result = {}
                start_time = time.time()
                try:
                    run_result = utils.run(
                        command,
                        env=self._env,
                        timeout=self._timeout,
                        stderr_is_expected=False,
                        ignore_status=True)
                    result_counts = self._parse_test_results(
                        log_file, failing_test=failing_test)
                    self._archive_test_results(log_file)
                    if result_counts:
                        # A single-case run produces one result category;
                        # grab the (only) key portably.
                        result = next(iter(result_counts))
                    else:
                        result = 'Unknown'
                except error.CmdTimeoutError:
                    result = 'TestTimeout'
                except error.CmdError:
                    result = 'CommandFailed'
                except Exception:
                    result = 'UnexpectedError'
                end_time = time.time()

                if self._debug:
                    # Collect debug info and save to json file.
                    output_msgs = {
                        'start_time': start_time,
                        'end_time': end_time,
                        'stdout': [],
                        'stderr': [],
                        'dmesg': []
                    }
                    logs = self._log_reader.get_logs()
                    self._log_reader.set_start_by_current()
                    output_msgs['dmesg'] = [
                        msg for msg in logs.splitlines()
                        if self._log_filter.match(msg)
                    ]
                    if run_result:
                        output_msgs['stdout'] = run_result.stdout.splitlines()
                        output_msgs['stderr'] = run_result.stderr.splitlines()
                    with open(debug_file, 'w') as fd:
                        json.dump(
                            output_msgs,
                            fd,
                            indent=4,
                            separators=(',', ' : '),
                            sort_keys=True)

            logging.info('Result: %s', result)
            test_results[result] = test_results.get(result, 0) + 1

        return test_results

    def _run_tests_hasty(self, test_cases, failing_test=None):
        """Runs tests as quickly as possible.

        This function runs all the test cases, but does not isolate tests and
        may take shortcuts/not run all tests to provide maximum coverage at
        minumum runtime.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: Test considered failed will append to it.

        @return: dictionary of test results.
        """
        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
        # fail as they need surfaces larger than 200*200.
        width = self._width
        height = self._height
        results = {}

        # All tests combined less than 1h in hasty.
        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
        num_test_cases = len(test_cases)

        # We are dividing the number of tests into several shards but run them
        # in smaller batches. We start and end at multiples of batch_size
        # boundaries. Explicit floor division keeps the intent clear.
        shard_start = self._hasty_batch_size * (
            (self._shard_number *
             (num_test_cases // self._shard_count)) // self._hasty_batch_size)
        shard_end = self._hasty_batch_size * (
            ((self._shard_number + 1) *
             (num_test_cases // self._shard_count)) // self._hasty_batch_size)
        # The last shard will be slightly larger than the others. Extend it to
        # cover all test cases avoiding rounding problems with the integer
        # arithmetics done to compute shard_start and shard_end.
        if self._shard_number + 1 == self._shard_count:
            shard_end = num_test_cases

        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
            batch_to = min(batch + self._hasty_batch_size, shard_end)
            batch_cases = '\n'.join(test_cases[batch:batch_to])
            # This assumes all tests in the batch are kicked off via the same
            # executable.
            api = self._translate_name_to_api(test_cases[batch])
            if not self._can_run(api):
                logging.info('Skipping tests on %s: %s', self._gpu_type,
                             batch_cases)
            else:
                executable = self._get_executable(api)
                log_file = os.path.join(
                    self._log_path, '%s_hasty_%d.log' % (self._filter, batch))
                command = ('%s '
                           '--deqp-stdin-caselist '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-visibility=hidden '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' %
                           (executable, self._surface, width, height, log_file))

                logging.info('Running tests %d...%d out of %d:\n%s\n%s',
                             batch + 1, batch_to, num_test_cases, command,
                             batch_cases)

                # Must be in the executable directory when running for it to
                # find it's test data files!
                os.chdir(os.path.dirname(executable))

                try:
                    utils.run(
                        command,
                        env=self._env,
                        timeout=batch_timeout,
                        stderr_is_expected=False,
                        ignore_status=False,
                        stdin=batch_cases)
                except Exception:
                    pass
                # We are trying to handle all errors by parsing the log file.
                results = self._parse_test_results(log_file, results,
                                                   failing_test)
                self._archive_test_results(log_file)
                logging.info(results)
        return results

    def _run_once(self, test_cases):
        """Run dEQP test_cases in individual/hasty mode.

        @param test_cases: test cases to run.

        @return: tuple of (result-count dictionary, list of failing tests).
        """
        failing_test = []
        if self._hasty:
            logging.info('Running in hasty mode.')
            test_results = self._run_tests_hasty(test_cases, failing_test)
        else:
            logging.info('Running each test individually.')
            test_results = self._run_tests_individually(test_cases,
                                                        failing_test)
        return test_results, failing_test

    def run_once(self, opts=None):
        """Autotest entry point: parse options, run dEQP and report results.

        @param opts: list of 'key=value' strings from the control file.

        @raises error.TestFail if any test case fails.
        """
        options = dict(
            filter='',
            test_names='',  # e.g., dEQP-GLES3.info.version,
            # dEQP-GLES2.functional,
            # dEQP-GLES3.accuracy.texture, etc.
            test_names_file='',
            timeout=self._timeout,
            subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
            hasty='False',
            shard_number='0',
            shard_count='1',
            debug='False',
            perf_failure_description=None)
        if opts is None:
            opts = []
        options.update(utils.args_to_dict(opts))
        logging.info('Test Options: %s', options)

        self._hasty = (options['hasty'] == 'True')
        self._timeout = int(options['timeout'])
        self._test_names_file = options['test_names_file']
        self._test_names = options['test_names']
        self._shard_number = int(options['shard_number'])
        self._shard_count = int(options['shard_count'])
        self._debug = (options['debug'] == 'True')
        if not (self._test_names_file or self._test_names):
            self._filter = options['filter']
            if not self._filter:
                raise error.TestFail('Failed: No dEQP test filter specified')
        if options['perf_failure_description']:
            self._test_failure_description = options['perf_failure_description']
        else:
            # Do not report failure if failure description is not specified.
            self._test_failure_report_enable = False

        # Some information to help post-process logs into blacklists later.
        logging.info('ChromeOS BOARD = %s', self._board)
        logging.info('ChromeOS CPU family = %s', self._cpu_type)
        logging.info('ChromeOS GPU family = %s', self._gpu_type)

        # Create a place to put detailed test output logs.
        filter_name = self._filter or os.path.basename(self._test_names_file)
        logging.info('dEQP test filter = %s', filter_name)
        self._log_path = os.path.join(tempfile.gettempdir(),
                                      '%s-logs' % filter_name)
        shutil.rmtree(self._log_path, ignore_errors=True)
        os.mkdir(self._log_path)

        # Load either tests specified by test_names_file, test_names or filter.
        test_cases = []
        if self._test_names_file:
            test_cases = self._get_test_cases_from_names_file()
        elif self._test_names:
            test_cases = []
            for name in self._test_names.split(','):
                test_cases.extend(self._get_test_cases(name, 'Pass'))
        elif self._filter:
            test_cases = self._get_test_cases(self._filter,
                                              options['subset_to_run'])

        if self._debug:
            # LogReader works on /var/log/messages by default.
            self._log_reader = cros_logging.LogReader()
            self._log_reader.set_start_by_current()

        # Assume all tests failed at the beginning.
        for test_case in test_cases:
            self.add_failures(test_case)

        test_results, failing_test = self._run_once(test_cases)
        # Rerun the test if we are in hasty mode.
        if self._hasty and len(failing_test) > 0:
            if len(failing_test) < sum(test_results.values()) * RERUN_RATIO:
                logging.info('Because we are in hasty mode, we will rerun the '
                             'failing tests one at a time')
                rerun_results, failing_test = self._run_once(failing_test)
                # Update failing test result from the test_results
                for result in test_results:
                    if result.lower() not in self.TEST_RESULT_FILTER:
                        test_results[result] = 0
                for result in rerun_results:
                    test_results[result] = (
                        test_results.get(result, 0) + rerun_results[result])
            else:
                logging.info('There are too many failing tests. It would '
                             'take too long to rerun them. Giving up.')

        # Update failing tests to the chrome perf dashboard records.
        for test_case in test_cases:
            if test_case not in failing_test:
                self.remove_failures(test_case)

        logging.info('Test results:')
        logging.info(test_results)
        logging.debug('Test Failed: %s', failing_test)
        self.write_perf_keyval(test_results)

        test_count = 0
        test_failures = 0
        test_passes = 0
        test_skipped = 0
        for result in test_results:
            test_count += test_results[result]
            if result.lower() in ['pass']:
                test_passes += test_results[result]
            if result.lower() not in self.TEST_RESULT_FILTER:
                test_failures += test_results[result]
            if result.lower() in ['skipped']:
                test_skipped += test_results[result]
        # The text "Completed all tests." is used by the process_log.py script
        # and should always appear at the end of a completed test run.
        logging.info(
            'Completed all tests. Saw %d tests, %d passes and %d failures.',
            test_count, test_passes, test_failures)

        if self._filter and test_count == 0 and options[
                'subset_to_run'] != 'NotPass':
            logging.warning('No test cases found for filter: %s!', self._filter)

        if test_failures:
            raise error.TestFail('Failed: on %s %d/%d tests failed.' %
                                 (self._gpu_type, test_failures, test_count))
        if test_skipped > 0:
            logging.info('On %s %d tests skipped, %d passes', self._gpu_type,
                         test_skipped, test_passes)