#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generates incremental code coverage reports for Java code in Chromium.

Usage:

  build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
    <EMMA file directory> --lines-for-coverage-file
    <path to file containing lines for coverage>

  Creates a JSON representation of the overall and file coverage stats and saves
  this information to the specified output file.
"""

import argparse
import collections
import json
import logging
import os
import re
import sys
from xml.etree import ElementTree

import devil_chromium
from devil.utils import run_tests_helper

NOT_EXECUTABLE = -1
NOT_COVERED = 0
COVERED = 1
PARTIALLY_COVERED = 2

# Coverage information about a single line of code.
LineCoverage = collections.namedtuple(
    'LineCoverage',
    ['lineno', 'source', 'covered_status', 'fractional_line_coverage'])


class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool.

  Example HTML:

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
       <td class="l" title="78% line coverage (7 out of 9)">108</td>
       <td title="78% line coverage (7 out of 9 instructions)">
         if (index < 0 || index >= mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
       <td class="l">109</td>
       <td> </td>
    </tr>
    <tr class="c">
       <td class="l">110</td>
       <td>        if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
       <td class="l">111</td>
       <td>            for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

    We can parse this to get:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p)
      4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Children of HTML elements are represented as a list in ElementTree. These
  # constants represent list indices corresponding to relevant child elements.

  # Child 1 contains percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # UTF-8 no break space.
  _NO_BREAK_SPACE = '\xc2\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one per <tr> matched by
      |_XPATH_SELECT_LOC|. Rows whose CLASS attribute is not a known coverage
      class are reported as NOT_EXECUTABLE.
    """
    line_tr_elements = self._FindElements(
        emma_file_path, self._XPATH_SELECT_LOC)
    line_coverage = []
    for tr in line_tr_elements:
      # Get the coverage status.
      coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)
      # Get the fractional coverage value.
      if coverage_status == PARTIALLY_COVERED:
        title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
        # Parse string that contains percent covered: "83% line coverage ...".
        percent_covered = title_attribute.split('%')[0]
        fractional_coverage = int(percent_covered) / 100.0
      else:
        fractional_coverage = 1.0

      # Get the line number.
      lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
      # Handles oddly formatted HTML (where there is an extra <a> tag).
      lineno = int(lineno_element.text or
                   lineno_element[self._ELEMENT_CONTAINING_LINENO].text)
      # Get the original line of Java source code. ElementTree reports the
      # text of an empty element as None, so fall back to an empty string
      # instead of failing on the encode() call below.
      raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or ''
      utf8_source = raw_source.encode('UTF-8')
      source = utf8_source.replace(self._NO_BREAK_SPACE, ' ')

      line = LineCoverage(lineno, source, coverage_status, fractional_coverage)
      line_coverage.append(line)

    return line_coverage

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info.

    Returns:
      A dict mapping string representation of Java packages (with class
      names appended) to the corresponding file paths of EMMA HTML files.
    """
    # These <a> elements contain each package name and the path of the file
    # where all classes within said package are listed.
    package_link_elements = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {
        os.path.join(self._base_dir, link.attrib['HREF']): link.text
        for link in package_link_elements if 'HREF' in link.attrib
    }

    package_to_emma = {}
    for package_emma_file_path, package_name in package_links.iteritems():
      # These <a> elements contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_link_elements = self._FindElements(
          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)

      for class_name_element in coverage_file_link_elements:
        # Same filter as for the package links above: an anchor without an
        # HREF cannot be resolved to a coverage file, so skip it rather than
        # raising KeyError.
        if 'HREF' not in class_name_element.attrib:
          continue
        emma_coverage_file_path = os.path.join(
            self._emma_files_path, class_name_element.attrib['HREF'])
        full_package_name = '%s.%s' % (package_name, class_name_element.text)
        package_to_emma[full_package_name] = emma_coverage_file_path

    return package_to_emma

  # pylint: disable=no-self-use
  def _FindElements(self, file_path, xpath_selector):
    """Reads a HTML file and performs an XPath match.

    The file contents are decoded as ISO-8859-1 and re-encoded as UTF-8 before
    being handed to ElementTree.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of ElementTree.Elements matching the given XPath selector.
      Returns an empty list if there is no match.
    """
    with open(file_path) as f:
      file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')

    root = ElementTree.fromstring(file_contents)
    return root.findall(xpath_selector)


class _EmmaCoverageStats(object):
  """Computes code coverage stats for Java code using the coverage tool EMMA.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each Java source file. Coverage
  reports are generated in JSON format.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE_MATCH_GROUP = 'package'
  RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java source code file paths to get EMMA
        coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDict(self, lines_for_coverage):
    """Returns a dict containing detailed coverage information.

    Gets detailed coverage stats for each file specified in the
    |lines_for_coverage| dict and the total incremental number of lines covered
    and executable for all files in |lines_for_coverage|.

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of line
        numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
      Contains absolute coverage stats for each file, coverage stats for each
      file's lines specified in |lines_for_coverage|, line by line coverage
      for each file, and overall coverage stats for the lines specified in
      |lines_for_coverage|. Files without EMMA data are logged and omitted.
    """
    file_coverage = {}
    for file_path, line_numbers in lines_for_coverage.iteritems():
      file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
      if file_coverage_dict:
        file_coverage[file_path] = file_coverage_dict
      else:
        logging.warning(
            'No code coverage data for %s, skipping.', file_path)

    covered_statuses = [s['incremental'] for s in file_coverage.itervalues()]
    num_covered_lines = sum(s['covered'] for s in covered_statuses)
    num_total_lines = sum(s['total'] for s in covered_statuses)
    return {
        'files': file_coverage,
        'patch': {
            'incremental': {
                'covered': num_covered_lines,
                'total': num_total_lines
            }
        }
    }

  def GetCoverageDictForFile(self, file_path, line_numbers):
    """Returns a dict containing detailed coverage info for the given file.

    Args:
      file_path: The path to the Java source file that we want to create the
        coverage dict for.
      line_numbers: A list of integer line numbers to retrieve additional stats
        for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
      a file, or None if there is no EMMA file known for |file_path|.
    """
    if file_path not in self._source_to_emma:
      return None
    emma_file = self._source_to_emma[file_path]
    total_line_coverage = self._emma_parser.GetLineCoverage(emma_file)
    # Membership is tested for every line of the file (twice), so build a set
    # once instead of scanning the |line_numbers| list each time.
    changed_lines = set(line_numbers)
    incremental_line_coverage = [line for line in total_line_coverage
                                 if line.lineno in changed_lines]
    line_by_line_coverage = [
        {
            'line': line.source,
            'coverage': line.covered_status,
            'changed': line.lineno in changed_lines,
            'fractional_coverage': line.fractional_line_coverage,
        }
        for line in total_line_coverage
    ]
    total_covered_lines, total_lines = (
        self.GetSummaryStatsForLines(total_line_coverage))
    incremental_covered_lines, incremental_total_lines = (
        self.GetSummaryStatsForLines(incremental_line_coverage))

    file_coverage_stats = {
        'absolute': {
            'covered': total_covered_lines,
            'total': total_lines
        },
        'incremental': {
            'covered': incremental_covered_lines,
            'total': incremental_total_lines
        },
        'source': line_by_line_coverage,
    }
    return file_coverage_stats

  # pylint: disable=no-self-use
  def GetSummaryStatsForLines(self, line_coverage):
    """Gets summary stats for a given list of LineCoverage objects.

    Partially covered lines contribute their fractional coverage to the covered
    count and one full line to the executable count. NOT_EXECUTABLE lines are
    ignored.

    Args:
      line_coverage: A list of LineCoverage objects.

    Returns:
      A tuple containing the number of lines that are covered and the total
      number of lines that are executable, respectively.
    """
    partially_covered_sum = 0
    covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        continue
      covered_status_totals[status] += 1
      if status == PARTIALLY_COVERED:
        partially_covered_sum += line.fractional_line_coverage

    total_covered = covered_status_totals[COVERED] + partially_covered_sum
    total_lines = sum(covered_status_totals.values())
    return total_covered, total_lines

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    This method gathers the information needed to correlate EMMA HTML
    files with Java source files. EMMA XML and plain text reports do not provide
    line by line coverage data, so HTML reports must be used instead.
    Unfortunately, the HTML files that are created are given garbage names
    (i.e 1.html) so we need to manually correlate EMMA HTML files
    with the original Java source files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning("Skipping %s because it doesn't have a package "
                        "statement.", file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    source_to_emma = {source: package_to_emma[package]
                      for source, package in source_to_package.iteritems()
                      if package in package_to_emma}
    return source_to_emma

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
      return True
    logging.info('Skipping file %s, cannot compute code coverage.', file_path)
    return False

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name with file name appended or
      None if there is no package statement in the file.
    """
    with open(file_path) as f:
      file_content = f.read()

    package_match = _EmmaCoverageStats.RE_PACKAGE.search(file_content)
    if not package_match:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    file_name = os.path.basename(file_path)
    return '%s.%s' % (package, file_name)


def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. An empty JSON object is written when none of the listed
  files needs coverage.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).
  """
  with open(line_coverage_file) as f:
    potential_files_for_coverage = json.load(f)

  files_for_coverage = {f: lines
                        for f, lines in potential_files_for_coverage.iteritems()
                        if _EmmaCoverageStats.NeedsCoverage(f)}

  coverage_results = {}
  if files_for_coverage:
    code_coverage = _EmmaCoverageStats(coverage_dir, files_for_coverage.keys())
    coverage_results = code_coverage.GetCoverageDict(files_for_coverage)
  else:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)

  with open(out_file_path, 'w+') as out_status_file:
    json.dump(coverage_results, out_status_file)


def main():
  """Parses command line arguments and writes the JSON coverage report."""
  argparser = argparse.ArgumentParser()
  argparser.add_argument('--out', required=True, type=str,
                         help='Report output file path.')
  argparser.add_argument('--emma-dir', required=True, type=str,
                         help='EMMA HTML report directory.')
  argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
                         help='File containing a JSON object. Should contain a '
                         'dict mapping file names to lists of line numbers of '
                         'code for which coverage information is desired.')
  argparser.add_argument('-v', '--verbose', action='count',
                         help='Print verbose log information.')
  args = argparser.parse_args()
  run_tests_helper.SetLogLevel(args.verbose)
  devil_chromium.Initialize()
  GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)


if __name__ == '__main__':
  sys.exit(main())