# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions to merge multiple JavaScript coverage files into one"""

import base64
import logging
import json
import os
import sys

_HERE_PATH = os.path.dirname(__file__)
_THIRD_PARTY_PATH = os.path.normpath(
    os.path.join(_HERE_PATH, '..', '..', '..', 'third_party'))
_SRC_PATH = os.path.normpath(os.path.join(_HERE_PATH, '..', '..', '..'))
sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'node'))
sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'js_code_coverage'))
import node
import coverage_modules

logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s',
                    level=logging.DEBUG)

# Line prefixes that identify comments and imports, which carry no useful
# coverage signal. A tuple so it can be handed to str.startswith directly.
_PREFIXES_TO_CHECK = ('//', 'import ', '/*', '*')


def _parse_json_file(path):
    """Opens file and parses data into JSON

    Args:
        path (str): The path to a JSON file to parse.

    Returns:
        The deserialized JSON contents of the file.
    """
    with open(path, 'r') as json_file:
        return json.load(json_file)


def _get_paths_with_suffix(input_dir, suffix):
    """Gets all files ending with |suffix| in the input directory.

    Args:
        input_dir (str): The path to recursively search for
            matching files.
        suffix (str): The file name suffix to match against.

    Returns:
        A list of absolute file paths.
    """
    paths = []
    for dir_path, _sub_dirs, file_names in os.walk(input_dir):
        paths.extend([
            os.path.join(dir_path, fn) for fn in file_names
            if fn.endswith(suffix)
        ])
    return paths


def write_parsed_scripts(task_output_dir, source_dir=_SRC_PATH):
    """Extract parsed script contents and write back to original folder
    structure.

    Args:
        task_output_dir (str): The output directory for the sharded task. This
            will contain the raw JavaScript v8 parsed files that are identified
            by their ".js.json" suffix.
        source_dir (str): Root of the source checkout; extracted files are
            written relative to this directory. Defaults to chromium/src.

    Returns:
        The absolute file path to the raw parsed scripts or None if no parsed
        scripts were identified (or any of the raw data contains invalid JSON).
    """
    _SOURCEMAPPING_DATA_URL_PREFIX = 'data:application/json;base64,'

    scripts = _get_paths_with_suffix(task_output_dir, '.js.json')
    output_dir = os.path.join(task_output_dir, 'parsed_scripts')

    # The original file is extracted from the inline sourcemaps, this
    # information is not available from the coverage data. So we have to
    # maintain a URL to path map to ensure the coverage data knows the original
    # source location.
    url_to_path_map = {}

    if not scripts:
        return None

    for file_path in scripts:
        # TODO(crbug.com/1224786): Some of the raw script data is being saved
        # with a trailing curly brace leading to invalid JSON. Bail out if this
        # is encountered and ensure we log the file path.
        script_data = None
        try:
            script_data = _parse_json_file(file_path)
        except ValueError as e:
            logging.error('Failed to parse %s: %s', file_path, e)
            return None

        if any(key not in script_data
               for key in ('url', 'text', 'sourceMapURL')):
            logging.info('File %s is missing key url, text or sourceMapURL',
                         file_path)
            continue

        # TODO(crbug/1373753): For now we exclude any sourcemaps that are 0
        # length and also that don't begin with a data URL designation.
        if len(script_data['sourceMapURL']) == 0 or not script_data[
                'sourceMapURL'].startswith(_SOURCEMAPPING_DATA_URL_PREFIX):
            continue

        decoded_sourcemap = base64.b64decode(
            script_data['sourceMapURL'].replace(_SOURCEMAPPING_DATA_URL_PREFIX,
                                                ''))
        json_sourcemap = json.loads(decoded_sourcemap)
        if len(json_sourcemap['sources']) == 0:
            logging.warning('File %s has a valid sourcemap with no sources',
                            file_path)
            continue

        # "sourceRoot" is optional in the Source Map v3 spec, so fall back to
        # an empty string (os.path.join treats '' as a no-op prefix).
        source_root = json_sourcemap.get('sourceRoot', '')
        for source_idx in range(len(json_sourcemap['sources'])):
            source_path = os.path.relpath(
                os.path.normpath(
                    os.path.join(source_root,
                                 json_sourcemap['sources'][source_idx])),
                source_dir)
            source_directory = os.path.join(output_dir,
                                            os.path.dirname(source_path))
            # exist_ok avoids a race if another shard creates the directory
            # between the check and the creation.
            os.makedirs(source_directory, exist_ok=True)

            with open(os.path.join(output_dir, source_path), 'wb') as f:
                f.write(script_data['text'].encode('utf8'))

            # Only write the first instance of the sources to the map.
            # Sourcemaps require stability in their indexing as the mapping
            # derived are based on the index location of the file in the
            # "sources" and "sourcesContent" fields. Therefore the first index
            # of the "sources" field will be the first file that was encountered
            # during source map generation, i.e. this should be the actual
            # chromium/src original file.
            if script_data['url'] not in url_to_path_map:
                url_to_path_map[script_data['url']] = source_path

    if not url_to_path_map:
        return None

    with open(os.path.join(output_dir, 'parsed_scripts.json'),
              'w+',
              encoding='utf-8') as f:
        json.dump(url_to_path_map, f)

    return output_dir


def should_exclude(line_contents):
    """Whether we exclude the line from coverage map.

    Args:
        line_contents (str): A single line of JavaScript source.

    Returns:
        True if the line is empty or is a comment/import line.
    """
    line_contents = line_contents.strip()
    # Exclude empty lines.
    if line_contents == '':
        return True

    # Exclude comments and imports. str.startswith accepts a tuple of
    # prefixes, so a single call covers all of them.
    return line_contents.startswith(_PREFIXES_TO_CHECK)


def exclude_uninteresting_lines(coverage_file_path):
    """Removes lines from Istanbul coverage reports that correspond to lines in
    the source file that are empty. These lines provide no additional coverage
    information and in fact inflate the coverage metrics.

    Args:
        coverage_file_path (str): The path to the merged coverage.json file.
    """
    with open(coverage_file_path, 'r+') as f:
        coverage = json.load(f)

        def exclude_line(coverage_map, key):
            """Exclude an individual line from the coverage map. This relies on
            the key 'statementMap' which maintains a map of statements to lines
            as well as the key 's' which contains the invocation counts of each
            line.
            """
            del coverage_map['statementMap'][key]
            del coverage_map['s'][key]

        for file_path in coverage:
            istanbul_coverage = coverage[file_path]
            lines = []
            with open(file_path) as fd:
                lines = fd.readlines()

            # Force list of the keys to allow removal of items whilst
            # iterating.
            for key in list(istanbul_coverage['statementMap']):
                statement_map = istanbul_coverage['statementMap'][key]
                line_num = statement_map['start']['line']

                # This pruning only makes sense for single-line statements;
                # multi-line statements would need range-based handling.
                assert statement_map['start']['line'] == statement_map['end'][
                    'line']

                if should_exclude(lines[line_num - 1]):
                    exclude_line(istanbul_coverage, key)
                    continue

        # Overwrite the current coverage file with new contents.
        f.seek(0)
        f.truncate()
        json.dump(coverage, f)


def remap_paths_to_relative(coverage_file_path, chromium_src_dir, build_dir):
    """Remap paths to be relative to the chromium_src_dir.

    Args:
        coverage_file_path (str): The path to the merged coverage.json file.
        chromium_src_dir (str): The absolute location to chromium/src.
        build_dir (str): The absolute path to the output dir in chromium/src.
    """
    with open(coverage_file_path, 'r+') as f:
        coverage_json = json.load(f)
        excluded_paths = 0
        remapped_paths = 0

        # Force a list of keys so entries can be deleted while iterating.
        for key in list(coverage_json.keys()):

            # Generated files under the build dir have no stable source
            # location; drop them.
            if key.startswith(build_dir):
                del coverage_json[key]
                excluded_paths += 1
                continue

            # Anything outside the checkout cannot be remapped; drop it too.
            if not key.startswith(chromium_src_dir):
                del coverage_json[key]
                excluded_paths += 1
                continue

            relative_src_path = os.path.relpath(key, chromium_src_dir).replace(
                '\\', '/')
            value = coverage_json[key]
            value['path'] = relative_src_path
            coverage_json[relative_src_path] = value
            del coverage_json[key]
            remapped_paths += 1

        # Lazy %-style args defer formatting to the logging framework.
        logging.info('Remapped %s paths', remapped_paths)
        logging.info('Excluded %s paths', excluded_paths)

        # Overwrite the current coverage file with new contents.
        f.seek(0)
        f.truncate()
        json.dump(coverage_json, f)


def get_raw_coverage_dirs(task_output_dir):
    """Returns a set of directories containing raw v8 coverage.

    Args:
        task_output_dir (str): The output directory for the sharded task. This
            will contain the raw JavaScript v8 coverage files that are
            identified by their ".cov.json" suffix.

    Returns:
        A set of directory paths, each containing at least one ".cov.json"
        file.
    """
    coverage_directories = set()
    for dir_path, _sub_dirs, file_names in os.walk(task_output_dir):
        for name in file_names:
            if name.endswith('.cov.json'):
                coverage_directories.add(dir_path)
                # One match is enough to record this directory; stop
                # scanning the rest of its files.
                break

    return coverage_directories


def convert_raw_coverage_to_istanbul(raw_coverage_dirs, source_dir,
                                     task_output_dir):
    """Calls the node helper script convert_to_istanbul.js

    Args:
        raw_coverage_dirs (list): Directory that contains raw v8 code coverage.
        source_dir (str): Root directory containing the instrumented source.
        task_output_dir (str): Directory the converted output is written to.

    Raises:
        RuntimeError: If the underlying node command fails.
    """
    return node.RunNode([
        os.path.join(_HERE_PATH, 'convert_to_istanbul.js'),
        '--source-dir',
        source_dir,
        '--output-dir',
        task_output_dir,
        '--raw-coverage-dirs',
        *raw_coverage_dirs,
    ])


def merge_istanbul_reports(istanbul_coverage_dir, source_dir, output_file):
    """Merges all disparate istanbul reports into a single report.

    Args:
        istanbul_coverage_dir (str): Directory containing separate coverage
            files.
        source_dir (str): Directory containing instrumented source code.
        output_file (str): File path to output merged coverage.

    Raises:
        RuntimeError: If the underlying node command fails.
    """
    return node.RunNode([
        coverage_modules.PathToNyc(),
        'merge',
        istanbul_coverage_dir,
        output_file,
        '--cwd',
        source_dir,
    ])


def generate_coverage_reports(coverage_file_dir, output_dir):
    """Generate a LCOV report.

    Args:
        coverage_file_dir (str): Directory containing the coverage.json file.
        output_dir (str): Directory to output the reports.
    """
    return node.RunNode([
        coverage_modules.PathToNyc(),
        'report',
        '--temp-dir',
        coverage_file_dir,
        '--reporter',
        'lcov',
        '--report-dir',
        output_dir,
        '--exclude-after-remap',
        'false',
    ])