# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions to merge multiple JavaScript coverage files into one."""

import base64
import json
import logging
import os
import sys

_HERE_PATH = os.path.dirname(__file__)
_THIRD_PARTY_PATH = os.path.normpath(
    os.path.join(_HERE_PATH, '..', '..', '..', 'third_party'))
_SRC_PATH = os.path.normpath(os.path.join(_HERE_PATH, '..', '..', '..'))

# //third_party/node imports.
sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'node'))
import node

# //third_party/js_code_coverage imports.
sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'js_code_coverage'))
import coverage_modules

logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s',
                    level=logging.DEBUG)

_PREFIXES_TO_CHECK = ['//', 'import ', '/*', '*']


def _parse_json_file(path):
  """Opens a file and parses its contents as JSON.

  Args:
    path (str): The path to a JSON file to parse.

  Returns:
    The parsed JSON contents.
  """
  with open(path, 'r') as json_file:
    # Some JSON files erroneously end with a double curly brace; strip the
    # extra brace instead of raising an error.
    json_string = json_file.read()
    if json_string[0] == '{' and json_string[-2:] == '}}':
      logging.warning('Found additional trailing curly brace for path: %s',
                      path)
      return json.loads(json_string[:-1])
    return json.loads(json_string)


def _get_paths_with_suffix(input_dir, suffix):
  """Gets all files under the input directory that end with the given suffix.

  Args:
    input_dir (str): The path to search recursively.
    suffix (str): The file name suffix to match, e.g. ".js.json".

  Returns:
    A list of matching file paths.
  """
  paths = []
  for dir_path, _sub_dirs, file_names in os.walk(input_dir):
    paths.extend([
        os.path.join(dir_path, fn) for fn in file_names if fn.endswith(suffix)
    ])
  return paths


def write_parsed_scripts(task_output_dir, source_dir=_SRC_PATH):
  """Extracts parsed script contents and writes them back to the original
  folder structure.

  Args:
    task_output_dir (str): The output directory for the sharded task. This
        will contain the raw JavaScript v8 parsed files that are identified
        by their ".js.json" suffix.
    source_dir (str): The root directory that extracted source paths are made
        relative to. Defaults to the chromium/src root.

  Returns:
    The path to the directory containing the parsed scripts, or None if no
    parsed scripts were identified (or any of the raw data contains invalid
    JSON).
  """
  _SOURCEMAPPING_DATA_URL_PREFIX = 'data:application/json;base64,'

  scripts = _get_paths_with_suffix(task_output_dir, '.js.json')
  output_dir = os.path.join(task_output_dir, 'parsed_scripts')

  # The original file path is extracted from the inline sourcemap because this
  # information is not available in the coverage data. A URL to path map is
  # therefore maintained so that the coverage data can be tied back to the
  # original source location.
  url_to_path_map = {}

  if not scripts:
    return None

  for file_path in scripts:
    script_data = None
    try:
      script_data = _parse_json_file(file_path)
    except ValueError as e:
      logging.error('Failed to parse %s: %s', file_path, e)
      return None

    if any(key not in script_data for key in ('url', 'text', 'sourceMapURL')):
      logging.info('File %s is missing one of: url, text or sourceMapURL',
                   file_path)
      continue
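
    # Note (illustrative, not exhaustive): an inline sourcemap URL is expected
    # to look like
    #   data:application/json;base64,<base64-encoded sourcemap JSON>
    # where the decoded JSON carries the "sourceRoot" and "sources" fields
    # used below to recover the original file paths.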

    # TODO(crbug.com/40242180): For now, exclude any sourcemaps that are zero
    # length or that don't begin with a data URL designation.
    if len(script_data['sourceMapURL']) == 0 or not script_data[
        'sourceMapURL'].startswith(_SOURCEMAPPING_DATA_URL_PREFIX):
      continue

    decoded_sourcemap = base64.b64decode(script_data['sourceMapURL'].replace(
        _SOURCEMAPPING_DATA_URL_PREFIX, ''))
    json_sourcemap = json.loads(decoded_sourcemap)
    if len(json_sourcemap['sources']) == 0:
      logging.warning('File %s has a valid sourcemap with no sources',
                      file_path)
      continue

    for source in json_sourcemap['sources']:
      source_path = os.path.relpath(
          os.path.normpath(os.path.join(json_sourcemap['sourceRoot'], source)),
          source_dir)
      source_directory = os.path.join(output_dir, os.path.dirname(source_path))
      if not os.path.exists(source_directory):
        os.makedirs(source_directory)

      with open(os.path.join(output_dir, source_path), 'wb') as f:
        f.write(script_data['text'].encode('utf8'))

      # Only write the first instance of the sources to the map.
      # Sourcemaps require stability in their indexing as the mappings
      # derived are based on the index location of the file in the
      # "sources" and "sourcesContent" fields. Therefore the first index
      # of the "sources" field will be the first file that was encountered
      # during source map generation, i.e. this should be the actual
      # chromium/src original file.
      if script_data['url'] not in url_to_path_map:
        url_to_path_map[script_data['url']] = source_path

  if not url_to_path_map:
    return None

  with open(os.path.join(output_dir, 'parsed_scripts.json'),
            'w+',
            encoding='utf-8') as f:
    json.dump(url_to_path_map, f)

  return output_dir


def should_exclude(line_contents):
  """Returns whether the line should be excluded from the coverage map."""
  line_contents = line_contents.strip()
  # Exclude empty lines.
  if line_contents == '':
    return True

  # Exclude comments and imports.
  for prefix in _PREFIXES_TO_CHECK:
    if line_contents.startswith(prefix):
      return True

  return False
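
# Illustrative examples of the exclusion rules above (not exhaustive):
#   should_exclude('')                           -> True  (empty line)
#   should_exclude('  // a line comment')        -> True  (comment)
#   should_exclude("import {Foo} from './x';")   -> True  (import)
#   should_exclude('const x = 1;')               -> False (regular statement)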


def exclude_uninteresting_lines(coverage_file_path):
  """Removes lines from the Istanbul coverage reports that map to source lines
  which are empty, comments or imports. These lines provide no additional
  coverage information and in fact inflate the coverage metrics.

  Args:
    coverage_file_path (str): The path to the merged coverage.json file.
  """
  with open(coverage_file_path, 'r+') as f:
    coverage = json.load(f)

    def exclude_line(coverage_map, key):
      """Excludes an individual line from the coverage map. This relies on
      the key 'statementMap', which maintains a map of statements to lines,
      as well as the key 's', which contains the invocation counts of each
      line.
      """
      del coverage_map['statementMap'][key]
      del coverage_map['s'][key]

    for file_path in coverage:
      istanbul_coverage = coverage[file_path]
      lines = []
      with open(file_path) as fd:
        lines = fd.readlines()

      # Force a list of the keys to allow removal of items whilst iterating.
      for key in list(istanbul_coverage['statementMap']):
        statement_map = istanbul_coverage['statementMap'][key]
        line_num = statement_map['start']['line']

        assert statement_map['start']['line'] == statement_map['end']['line']

        if should_exclude(lines[line_num - 1]):
          exclude_line(istanbul_coverage, key)
          continue

    # Overwrite the current coverage file with the new contents.
    f.seek(0)
    f.truncate()
    json.dump(coverage, f)


def remap_paths_to_relative(coverage_file_path, chromium_src_dir, build_dir):
  """Remaps the coverage paths to be relative to chromium_src_dir.

  Args:
    coverage_file_path (str): The path to the merged coverage.json file.
    chromium_src_dir (str): The absolute path to chromium/src.
    build_dir (str): The absolute path to the output dir in chromium/src.
  """
  with open(coverage_file_path, 'r+') as f:
    coverage_json = json.load(f)
    excluded_paths = 0
    remapped_paths = 0

    for key in list(coverage_json.keys()):
      if key.startswith(build_dir):
        del coverage_json[key]
        excluded_paths += 1
        continue

      if not key.startswith(chromium_src_dir):
        del coverage_json[key]
        excluded_paths += 1
        continue

      relative_src_path = os.path.relpath(key,
                                          chromium_src_dir).replace('\\', '/')
      value = coverage_json[key]
      value['path'] = relative_src_path
      coverage_json[relative_src_path] = value
      del coverage_json[key]
      remapped_paths += 1

    logging.info('Remapped %s paths', remapped_paths)
    logging.info('Excluded %s paths', excluded_paths)

    # Overwrite the current coverage file with the new contents.
    f.seek(0)
    f.truncate()
    json.dump(coverage_json, f)
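
# Illustrative example of the remapping above (paths are hypothetical): with
# chromium_src_dir='/checkout/chromium/src' and a build_dir of
# '/checkout/chromium/src/out/Default', the key
# '/checkout/chromium/src/ui/webui/resources/js/assert.ts' becomes
# 'ui/webui/resources/js/assert.ts', while keys under the build dir or outside
# chromium/src are dropped from the report entirely.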


def get_raw_coverage_dirs(task_output_dir):
  """Returns the set of directories containing raw v8 coverage files.

  Args:
    task_output_dir (str): The output directory for the sharded task. This
        will contain the raw JavaScript v8 coverage files that are identified
        by their ".cov.json" suffix.
  """
  coverage_directories = set()
  for dir_path, _sub_dirs, file_names in os.walk(task_output_dir):
    for name in file_names:
      if name.endswith('.cov.json'):
        coverage_directories.add(dir_path)
        break

  return coverage_directories


def convert_raw_coverage_to_istanbul(raw_coverage_dirs, source_dir,
                                     task_output_dir):
  """Calls the node helper script convert_to_istanbul.js.

  Args:
    raw_coverage_dirs (list): Directories that contain raw v8 code coverage.
    source_dir (str): Root directory containing the instrumented source.
    task_output_dir (str): The output directory for the sharded task; the
        converted coverage is written here.

  Raises:
    RuntimeError: If the underlying node command fails.
  """
  stdout = node.RunNode([
      os.path.join(_HERE_PATH, 'convert_to_istanbul.js'),
      '--source-dir',
      source_dir,
      '--output-dir',
      task_output_dir,
      '--raw-coverage-dirs',
      *raw_coverage_dirs,
  ])
  logging.info(stdout)


def merge_istanbul_reports(istanbul_coverage_dir, source_dir, output_file):
  """Merges all disparate istanbul reports into a single report.

  Args:
    istanbul_coverage_dir (str): Directory containing separate coverage files.
    source_dir (str): Directory containing the instrumented source code.
    output_file (str): File path to output the merged coverage to.

  Raises:
    RuntimeError: If the underlying node command fails.
  """
  return node.RunNode([
      coverage_modules.PathToNyc(),
      'merge',
      istanbul_coverage_dir,
      output_file,
      '--cwd',
      source_dir,
  ])


def generate_coverage_reports(coverage_file_dir, output_dir):
  """Generates an LCOV report.

  Args:
    coverage_file_dir (str): Directory containing the coverage.json file.
    output_dir (str): Directory to output the reports to.

  Raises:
    RuntimeError: If the underlying node command fails.
  """
  return node.RunNode([
      coverage_modules.PathToNyc(),
      'report',
      '--temp-dir',
      coverage_file_dir,
      '--reporter',
      'lcov',
      '--report-dir',
      output_dir,
      '--exclude-after-remap',
      'false',
  ])
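

# The sketch below is illustrative only: it shows one plausible way the
# helpers in this module could be chained together by a merge script. It is
# not called from anywhere, and the intermediate directory and file names
# ('istanbul', 'coverage', 'coverage.json') as well as the argument choices
# are assumptions, not the actual merge-script wiring.
def _example_merge_flow(task_output_dir, chromium_src_dir, build_dir,
                        report_dir):
  """Illustrative sketch of how the helpers above might compose."""
  raw_dirs = get_raw_coverage_dirs(task_output_dir)
  parsed_dir = write_parsed_scripts(task_output_dir)
  if not raw_dirs or not parsed_dir:
    return

  # Convert the raw v8 coverage into Istanbul format, then merge the
  # per-shard Istanbul reports into a single coverage.json.
  convert_raw_coverage_to_istanbul(raw_dirs, parsed_dir, task_output_dir)
  istanbul_dir = os.path.join(task_output_dir, 'istanbul')  # assumed name
  coverage_dir = os.path.join(task_output_dir, 'coverage')  # assumed name
  coverage_file = os.path.join(coverage_dir, 'coverage.json')
  merge_istanbul_reports(istanbul_dir, parsed_dir, coverage_file)

  # Post-process the merged report and emit the LCOV output.
  exclude_uninteresting_lines(coverage_file)
  remap_paths_to_relative(coverage_file, chromium_src_dir, build_dir)
  generate_coverage_reports(coverage_dir, report_dir)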