• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2020 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Functions to merge multiple JavaScript coverage files into one"""
5
6import base64
7import logging
8import json
9import os
10import sys
11
12_HERE_PATH = os.path.dirname(__file__)
13_THIRD_PARTY_PATH = os.path.normpath(
14    os.path.join(_HERE_PATH, '..', '..', '..', 'third_party'))
15_SRC_PATH = os.path.normpath(os.path.join(_HERE_PATH, '..', '..', '..'))
16
17# //third_party/node imports.
18sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'node'))
19import node
20
21# //third_party/js_code_coverage imports.
22sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'js_code_coverage'))
23import coverage_modules
24
25logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s',
26                    level=logging.DEBUG)
27
28_PREFIXES_TO_CHECK = ['//', 'import ', '/*', '*']
29
30
31def _parse_json_file(path):
32  """Opens file and parses data into JSON
33
34  Args:
35    path (str): The path to a JSON file to parse.
36  """
37  with open(path, 'r') as json_file:
38    # Some JSON files erroroneously end with double curly brace, prefer to
39    # strip it out instead of throwing an error message.
40    json_string = json_file.read()
41    if json_string[0] == '{' and json_string[-2:] == '}}':
42      logging.warning('Found additional trailing curly brace for path: %s',
43                      path)
44      return json.loads(json_string[:-1])
45    return json.loads(json_string)
46
47
48def _get_paths_with_suffix(input_dir, suffix):
49  """Gets all JSON files in the input directory.
50
51  Args:
52    input_dir (str): The path to recursively search for
53        JSON files.
54
55  Returns:
56    A list of absolute file paths.
57  """
58  paths = []
59  for dir_path, _sub_dirs, file_names in os.walk(input_dir):
60    paths.extend([
61        os.path.join(dir_path, fn) for fn in file_names if fn.endswith(suffix)
62    ])
63  return paths
64
65
def write_parsed_scripts(task_output_dir, source_dir=_SRC_PATH):
  """Extract parsed script contents and write back to original folder
  structure.

  Args:
    task_output_dir (str): The output directory for the sharded task. This will
        contain the raw JavaScript v8 parsed files that are identified by
        their ".js.json" suffix.
    source_dir (str): Root directory the sourcemap source paths are made
        relative to. Defaults to the chromium/src checkout root.

  Returns:
    The absolute file path to the raw parsed scripts or None if no parsed
    scripts were identified (or any of the raw data contains invalid JSON).
  """
  _SOURCEMAPPING_DATA_URL_PREFIX = 'data:application/json;base64,'

  scripts = _get_paths_with_suffix(task_output_dir, '.js.json')
  output_dir = os.path.join(task_output_dir, 'parsed_scripts')

  # The original file is extracted from the inline sourcemaps, this
  # information is not available from the coverage data. So we have to
  # maintain a URL to path map to ensure the coverage data knows the original
  # source location.
  url_to_path_map = {}

  if not scripts:
    return None

  for file_path in scripts:
    try:
      script_data = _parse_json_file(file_path)
    except ValueError as e:
      logging.error('Failed to parse %s: %s', file_path, e)
      return None

    if any(key not in script_data for key in ('url', 'text', 'sourceMapURL')):
      logging.info('File %s is missing key url, text or sourceMapURL',
                   file_path)
      continue

    # TODO(crbug.com/40242180): For now we exclude any sourcemaps that are 0
    # length and also that don't begin with a data URL designation. An empty
    # string never matches the prefix, so one startswith check covers both.
    source_map_url = script_data['sourceMapURL']
    if not source_map_url.startswith(_SOURCEMAPPING_DATA_URL_PREFIX):
      continue

    # Slice off the data URL prefix rather than str.replace, which would also
    # remove any (theoretically) repeated occurrences inside the payload.
    decoded_sourcemap = base64.b64decode(
        source_map_url[len(_SOURCEMAPPING_DATA_URL_PREFIX):])
    json_sourcemap = json.loads(decoded_sourcemap)
    if len(json_sourcemap['sources']) == 0:
      logging.warning('File %s has a valid sourcemap with no sources',
                      file_path)
      continue

    # "sourceRoot" is optional in the sourcemap format; default to '' so a
    # map without it does not raise KeyError.
    source_root = json_sourcemap.get('sourceRoot', '')
    for source in json_sourcemap['sources']:
      source_path = os.path.relpath(
          os.path.normpath(os.path.join(source_root, source)), source_dir)
      source_directory = os.path.join(output_dir, os.path.dirname(source_path))
      # exist_ok avoids a crash when multiple scripts share a directory.
      os.makedirs(source_directory, exist_ok=True)

      with open(os.path.join(output_dir, source_path), 'wb') as f:
        f.write(script_data['text'].encode('utf8'))

      # Only write the first instance of the sources to the map.
      # Sourcemaps require stability in their indexing as the mapping
      # derived are based on the index location of the file in the
      # "sources" and "sourcesContent" fields. Therefore the first index
      # of the "sources" field will be the first file that was encountered
      # during source map generation, i.e. this should be the actual
      # chromium/src original file.
      if script_data['url'] not in url_to_path_map:
        url_to_path_map[script_data['url']] = source_path

  if not url_to_path_map:
    return None

  with open(os.path.join(output_dir, 'parsed_scripts.json'),
            'w+',
            encoding='utf-8') as f:
    json.dump(url_to_path_map, f)

  return output_dir
151
152
def should_exclude(line_contents):
  """Whether we exclude the line from coverage map."""
  stripped = line_contents.strip()
  # Blank lines carry no coverage information.
  if not stripped:
    return True

  # Comment and import lines are likewise uninteresting; startswith accepts
  # a tuple of candidate prefixes.
  return stripped.startswith(tuple(_PREFIXES_TO_CHECK))
166
167
def exclude_uninteresting_lines(coverage_file_path):
  """Removes lines from Istanbul coverage reports that correspond to lines in
  the source file that are empty. These lines provide no additional coverage
  information and in fact inflate the coverage metrics.

  Args:
    coverage_file_path (str): The path to the merged coverage.json file.
  """
  with open(coverage_file_path, 'r+') as coverage_file:
    coverage = json.load(coverage_file)

    for source_path, istanbul_coverage in coverage.items():
      with open(source_path) as source_file:
        source_lines = source_file.readlines()

      # Materialize the keys so entries can be deleted while iterating.
      for statement_key in list(istanbul_coverage['statementMap']):
        statement = istanbul_coverage['statementMap'][statement_key]
        line_number = statement['start']['line']

        # Each statement is expected to span exactly one line.
        assert statement['start']['line'] == statement['end']['line']

        if should_exclude(source_lines[line_number - 1]):
          # Drop both the statement range and its invocation count: the
          # 'statementMap' and 's' maps share keys and must stay in sync.
          del istanbul_coverage['statementMap'][statement_key]
          del istanbul_coverage['s'][statement_key]

    # Overwrite the current coverage file with new contents.
    coverage_file.seek(0)
    coverage_file.truncate()
    json.dump(coverage, coverage_file)
209
210
def remap_paths_to_relative(coverage_file_path, chromium_src_dir, build_dir):
  """Remap paths to be relative to the chromium_src_dir.

  Args:
    coverage_file_path (str): The path to the merged coverage.json file.
    chromium_src_dir (str): The absolute location to chromium/src.
    build_dir (str): The absolute path to the output dir in chromium/src.
  """
  with open(coverage_file_path, 'r+') as coverage_file:
    coverage = json.load(coverage_file)
    excluded_count = 0
    remapped_count = 0

    # Materialize the keys so entries can be removed while iterating.
    for absolute_path in list(coverage.keys()):
      # Generated files under the build dir, and anything outside the
      # chromium/src checkout, carry no useful source coverage.
      if (absolute_path.startswith(build_dir)
          or not absolute_path.startswith(chromium_src_dir)):
        del coverage[absolute_path]
        excluded_count += 1
        continue

      relative_path = os.path.relpath(absolute_path,
                                      chromium_src_dir).replace('\\', '/')
      entry = coverage.pop(absolute_path)
      entry['path'] = relative_path
      coverage[relative_path] = entry
      remapped_count += 1

    logging.info('Remapped %s paths', remapped_count)
    logging.info('Excluded %s paths', excluded_count)

    # Overwrite the current coverage file with new contents.
    coverage_file.seek(0)
    coverage_file.truncate()
    json.dump(coverage, coverage_file)
251
252
def get_raw_coverage_dirs(task_output_dir):
  """Returns the set of directories containing raw v8 coverage.

  Args:
    task_output_dir (str): The output directory for the sharded task. This will
        contain the raw JavaScript v8 coverage files that are identified by
        their ".cov.json" suffix.

  Returns:
    A set of directory paths, each containing at least one ".cov.json" file.
    (Note: previously documented as a list, but a set has always been
    returned.)
  """
  # any() stops at the first matching file, unlike the previous version which
  # kept scanning a directory's remaining files after it was already added.
  return {
      dir_path
      for dir_path, _sub_dirs, file_names in os.walk(task_output_dir)
      if any(name.endswith('.cov.json') for name in file_names)
  }
269
270
def convert_raw_coverage_to_istanbul(raw_coverage_dirs, source_dir,
                                     task_output_dir):
  """Calls the node helper script convert_to_istanbul.js

  Args:
    raw_coverage_dirs (list): Directory that contains raw v8 code coverage.
    source_dir (str): Root directory containing the instrumented source.
    task_output_dir (str): Directory the converted Istanbul reports are
        written to (passed as --output-dir to the node script).

  Raises:
    RuntimeError: If the underlying node command fails.
  """
  # The node script accepts any number of raw coverage directories, so the
  # list is splatted onto the end of the argument vector.
  stdout = node.RunNode([
      os.path.join(_HERE_PATH, 'convert_to_istanbul.js'),
      '--source-dir',
      source_dir,
      '--output-dir',
      task_output_dir,
      '--raw-coverage-dirs',
      *raw_coverage_dirs,
  ])
  logging.info(stdout)
292
293
def merge_istanbul_reports(istanbul_coverage_dir, source_dir, output_file):
  """Merges all disparate istanbul reports into a single report.

  Uses the nyc CLI's "merge" subcommand via the node wrapper.

  Args:
    istanbul_coverage_dir (str): Directory containing separate coverage files.
    source_dir (str): Directory containing instrumented source code.
    output_file (str): File path to output merged coverage.

  Returns:
    The stdout produced by the node invocation.

  Raises:
    RuntimeError: If the underlying node command fails.
  """
  return node.RunNode([
      coverage_modules.PathToNyc(),
      'merge',
      istanbul_coverage_dir,
      output_file,
      '--cwd',
      source_dir,
  ])
313
314
def generate_coverage_reports(coverage_file_dir, output_dir):
  """Generate an LCOV report.

  Invokes the nyc CLI's "report" subcommand with the lcov reporter.

  Args:
    coverage_file_dir (str): Directory containing the coverage.json file.
    output_dir (str): Directory to output the reports.

  Returns:
    The stdout produced by the node invocation.
  """
  return node.RunNode([
      coverage_modules.PathToNyc(),
      'report',
      '--temp-dir',
      coverage_file_dir,
      '--reporter',
      'lcov',
      '--report-dir',
      output_dir,
      # Paths were already remapped to chromium/src-relative form, so do not
      # let nyc apply its post-remap exclusion rules.
      '--exclude-after-remap',
      'false',
  ])
334