• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python3
#
# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generates a `foo.owners` file for a `fuzzer_test("foo", ...)` GN target.

By default, the closest `OWNERS` file is located and copied, except for
`//OWNERS` and `//third_party/OWNERS` for fear of spamming top-level owners with
fuzzer bugs they know nothing about.

If no such file can be located, then we attempt to use `git blame` to identify
the author of the main fuzzer `.cc` file. Note that this does not work for code
in git submodules (e.g. most code in `third_party/`), in which case we generate
an empty file.

Invoked by GN from `fuzzer_test.gni`.
"""
19
20import argparse
21import os
22import re
23import subprocess
24import sys
25
26from typing import Optional
27
# Matches an `author-mail <...>` line as printed by `git blame --porcelain`
# and captures the text between the angle brackets.
AUTHOR_REGEX = re.compile('author-mail <(.+)>')

# The directory two levels above the one holding this script; every relative
# source path in this file is resolved against it.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CHROMIUM_SRC_DIR = os.path.dirname(os.path.dirname(_SCRIPT_DIR))

# Name of the per-directory ownership file.
OWNERS_FILENAME = 'OWNERS'

# Directory name at which the upward OWNERS search stops.
THIRD_PARTY = 'third_party'

# `THIRD_PARTY` followed by the platform path separator (joining with an empty
# last component yields exactly one trailing separator).
THIRD_PARTY_SEARCH_STRING = os.path.join(THIRD_PARTY, '')
34
35
def GetAuthorFromGitBlame(blame_output):
  """Extracts the first author e-mail address found in `git blame` output.

  Args:
    blame_output: Raw bytes printed by `git blame`; decoded as UTF-8 and
      scanned line by line.

  Returns:
    The group captured by `AUTHOR_REGEX` on the first line it matches, or None
    when no line matches.
  """
  blame_lines = blame_output.decode('utf-8').splitlines()

  # Both generators are lazy, so scanning stops at the first matching line.
  attempts = (AUTHOR_REGEX.match(blame_line) for blame_line in blame_lines)
  return next((hit.group(1) for hit in attempts if hit), None)
44
45
def GetGitCommand():
  """Picks the git executable name that can be run without shell=True.

  Returns:
    'git.bat' when `sys.platform` is 'win32'; 'git' on every other platform.
  """
  if sys.platform == 'win32':
    return 'git.bat'
  return 'git'
51
52
def GetOwnersFromOwnersFile(source: str) -> Optional[str]:
  """Finds the owners of `source` from the closest OWNERS file.

  Neither //OWNERS nor */third_party/OWNERS is ever used, so as not to spam
  top-level owners with unowned fuzzer bugs.

  Args:
    source: Relative path from the chromium src directory to the target source
      file.

  Returns:
    The entire contents of the closest OWNERS file. That is, the first OWNERS
    file encountered while walking up through the ancestor directories of the
    target source file. None if the walk reaches a `third_party` directory or
    runs out of ancestor directories without finding one.
  """
  # TODO(crbug.com/41486296): Use `pathlib` instead of `os.path` for
  # better ergonomics and robustness.
  dirs = source.split(os.path.sep)[:-1]

  # Note: We never test for //OWNERS, i.e. when `dirs` is empty.
  while dirs:
    # Never return the contents of */third_party/OWNERS, and stop searching.
    if dirs[-1] == THIRD_PARTY:
      break

    owners_file_path = os.path.join(CHROMIUM_SRC_DIR, *dirs, OWNERS_FILENAME)
    if os.path.exists(owners_file_path):
      # TODO(crbug.com/41486296): OWNERS files can reference others,
      # have per-file directives, etc. We should be cleverer than this.
      #
      # Read through a context manager so the handle is closed as soon as the
      # contents are in memory rather than whenever it gets garbage collected.
      with open(owners_file_path) as owners_file:
        return owners_file.read()

    dirs.pop()

  return None
87
88
def GetOwnersForFuzzer(sources):
  """Determines the owners text for a fuzzer from its candidate sources.

  The first source that exists under `CHROMIUM_SRC_DIR` and whose text
  contains one of the fuzzer entry-point markers decides the result; no later
  source is examined after that.

  Args:
    sources: Source paths relative to `CHROMIUM_SRC_DIR`. May be None or
      empty, in which case None is returned.

  Returns:
    The contents of the closest OWNERS file when one is found. Otherwise, if
    `git ls-files` reports the source, the author e-mail that `git blame`
    gives for line 3 of the source (None if none can be parsed). None in all
    remaining cases.
  """
  entry_point_markers = [
      'FuzzOneInput', 'LLVMFuzzerTestOneInput', 'PROTO_FUZZER'
  ]

  for source in sources or ():
    source_path = os.path.join(CHROMIUM_SRC_DIR, source)
    if not os.path.exists(source_path):
      continue

    with open(source_path, 'r') as source_file:
      source_text = source_file.read()

    if not SubStringExistsIn(entry_point_markers, source_text):
      # A dependency of the fuzzer rather than the fuzzer source itself.
      continue

    # The closest OWNERS file takes precedence over git history.
    owners = GetOwnersFromOwnersFile(source)
    if owners:
      return owners

    git_prefix = [
        GetGitCommand(), '--git-dir',
        os.path.join(CHROMIUM_SRC_DIR, '.git')
    ]
    listed_by_git = subprocess.check_output(git_prefix + ['ls-files', source],
                                            cwd=CHROMIUM_SRC_DIR)
    if not listed_by_git:
      # File is not in the working tree. With no OWNERS file either, there is
      # no way to tell who it belongs to.
      return None

    # `git log --follow` cannot be combined with `--reverse`, and `--follow`
    # alone is too slow. Instead, assume the original author wrote the
    # copyright block, which generally survives renames and moves. Blame the
    # block's last line (line 3) rather than its first: a late-2022 sweep
    # rewrote the first line of every copyright block in the repo, and blaming
    # it would assign every older fuzzer to the one person who ran the sweep.
    blame_output = subprocess.check_output(
        git_prefix + ['blame', '--porcelain', '-L3,3', source],
        cwd=CHROMIUM_SRC_DIR)
    return GetAuthorFromGitBlame(blame_output)

  return None
139
140
def FindGroupsAndDepsInDeps(deps_list, build_dir):
  """Finds which deps are GN groups and collects each group's direct deps.

  Runs `gn desc` once per entry of `deps_list` and scans its text output.

  Args:
    deps_list: Iterable of entries, each passed to `gn desc` as the target to
      describe.
    build_dir: Build directory argument passed to `gn desc`.

  Returns:
    A `(groups, deps_for_groups)` tuple. `groups` lists the entries whose
    first `Type: ` line is exactly `Type: group`. `deps_for_groups` maps each
    of those entries to the stripped lines that follow its first
    `Direct dependencies` line, up to (not including) the next empty line.
  """
  groups = []
  deps_for_groups = {}

  for target in deps_list:
    description = subprocess.check_output(
        [GNPath(), 'desc', '--fail-on-unused-args', build_dir, target])
    # A single shared iterator lets each scanning phase below resume exactly
    # where the previous one stopped.
    remaining = iter(description.decode('utf8').splitlines())

    # Phase 1: only the first `Type: ` line counts; anything other than a
    # group means this target is of no interest.
    type_line = next(
        (text for text in remaining if text.startswith('Type: ')), None)
    if type_line != 'Type: group':
      continue

    groups.append(target)
    assert target not in deps_for_groups
    direct_deps = []
    deps_for_groups[target] = direct_deps

    # Phase 2: skip ahead to (and past) the `Direct dependencies` header.
    for text in remaining:
      if text.startswith('Direct dependencies'):
        break

    # Phase 3: one dependency per line until the first empty line.
    for text in remaining:
      if not text:
        break
      direct_deps.append(text.strip())

  return groups, deps_for_groups
169
170
def TraverseGroups(deps_list, build_dir):
  """Replaces every group in a deps list with its direct dependencies.

  Groups are expanded repeatedly, so a group reached through another group is
  expanded as well.

  Args:
    deps_list: Iterable of deps to start from.
    build_dir: Build directory forwarded to `FindGroupsAndDepsInDeps`.

  Returns:
    A list, in no particular order, holding the original deps and the
    discovered direct dependencies, with every expanded group removed.
  """
  expanded = set(deps_list)
  frontier = expanded.copy()

  # Groups already expanded; consulting this breaks circular dependencies.
  visited_groups = set()

  while frontier:
    found, children_by_group = FindGroupsAndDepsInDeps(frontier, build_dir)
    fresh_groups = set(found) - visited_groups
    if not fresh_groups:
      break

    # Groups themselves never belong in the result.
    expanded -= fresh_groups
    visited_groups |= fresh_groups

    # The next round inspects the direct dependencies of the groups just
    # found, minus any group that has already been expanded.
    frontier.clear()
    frontier.update(*(children_by_group[group] for group in fresh_groups))
    frontier -= visited_groups
    expanded |= frontier

  return list(expanded)
197
198
def GetSourcesFromDeps(deps_list, build_dir):
  """Collects the sources that `gn desc` reports for a list of deps.

  Groups in `deps_list` are first expanded with `TraverseGroups`.

  Args:
    deps_list: Deps to inspect. May be None or empty.
    build_dir: Build directory argument passed to `gn desc`.

  Returns:
    None when `deps_list` is empty or None. Otherwise a list with one entry
    per output line of `gn desc ... sources`, with a leading `//` removed
    from the lines that have one.
  """
  if not deps_list:
    return None

  all_sources = []
  for target in TraverseGroups(deps_list, build_dir):
    raw_listing = subprocess.check_output([
        GNPath(), 'desc', '--fail-on-unused-args', build_dir, target, 'sources'
    ])
    listing = bytes(raw_listing).decode('utf8')
    # Drop the leading `//` so the entries become paths relative to the root.
    all_sources.extend(entry[2:] if entry.startswith('//') else entry
                       for entry in listing.splitlines())

  return all_sources
215
216
def GNPath():
  """Returns the path of the `gn` executable under `buildtools`.

  The platform part is `mac/gn` when `sys.platform` is 'darwin' and
  `linux64/gn` when it starts with 'linux'; every other platform falls back
  to `win/gn.exe`.
  """
  platform = sys.platform
  if platform == 'darwin':
    platform_parts = ('mac', 'gn')
  elif platform.startswith('linux'):
    platform_parts = ('linux64', 'gn')
  else:
    platform_parts = ('win', 'gn.exe')

  return os.path.join(CHROMIUM_SRC_DIR, 'buildtools', *platform_parts)
226
227
def SubStringExistsIn(substring_list, string):
  """Tells whether |string| contains at least one of the given substrings.

  Args:
    substring_list: Iterable of candidate substrings.
    string: Text to search.

  Returns:
    True as soon as one candidate is found in |string|; False when none is,
    including when `substring_list` is empty.
  """
  for candidate in substring_list:
    if candidate in string:
      return True
  return False
231
232
def main():
  """Parses the command line and writes the fuzzer's owners file.

  Owners are looked up from `--sources` first. If that yields nothing, the
  sources reported for `--deps` (resolved against `--build-dir`) are tried.
  """
  parser = argparse.ArgumentParser(description='Generate fuzzer owners file.')
  parser.add_argument('--owners', required=True)
  parser.add_argument('--build-dir')
  parser.add_argument('--deps', nargs='+')
  parser.add_argument('--sources', nargs='+')
  args = parser.parse_args()

  # The output is opened before any lookup so that it always exists: when no
  # owner can be determined it is simply left empty, which keeps ninja happy.
  with open(args.owners, 'w') as owners_file:
    owners = GetOwnersForFuzzer(args.sources)
    if not owners:
      # |args.sources| gave no answer; fall back to the sources parsed from
      # |args.deps|.
      owners = GetOwnersForFuzzer(
          GetSourcesFromDeps(args.deps, args.build_dir))

    if owners:
      owners_file.write(owners)


if __name__ == '__main__':
  main()
260