• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Checks third-party licenses for the purposes of the Android WebView build.
7
8The Android tree includes a snapshot of Chromium in order to power the system
9WebView.  This tool checks that all code uses open-source licenses compatible
10with Android, and that we meet the requirements of those licenses. It can also
11be used to generate an Android NOTICE file for the third-party code.
12
13It makes use of src/tools/licenses.py and the README.chromium files on which
14it depends. It also makes use of a data file, third_party_files_whitelist.txt,
which whitelists individual files which contain third-party code but which
16aren't in a third-party directory with a README.chromium file.
17"""
18
19import glob
20import imp
21import optparse
22import os
23import re
24import subprocess
25import sys
26import textwrap
27
28
# Absolute path of the directory two levels above the one holding this script.
REPOSITORY_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))

# third_party/PRESUBMIT.py is loaded by explicit path through imp so that some
# unrelated PRESUBMIT.py reachable from the import path is never picked up.
# Bytecode writing is switched off first so loading it leaves no .pyc behind.
sys.dont_write_bytecode = True
third_party = imp.load_source(
    'PRESUBMIT',
    os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))
38
39sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
40import licenses
41
42import known_issues
43
class InputApi(object):
  """Small object handed to third_party.LicenseIsCompatibleWithAndroid().

  The only thing it carries is the standard `re` module, exposed as the
  instance attribute `re`.
  NOTE(review): presumably this mimics the part of the presubmit input API
  that the PRESUBMIT.py helper touches - confirm against PRESUBMIT.py.
  """

  def __init__(self):
    # Make the regular-expression module reachable as `self.re`.
    self.re = re
47
def GetIncompatibleDirectories():
  """Lists the third-party directories whose license Android cannot accept.

  This is used by the snapshot tool. A directory is reported when it is listed
  in known_issues.KNOWN_ISSUES, or when its metadata field
  'License Android Compatible' is not 'yes' (compared case-insensitively) and
  third_party.LicenseIsCompatibleWithAndroid() rejects its license name.
  Returns:
    A list of directories.
  Raises:
    licenses.LicenseError: re-raised, after the directory has been named on
      stdout, when licenses.ParseDir() fails for a directory.
  """

  def _IsIncompatible(candidate):
    # Directories with known issues are reported without parsing metadata.
    if candidate in known_issues.KNOWN_ISSUES:
      return True
    try:
      info = licenses.ParseDir(candidate, REPOSITORY_ROOT,
                               require_license_file=False)
    except licenses.LicenseError:
      print('Got LicenseError while scanning ' + candidate)
      raise
    # An explicit opt-in in the metadata overrides the license-name check.
    if info.get('License Android Compatible', 'no').upper() == 'YES':
      return False
    # Drop a trailing " license" / " licenses" word before the lookup.
    license_name = re.split(' [Ll]icenses?$', info['License'])[0]
    return not third_party.LicenseIsCompatibleWithAndroid(InputApi(),
                                                          license_name)

  return [d for d in _FindThirdPartyDirs() if _IsIncompatible(d)]
72
def GetUnknownIncompatibleDirectories():
  """Gets the third-party directories which use licenses incompatible with
  Android and which are not present in the known_issues.py file.
  This is used by the AOSP bot.

  Every entry of known_issues.KNOWN_INCOMPATIBLE maps a path to a list of
  excludes relative to that path. An exclude whose last component is a glob
  expression stands for its parent directory. An exclude with a glob expression
  in the directory part itself is reported on stdout and skipped.
  Returns:
    A frozenset of directories.
  """
  incompatible_directories = frozenset(GetIncompatibleDirectories())
  known_incompatible = set()
  for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.items():
    for exclude in exclude_list:
      if glob.has_magic(exclude):
        exclude_dirname = os.path.dirname(exclude)
        if glob.has_magic(exclude_dirname):
          print('Exclude path %s contains an unexpected glob expression,'
                ' skipping.' % exclude)
          # Actually skip it, as the message says: a path that still contains
          # a glob expression cannot name a concrete directory.
          continue
        # Only the file part was a glob; the whole parent directory is known.
        exclude = exclude_dirname
      known_incompatible.add(os.path.normpath(os.path.join(path, exclude)))
  return incompatible_directories.difference(known_incompatible)
93
94
class ScanResult(object):
  """Integer outcome codes produced by the checks in this file."""
  # Nothing to report.
  Ok = 0
  # Only non-fatal problems were found.
  Warnings = 1
  # At least one fatal problem was found.
  Errors = 2
97
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.

  The scan itself is done by android_webview/tools/find_copyrights.pl, run
  from REPOSITORY_ROOT. Each line of its output is parsed as
  '<path>\\t<copyrights>', where several copyrights are separated by ' / '.
  Lines whose second field is 'GENERATED FILE' are ignored.
  Args:
    excluded_dirs_list: The list of directories to exclude from scanning. The
      list passed in is not modified.
    whitelisted_files: The whitelist of files, as paths relative to
      REPOSITORY_ROOT.
  Returns:
    ScanResult.Ok if all files with non-standard license headers are whitelisted
    and the whitelist contains no stale or missing entries;
    ScanResult.Warnings if there are stale entries or whitelisted files that
    do not exist;
    ScanResult.Errors if new non-whitelisted entries found.
  """

  # Individual third-party directories are replaced by the single common
  # 'third_party' pattern below, which makes the ignore regexp shorter.
  excluded_dirs_list = [
      d for d in excluded_dirs_list if 'third_party' not in d]
  excluded_dirs_list.extend([
      'third_party',
      # VCS dirs
      '.git',
      '.svn',
      # Build output
      'out/Debug',
      'out/Release',
      # 'Copyright' appears in license agreements
      'chrome/app/resources',
      # Quickoffice js files from internal src used on buildbots.
      # crbug.com/350472.
      'chrome/browser/resources/chromeos/quickoffice',
      # This is a test output directory
      'chrome/tools/test/reference_build',
      # Blink-style copyright headers.
      'content/shell/renderer/test_runner',
      'content/shell/tools/plugin',
      # This is a tests directory, doesn't exist in the snapshot
      'content/test/data',
      # This is a tests directory that doesn't exist in the shipped product.
      'gin/test',
      # This is a test output directory
      'data/dom_perf',
      # This is a tests directory that doesn't exist in the shipped product.
      'tools/perf/page_sets',
      'tools/perf/page_sets/tough_animation_cases',
      # Histogram tools, doesn't exist in the snapshot
      'tools/histograms',
      # Swarming tools, doesn't exist in the snapshot
      'tools/swarming_client',
      # Arm sysroot tools, doesn't exist in the snapshot
      'arm-sysroot',
      # Data is not part of open source chromium, but is included on some bots.
      'data',
      # This is not part of open source chromium, but is included on some bots.
      'skia/tools/clusterfuzz-data',
  ])

  args = ['android_webview/tools/find_copyrights.pl', '.'] + excluded_dirs_list
  p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
  lines = p.communicate()[0].splitlines()

  # Raw strings keep the regexp escapes (\*, \.) out of the hands of the
  # string-literal parser.
  allowed_copyrights_re = re.compile(
      r'^(?:\*No copyright\*'
      r'|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
      r'All rights reserved.*)$')

  offending_files = []
  for line in lines:
    entries = line.split('\t')
    if entries[1] == "GENERATED FILE":
      continue
    copyrights = entries[1].split(' / ')
    # One non-empty copyright that is not an allowed one is enough to make
    # the file an offender.
    if any(c and not allowed_copyrights_re.match(c) for c in copyrights):
      offending_files.append(os.path.normpath(entries[0]))

  offending = set(offending_files)
  whitelisted = set(whitelisted_files)

  unknown = offending - whitelisted
  if unknown:
    print('The following files contain a third-party license but are not in '
          'a listed third-party directory and are not whitelisted. You must '
          'add the following files to the whitelist.\n%s' %
          '\n'.join(sorted(unknown)))

  stale = whitelisted - offending
  if stale:
    print('The following files are whitelisted unnecessarily. You must '
          'remove the following files from the whitelist.\n%s' %
          '\n'.join(sorted(stale)))

  # Whitelist entries are relative to the repository root, like the paths
  # reported by the scanner, so resolve them against REPOSITORY_ROOT rather
  # than against whatever the current working directory happens to be.
  missing = [f for f in whitelisted_files
             if not os.path.exists(os.path.join(REPOSITORY_ROOT, f))]
  if missing:
    print('The following files are whitelisted, but do not exist.\n%s' %
          '\n'.join(sorted(missing)))

  if unknown:
    return ScanResult.Errors
  elif stale or missing:
    return ScanResult.Warnings
  else:
    return ScanResult.Ok
194
195
def _ReadFile(path):
  """Reads a file from disk.
  Args:
    path: The path of the file to read, relative to the root of the repository.
  Returns:
    The contents of the file as a string (the file is opened in binary mode).
  """

  # The with-block closes the file deterministically instead of leaving that
  # to garbage collection.
  with open(os.path.join(REPOSITORY_ROOT, path), 'rb') as f:
    return f.read()
205
206
def _FindThirdPartyDirs():
  """Collects the third-party directories of the checkout.

  The search is delegated to licenses.FindThirdPartyDirs() with a fixed set of
  pruned paths, and its result is passed through licenses.FilterDirsWithFiles().
  Returns:
    The list of third-party directories.
  """

  # Please don't add here paths that have problems with license files,
  # as they will end up included in Android WebView snapshot.
  # Instead, add them into known_issues.py.
  pruned_components = (
    # Temporary until we figure out how not to check out quickoffice on the
    # Android license check bot. Tracked in crbug.com/350472.
    ('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
    # Placeholder directory, no third-party code.
    ('third_party', 'adobe'),
    # Apache 2.0 license. See
    # https://code.google.com/p/chromium/issues/detail?id=140478.
    ('third_party', 'bidichecker'),
    # Isn't checked out on clients
    ('third_party', 'gles2_conform'),
    # The llvm-build doesn't exist for non-clang builder
    ('third_party', 'llvm-build'),
    # Binaries doesn't apply to android
    ('third_party', 'widevine'),
    # third_party directories in this tree aren't actually third party, but
    # provide a way to shadow experimental buildfiles into those directories.
    ('build', 'secondary'),
    # Not shipped, Chromium code
    ('tools', 'swarming_client'),
  )
  # Join each component tuple with the platform's path separator.
  prune_paths = [os.path.join(*parts) for parts in pruned_components]
  candidates = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(candidates, REPOSITORY_ROOT)
239
240
def _Scan():
  """Checks that license meta-data is present for all third-party code and
     that all non third-party code doesn't contain external copyrighted code.
  Returns:
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
      entries)
    ScanResult.Errors otherwise.
  """

  third_party_dirs = _FindThirdPartyDirs()

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      # Failures in directories listed in known_issues.KNOWN_ISSUES are
      # tolerated; anything else is reported and fails the scan.
      if path not in known_issues.KNOWN_ISSUES:
        print('Got LicenseError "%s" while scanning %s' % (e, path))
        all_licenses_valid = False

  # Second, check for non-standard license text.
  files_data = _ReadFile(os.path.join('android_webview', 'tools',
                                      'third_party_files_whitelist.txt'))
  # A whitelist entry is the run of non-blank, non-'#' characters at the very
  # start of a line; lines starting with '#' or whitespace yield no entry.
  entry_re = re.compile(r'([^#\s]+)')
  whitelisted_files = []
  for line in files_data.splitlines():
    match = entry_re.match(line)
    if match:
      whitelisted_files.append(match.group(1))
  licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)

  return licenses_check if all_licenses_valid else ScanResult.Errors
274
275
def GenerateNoticeFile():
  """Builds the text of an Android NOTICE file covering the third-party code.

  The text is Chromium's own LICENSE file followed by the license file of
  every third-party directory, in sorted directory order, joined by newlines.
  This is used by the snapshot tool.
  Returns:
    The contents of the NOTICE file.
  """

  dirs_to_attribute = _FindThirdPartyDirs()

  # Chromium's own LICENSE file always comes first.
  notice_parts = [_ReadFile('LICENSE')]

  # We provide attribution for all third-party directories.
  # TODO(steveblock): Limit this to only code used by the WebView binary.
  for third_party_dir in sorted(dirs_to_attribute):
    info = licenses.ParseDir(third_party_dir, REPOSITORY_ROOT,
                             require_license_file=False)
    license_path = info['License File']
    # Nothing to include when there is no license file or when the directory
    # is marked as not shipped.
    if not license_path or license_path == licenses.NOT_SHIPPED:
      continue
    notice_parts.append(_ReadFile(license_path))

  return '\n'.join(notice_parts)
298
299
def _ProcessIncompatibleResult(incompatible_directories):
  """Reports incompatibly licensed directories and converts them to a result.
  Args:
    incompatible_directories: A collection of directory paths.
  Returns:
    ScanResult.Ok if the collection is empty; otherwise ScanResult.Errors,
    after printing the sorted directories to stdout.
  """
  if not incompatible_directories:
    return ScanResult.Ok
  listing = "\n".join(sorted(incompatible_directories))
  print("Incompatibly licensed directories found:\n" + listing)
  return ScanResult.Errors
306
def main():
  """Parses the command line and runs the single requested command.

  Returns:
    The ScanResult value of the command. ScanResult.Errors is also returned,
    after printing the help text, when the number of arguments is not exactly
    one or the command is not recognised.
  """
  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    """Help formatter that wraps each line of the description on its own."""
    def format_description(self, description):
      wrapped = []
      for paragraph in description.split('\n'):
        wrapped.append(textwrap.fill(paragraph, self.width))
      return '\n'.join(wrapped) + '\n'

  commands_help = (
      '\nCommands:\n'
      '  scan Check licenses.\n'
      '  notice Generate Android NOTICE file on stdout.\n'
      '  incompatible_directories Scan for incompatibly licensed'
      ' directories.\n'
      '  all_incompatible_directories Scan for incompatibly licensed'
      ' directories (even those in known_issues.py).\n')

  parser = optparse.OptionParser(usage='%prog [options]',
                                 formatter=FormatterWithNewLines())
  # The module docstring doubles as the first part of the help text.
  parser.description = __doc__ + commands_help
  _, args = parser.parse_args()
  if len(args) != 1:
    parser.print_help()
    return ScanResult.Errors

  command = args[0]
  if command == 'scan':
    scan_result = _Scan()
    if scan_result == ScanResult.Ok:
      print('OK!')
    return scan_result
  if command == 'notice':
    print(GenerateNoticeFile())
    return ScanResult.Ok
  if command == 'incompatible_directories':
    return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
  if command == 'all_incompatible_directories':
    return _ProcessIncompatibleResult(GetIncompatibleDirectories())

  # Unknown command.
  parser.print_help()
  return ScanResult.Errors
344
# When run as a script, the value returned by main() becomes the process exit
# status.
if __name__ == '__main__':
  sys.exit(main())
347