• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2
3from enum import Enum
4from pathlib import Path
5from typing import Sequence
6from typing import Tuple
7import argparse
8import os
9import re
10import sys
11
# List of specific files to be ignored.
#
# Entries are paths relative to the target directory; do_check() joins each
# one onto the target path. A file whose joined path matches an entry is
# skipped without being passed to do_file(). do_check() aborts if any entry
# was never matched during the directory walk.
IGNORE_FILE_NAME = [
  # Exclude myself
  "generate_notice.py",

  # License files
  "LICENSE",
  "LICENSE.TXT",
  "LICENSE_APACHE2.TXT",
  "LICENSE_BSD_3_CLAUSE.TXT",
  "LICENSE_FSFAP.TXT",
  "LICENSE_MIT.TXT",
  "LICENSE_MIT_MODERN_VARIANT.TXT",
  "MODULE_LICENSE_BSD_LIKE",
  "NOTICE",
  "builds/unix/LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
  "builds/unix/LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
  "docs/FTL.TXT",
  "docs/GPLv2.TXT",
  "src/gzip/LICENSE_ZLIB.TXT",

  # The patch file contains copyright line as a diff. Use it if Copyright is not
  # in a unified diff line.
  "src/gzip/patches/freetype-zlib.diff",
]
37
# Files that are expected NOT to contain the string "Copyright".
#
# Entries are paths relative to the target directory; do_check() joins each
# one onto the target path. do_file() removes an entry when it processes that
# file and finds no "Copyright" string in it; a file without "Copyright" that
# is not listed here is a fatal error. do_check() aborts if any entry is still
# present after the directory walk.
NO_COPYRIGHT_FILES = [
  ".clang-format",
  ".gitignore",
  ".gitlab-ci.yml",
  ".mailmap",
  "Android.bp",
  "METADATA",
  "OWNERS",
  "README.android",
  "TEST_MAPPING",
  "builds/atari/ATARI.H",
  "builds/atari/FNames.SIC",
  "builds/atari/FREETYPE.PRJ",
  "builds/atari/README.TXT",
  "builds/atari/deflinejoiner.awk",
  "builds/atari/gen-purec-patch.sh",
  "builds/mac/FreeType.m68k_cfm.make.txt",
  "builds/mac/FreeType.m68k_far.make.txt",
  "builds/mac/FreeType.ppc_carbon.make.txt",
  "builds/mac/FreeType.ppc_classic.make.txt",
  "builds/mac/README",
  "builds/mac/ascii2mpw.py",
  "builds/mac/freetype-Info.plist",
  "builds/mac/ftlib.prj.xml",
  "builds/unix/.gitignore",
  "builds/unix/freetype2.in",
  "builds/vms/apinames_vms.bash",
  "builds/wince/vc2005-ce/freetype.sln",
  "builds/wince/vc2005-ce/freetype.vcproj",
  "builds/wince/vc2005-ce/index.html",
  "builds/wince/vc2008-ce/freetype.sln",
  "builds/wince/vc2008-ce/freetype.vcproj",
  "builds/wince/vc2008-ce/index.html",
  "builds/windows/.gitignore",
  "builds/windows/vc2010/freetype.sln",
  "builds/windows/vc2010/freetype.user.props",
  "builds/windows/vc2010/freetype.vcxproj",
  "builds/windows/vc2010/freetype.vcxproj.filters",
  "builds/windows/vc2010/index.html",
  "builds/windows/visualc/freetype.dsp",
  "builds/windows/visualc/freetype.dsw",
  "builds/windows/visualc/freetype.sln",
  "builds/windows/visualc/freetype.vcproj",
  "builds/windows/visualc/index.html",
  "builds/windows/visualce/freetype.dsp",
  "builds/windows/visualce/freetype.dsw",
  "builds/windows/visualce/freetype.vcproj",
  "builds/windows/visualce/index.html",
  "devel-teeui/OWNERS",
  "devel-teeui/README.md",
  "devel-teeui/ftmodule.h",
  "devel-teeui/rules.json",
  "devel-teeui/rules.mk",
  "docs/.gitignore",
  "docs/CMAKE",
  "docs/INSTALL.MAC",
  "docs/MAKEPP",
  "docs/PROBLEMS",
  "docs/README",
  "docs/freetype-config.1",
  "docs/markdown/images/favico.ico",
  "docs/markdown/javascripts/extra.js",
  "docs/markdown/stylesheets/extra.css",
  "include/freetype/config/ftmodule.h",
  "include/freetype/ftchapters.h",
  "libft2.map.txt",
  "objs/.gitignore",
  "objs/README",
  "src/gzip/README.freetype",
  "src/gzip/crc32.h",
  "src/gzip/inffixed.h",
  "src/tools/apinames.c",
  "src/tools/chktrcmp.py",
  "src/tools/cordic.py",
  "src/tools/ftrandom/Makefile",
  "src/tools/ftrandom/README",
  "src/tools/make_distribution_archives.py",
  "src/tools/no-copyright",
  "src/tools/test_afm.c",
  "src/tools/test_bbox.c",
  "src/tools/test_trig.c",
  "src/tools/update-copyright",
  "subprojects/harfbuzz.wrap",
  "subprojects/libpng.wrap",
  "subprojects/zlib.wrap",
  "tests/README.md",
  "tests/issue-1063/main.c",
  "tests/meson.build",
  "tests/scripts/download-test-fonts.py",
]
128
class CommentType(Enum):
  """Comment syntax in which a copyright notice is written."""

  # /* ... */
  C_STYLE_BLOCK = 1
  # /* ... */ but uses multiple lines of block comments.
  C_STYLE_BLOCK_AS_LINE = 2
  # // ...
  C_STYLE_LINE = 3
  # # ...
  SCRIPT_STYLE_HASH = 4
  # $! ...
  SCRIPT_STYLE_DOLLER = 5
  # No comment escape.
  DOC_STYLE = 6
  # Placeholder for an unrecognized style.
  UNKNOWN = 10000
137
138
def fatal(msg: str):
  """Report an unrecoverable error and terminate the process.

  Writes msg followed by a newline to stderr, then exits with status 1.
  """
  sys.stderr.write(msg + "\n")
  raise SystemExit(1)
144
145
def warn(msg: str):
  """Write msg followed by a newline to stderr and return normally."""
  sys.stderr.write(msg + "\n")
149
150
def cleanup_and_join(out_lines: Sequence[str]):
  """Normalize extracted notice lines and join them with newlines.

  Trailing blank lines are dropped, then the leading space column shared by
  every line is removed repeatedly until at least one non-empty line no
  longer starts with a space.

  Args:
    out_lines: the extracted lines; the caller's sequence is not modified.

  Returns:
    The cleaned lines joined with "\\n".

  Exits through fatal() when nothing but blank lines (or nothing at all) was
  given.
  """
  # Work on a copy so the caller's sequence is never mutated and any
  # sequence type (not only list) is accepted.
  cleaned = list(out_lines)

  # Guard on emptiness so all-blank input reaches the fatal() below instead of
  # indexing an empty list.
  while cleaned and not cleaned[-1].strip():
    cleaned.pop()

  if not cleaned:
    fatal("Failed to get copyright info")

  # If all lines start with a space (or are empty), strip that column. The
  # last line is non-blank here, so the loop is guaranteed to terminate.
  while all(not x or x[0] == ' ' for x in cleaned):
    cleaned = [x[1:] for x in cleaned]

  return "\n".join(cleaned)
162
163
def get_comment_type(copyright_line: str, path: str) -> CommentType:
  """Classify the comment style of a copyright line.

  Path-based rules are checked first, then the leading characters of
  copyright_line, and finally whether the line holds a complete /* ... */
  comment.
  """
  # vms_make.com contains multiple copyright header as a string constants.
  if path.endswith("/vms_make.com"):
    return CommentType.SCRIPT_STYLE_DOLLER

  if any(marker in path for marker in ("docs/", "README")):
    return CommentType.DOC_STYLE

  prefix_styles = (
    ("#", CommentType.SCRIPT_STYLE_HASH),
    ("//", CommentType.C_STYLE_LINE),
    ("$!", CommentType.SCRIPT_STYLE_DOLLER),
  )
  for prefix, style in prefix_styles:
    if copyright_line.startswith(prefix):
      return style

  opens_and_closes = "/*" in copyright_line and "*/" in copyright_line
  if not opens_and_closes:
    return CommentType.C_STYLE_BLOCK

  # ftrandom.c uses single line block comment for the first Copyright line,
  # and following license notice is wrapped with single block comment.
  # This file can be handled by C_STYLE_BLOCK parser.
  if path.endswith("src/tools/ftrandom/ftrandom.c"):
    return CommentType.C_STYLE_BLOCK
  return CommentType.C_STYLE_BLOCK_AS_LINE
189
190
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
  """Extract the copyright notice that starts at lines[i].

  The comment style is determined with get_comment_type() and the matching
  extractor is called.

  Args:
    lines: all lines of the file.
    i: index of the line that was recognized as a copyright line.
    path: path of the file, used for file-specific rules.

  Returns:
    A (notice, next_index) tuple. Some extractors return None as the notice
    when the line turns out not to start a notice.

  Exits through fatal() if the comment style has no extractor.
  """
  comment_type = get_comment_type(lines[i], path)

  extractors = {
    CommentType.C_STYLE_BLOCK: extract_from_c_style_block_at,
    CommentType.C_STYLE_BLOCK_AS_LINE: extract_from_c_style_block_as_line_at,
    CommentType.C_STYLE_LINE: extract_from_c_style_lines_at,
    CommentType.SCRIPT_STYLE_HASH: extract_from_script_hash_at,
    CommentType.SCRIPT_STYLE_DOLLER: extract_from_script_doller_at,
    CommentType.DOC_STYLE: extract_from_doc_style_at,
  }

  extractor = extractors.get(comment_type)
  if extractor is None:
    fatal("Unknown comment style: %s" % lines[i])
  return extractor(lines, i, path)
209
210
def extract_from_doc_style_at(
    lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
  """Extract a notice from plain text that has no comment escape.

  The notice must begin with "Copyright" at the very start of lines[i]. It
  runs until two consecutive empty lines are found more than four lines
  after the start, or until the end of the file.

  Returns:
    (None, i + 1) when lines[i] does not start with "Copyright"; otherwise
    (notice, next_index).
  """
  if not lines[i].startswith("Copyright"):
    return (None, i + 1)

  def is_copyright_end(lines: Sequence[str], start: int, i: int) -> bool:
    # treat double spacing as end of license header
    if i - start <= 4 or lines[i] != "":
      return False
    # An empty line that is the last line of the file has no successor, so it
    # cannot be double spacing; do not index past the end.
    return i + 1 < len(lines) and lines[i + 1] == ""

  start = i
  while i < len(lines):
    if is_copyright_end(lines, start, i):
      break
    i += 1
  end = i

  if start == end:
    fatal("Failed to get copyright info")
  out_lines = list(lines[start:end])

  return (cleanup_and_join(out_lines), i + 1)
234
235
def extract_from_c_style_lines_at(
    lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
  """Extract a notice written as consecutive "//" line comments.

  The notice covers lines[i] and every following line that starts with "//".

  Returns:
    (notice, next_index), where next_index is one past the first line that
    does not start with "//".
  """
  first = i
  # Advance to the first line that is not a "//" comment.
  while i < len(lines) and lines[i].startswith("//"):
    i += 1
  stop = i

  if first == stop:
    fatal("Failed to get copyright info")

  def strip_marker(text):
    # "// text" loses the marker and one space; a bare "//" becomes empty;
    # anything else (e.g. "//text") is kept unchanged.
    if text.startswith("// "):
      return text[3:]
    if text == "//":
      return ""
    return text

  notice_lines = [strip_marker(text) for text in lines[first:stop]]

  return (cleanup_and_join(notice_lines), i + 1)
263
264
def extract_from_script_hash_at(
    lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
  """Extract a notice written as "#" script comments.

  The notice runs from lines[i] until a line that contains no "#", or until
  two consecutive bare "#" lines, or until the end of the file.

  Returns:
    (None, i + 1) when lines[i], ignoring surrounding whitespace, does not
    start with "#"; otherwise (notice, next_index).
  """
  # startswith() instead of indexing [0] so a blank line is rejected rather
  # than raising IndexError.
  if not lines[i].strip().startswith("#"):
    return (None, i + 1)

  def is_copyright_end(lines: Sequence[str], i: int) -> bool:
    if "#" not in lines[i]:
      return True
    # treat double spacing as end of license header; a bare "#" on the last
    # line of the file has no successor to compare with.
    if lines[i] == "#" and i + 1 < len(lines) and lines[i + 1] == "#":
      return True
    return False

  start = i
  while i < len(lines):
    if is_copyright_end(lines, i):
      break
    i += 1
  end = i

  if start == end:
    fatal("Failed to get copyright info")

  out_lines = []
  for line in lines[start:end]:
    if line.startswith("# "):
      out_lines.append(line[2:])
    elif line == "#":
      out_lines.append("")
    else:
      out_lines.append(line)

  return (cleanup_and_join(out_lines), i + 1)
297
298
def extract_from_script_doller_at(
    lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
  """Extract a notice written as "$!" comments.

  The scan runs from lines[i] until a line that contains no "$!", or until
  two consecutive bare "$!" lines, or until the end of the file.

  Returns:
    (None, i + 1) when lines[i], ignoring surrounding whitespace, does not
    start with "$!"; otherwise (notice, next_index).
  """
  if not lines[i].strip().startswith("$!"):
    return (None, i + 1)

  def is_copyright_end(lines: Sequence[str], i: int) -> bool:
    if "$!" not in lines[i]:
      return True
    # treat double spacing as end of license header; a bare "$!" on the last
    # line of the file has no successor to compare with.
    if lines[i] == "$!" and i + 1 < len(lines) and lines[i + 1] == "$!":
      return True
    return False

  start = i
  while i < len(lines):
    if is_copyright_end(lines, i):
      break
    i += 1
  # NOTE(review): unlike extract_from_script_hash_at, the line that ended the
  # scan is included in the notice. Kept as is because the generated output
  # depends on it -- confirm this is intended.
  end = i + 1

  if start == end:
    fatal("Failed to get copyright info")

  out_lines = []
  for line in lines[start:end]:
    if line.startswith("$! "):
      out_lines.append(line[3:])
    elif line == "$!":
      out_lines.append("")
    else:
      out_lines.append(line)

  return (cleanup_and_join(out_lines), i + 1)
331
332
def extract_from_c_style_block_at(
    lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
  """Extract a notice that lives inside a multi-line /* ... */ comment.

  The notice starts at lines[i] and always includes at least that line. It
  ends at (and includes) the first later line that closes the comment,
  carries one of the known closing phrases, or begins double spacing; if none
  is found it runs to the end of the file.

  Returns:
    (notice, next_index).
  """

  def is_copyright_end(lines: Sequence[str], i: int) -> bool:
    line = lines[i]
    if "*/" in line:
      return True
    if "understand and accept it fully." in line:
      return True
    if "see copyright notice in zlib.h" in line:
      return True
    # Double spacing: two consecutive " *" lines or two consecutive empty
    # lines. The last line of the file has no successor, so do not index
    # past the end.
    if i + 1 < len(lines) and line in (" *", "") and lines[i + 1] == line:
      return True
    return False

  def clean(line: str) -> str:
    clean_line = line

    # Strip beginning "/*" chars
    if clean_line.startswith("/* "):
      clean_line = clean_line[3:]
    if clean_line == "/*":
      clean_line = clean_line[2:]

    # Strip ending "*/" chars
    if clean_line.endswith(" */"):
      clean_line = clean_line[:-3]
    if clean_line.endswith("*/"):
      clean_line = clean_line[:-2]

    # Strip starting " *" chars
    if clean_line.startswith(" * "):
      clean_line = clean_line[3:]
    if clean_line == " *":
      # Slice the partially cleaned text, not the raw line, so an already
      # stripped "*/" (e.g. from " * */") is not brought back.
      clean_line = clean_line[2:]

    # Strip trailing spaces
    return clean_line.rstrip()

  start = i
  i += 1 # include at least one line
  while i < len(lines):
    if is_copyright_end(lines, i):
      break
    i += 1
  end = i + 1  # the terminating line is part of the notice

  out_lines = [clean(line) for line in lines[start:end]]

  return (cleanup_and_join(out_lines), i + 1)
385
386
def extract_from_c_style_block_as_line_at(
    lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
  """Extract a notice where every line is its own /* ... */ comment.

  The notice starts at lines[i] and continues while the following lines
  contain "*/" or are separator lines of the form /****/. Separator lines
  are dropped from the output.

  Returns:
    (notice, next_index).
  """

  def is_separator(line: str) -> bool:
    return re.match(r'/\*+/', line.strip()) is not None

  def is_copyright_end(line: str) -> bool:
    if "*/" in line:
      return False
    if is_separator(line):
      return False
    return True

  def clean(line: str) -> str:
    clean_line = line

    # Strip beginning "/*" chars
    if clean_line.startswith("/* "):
      clean_line = clean_line[3:]
    if clean_line == "/*":
      clean_line = clean_line[2:]

    # Strip ending "*/" chars
    if clean_line.endswith(" */"):
      clean_line = clean_line[:-3]
    if clean_line.endswith("*/"):
      clean_line = clean_line[:-2]

    # Strip starting " *" chars
    if clean_line.startswith(" * "):
      clean_line = clean_line[3:]
    if clean_line == " *":
      # Slice the partially cleaned text, not the raw line, so an already
      # stripped "*/" (e.g. from " * */") is not brought back.
      clean_line = clean_line[2:]

    # Strip trailing spaces
    return clean_line.rstrip()

  start = i
  i += 1 # include at least one line
  while i < len(lines):
    if is_copyright_end(lines[i]):
      break
    i += 1
  # NOTE(review): the first line that is NOT a comment line is included in
  # the notice as well. Kept as is because the generated output depends on
  # it -- confirm this is intended.
  end = i + 1

  out_lines = [clean(line) for line in lines[start:end]
               if not is_separator(line)]

  return (cleanup_and_join(out_lines), i + 1)
436
def is_copyright_line(line: str, path: str) -> bool:
  """Return True if line marks the start of a copyright notice.

  A line qualifies when it contains "Copyright", is not one of the known
  quoted occurrences, and passes the file-specific rule for path, if any.
  """
  if "Copyright" not in line:
    return False

  # For avoiding unexpected mismatches, exclude quoted Copyright string.
  quoted_forms = (
    "`Copyright'",    # For src/psaux/psobjs.c
    "\"Copyright\"",  # For src/cff/cfftoken.h
  )
  if any(quoted in line for quoted in quoted_forms):
    return False

  # The comment contains string of Copyright. Use only immediate Copyright
  # string followed by "# ".
  hash_only_files = ("src/tools/update-copyright-year", "src/tools/glnames.py")
  if path.endswith(hash_only_files):
    return line.startswith("# Copyright ")

  # The unused string constant contains word of Copyright. Use only immediate
  # Copyright string followed by " * ".
  if path.endswith("src/gzip/inftrees.c"):
    return line.startswith(" * Copyright ")

  # Copyright string matches with LegalCopyright key in the RC file.
  if path.endswith("src/base/ftver.rc"):
    return "LegalCopyright" not in line

  return True
464
465
def do_file(path: str, copyrights: dict, no_copyright_files: set):
  """Collect every copyright notice found in one file.

  Args:
    path: file to read. It is decoded as UTF-8, falling back to ISO-8859-1.
    copyrights: dict mapping notice text to the list of paths containing it;
      updated in place.
    no_copyright_files: paths that are allowed to lack a "Copyright" string;
      path is removed from it when the file indeed has none.

  Exits through fatal() when the file has no "Copyright" string and is not
  listed in no_copyright_files, or when no notice could be extracted.
  """
  raw = Path(path).read_bytes()
  try:
    content = raw.decode("utf-8")
  except UnicodeDecodeError:
    # ISO-8859-1 maps every byte, so this fallback cannot fail.
    content = raw.decode("iso-8859-1")

  if "Copyright" not in content:
    if path in no_copyright_files:
      no_copyright_files.remove(path)
    else:
      fatal("%s does not contain Copyright line" % path)
    return

  lines = content.splitlines()

  i = 0
  license_found = False
  while i < len(lines):
    if not is_copyright_line(lines[i], path):
      i += 1
      continue

    # The extractor reports where scanning should resume.
    (notice, i) = extract_copyright_at(lines, i, path)
    if notice:
      copyrights.setdefault(notice, []).append(path)
      license_found = True

  if not license_found:
    fatal("License header could not found: %s" % path)
500
def do_check(path, format):
  """Walk a directory tree, collect copyright notices and print them.

  Args:
    path: target directory; a trailing slash is added if missing.
    format: a Format member selecting the printer.

  Files listed in IGNORE_FILE_NAME are skipped. Every other file below path
  (except inside a ".git" directory) is passed to do_file().

  Exits through fatal() when an IGNORE_FILE_NAME or NO_COPYRIGHT_FILES entry
  is left unmatched after the walk.
  """
  if not path.endswith('/'): # make sure the path ends with slash
    path = path + '/'

  file_to_ignore = {os.path.join(path, x) for x in IGNORE_FILE_NAME}
  no_copyright_files = {os.path.join(path, x) for x in NO_COPYRIGHT_FILES}
  copyrights = {}

  for directory, sub_directories, filenames in os.walk(path):
    # skip .git directory; removing it in place stops os.walk from descending
    if ".git" in sub_directories:
      sub_directories.remove(".git")

    for fname in filenames:
      fpath = os.path.join(directory, fname)
      if fpath in file_to_ignore:
        file_to_ignore.remove(fpath)
        continue
      do_file(fpath, copyrights, no_copyright_files)

  # Sort the leftovers so the error listing is stable between runs.
  if file_to_ignore:
    fatal("Following files are listed in IGNORE_FILE_NAME but do not exist.\n"
          + "\n".join(sorted(file_to_ignore)))

  # do_file() only removes an entry when the file has no "Copyright" string,
  # so an entry also remains when the file exists but does contain one.
  if no_copyright_files:
    fatal("Following files are listed in NO_COPYRIGHT_FILES but do not exist "
          "or contain a Copyright line.\n"
          + "\n".join(sorted(no_copyright_files)))

  if format == Format.notice:
    print_notice(copyrights, False)
  elif format == Format.notice_with_filename:
    print_notice(copyrights, True)
  elif format == Format.html:
    print_html(copyrights)
535
def print_html(copyrights):
  """Print the collected notices to stdout as an HTML table.

  Args:
    copyrights: dict mapping notice text to the list of file paths that
      contain it.

  Each row lists the files (sorted) in the first cell and the notice in the
  second; rows are ordered by notice text. File names and notice text are
  HTML-escaped so characters such as "<", ">" and "&" are shown literally.
  """
  import html

  print('<html>')
  print("""
  <head>
    <style>
      table {
        font-family: monospace
      }

      table tr td {
        padding: 10px 10px 10px 10px
      }
    </style>
  </head>
  """)
  print('<body>')
  print('<table border="1" style="border-collapse:collapse">')
  for notice in sorted(copyrights.keys()):
    files = sorted(copyrights[notice])

    print('<tr>')
    print('<td>')
    print('<ul>')
    for file in files:
      print('<li>%s</li>' % html.escape(file))
    print('</ul>')
    print('</td>')
    print('<td>')
    # Escape first, then turn newlines into <br> so the tags survive.
    print('<p>%s</p>' % html.escape(notice).replace('\n', '<br>'))
    print('</td>')

    print('</tr>')


  print('</table>')
  print('</body></html>')
572
def print_notice(copyrights, print_file):
  """Print the collected notices to stdout as plain text.

  Args:
    copyrights: dict mapping notice text to the list of file paths that
      contain it.
    print_file: when true, the sorted file paths are printed before each
      notice.

  Each notice is followed by a blank line, a line of 67 dashes and another
  blank line.
  """
  separator = "-" * 67
  # print the copyright in sorted order for stable output.
  for notice in sorted(copyrights):
    if print_file:
      listing = "\n".join(sorted(copyrights[notice]))
      print(listing, end="\n\n")
    print(notice, "", separator, "", sep="\n")
584
class Format(Enum):
  """Output formats selectable with the --format option."""

  # Notices only.
  notice = 'notice'
  # Notices, each preceded by the files that contain it.
  notice_with_filename = 'notice_with_filename'
  # HTML table of files and notices.
  html = 'html'

  def __str__(self):
    # Show the bare value (e.g. "html") instead of "Format.html".
    label = self.value
    return label
592
def main():
  """Parse the command line and run the notice collection.

  Options:
    --format: one of the Format values; defaults to "notice".
    --target: directory to scan (required).
  """
  parser = argparse.ArgumentParser(description="Collect notice headers.")
  parser.add_argument("--format", dest="format", type=Format, choices=list(Format),
                      default=Format.notice,
                      help="output format of the collected notices "
                           "(default: %(default)s)")
  parser.add_argument("--target", dest="target", action='store',
                      required=True, help="target directory to collect notice headers")
  res = parser.parse_args()
  do_check(res.target, res.format)

# Run the command line interface only when executed as a script.
if __name__ == "__main__":
  main()
604
605