#!/usr/bin/env python3

from enum import Enum
from pathlib import Path
from typing import Sequence
from typing import Tuple
import argparse
import os
import re
import sys

# List of specific files to be ignored (paths relative to the target
# directory). They are skipped entirely when walking the tree.
IGNORE_FILE_NAME = [
    # Exclude myself
    "generate_notice.py",

    # License files
    "LICENSE",
    "LICENSE.TXT",
    "LICENSE_APACHE2.TXT",
    "LICENSE_BSD_3_CLAUSE.TXT",
    "LICENSE_FSFAP.TXT",
    "LICENSE_MIT.TXT",
    "LICENSE_MIT_MODERN_VARIANT.TXT",
    "MODULE_LICENSE_BSD_LIKE",
    "NOTICE",
    "builds/unix/LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
    "builds/unix/LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
    "docs/FTL.TXT",
    "docs/GPLv2.TXT",
    "src/gzip/LICENSE_ZLIB.TXT",

    # The patch file contains copyright line as a diff. Use it if Copyright is
    # not in a unified diff line.
    "src/gzip/patches/freetype-zlib.diff",
]

# Files (paths relative to the target directory) that are allowed to contain
# no "Copyright" string at all.
NO_COPYRIGHT_FILES = [
    ".clang-format",
    ".gitignore",
    ".gitlab-ci.yml",
    ".mailmap",
    "Android.bp",
    "METADATA",
    "OWNERS",
    "README.android",
    "TEST_MAPPING",
    "builds/atari/ATARI.H",
    "builds/atari/FNames.SIC",
    "builds/atari/FREETYPE.PRJ",
    "builds/atari/README.TXT",
    "builds/atari/deflinejoiner.awk",
    "builds/atari/gen-purec-patch.sh",
    "builds/mac/FreeType.m68k_cfm.make.txt",
    "builds/mac/FreeType.m68k_far.make.txt",
    "builds/mac/FreeType.ppc_carbon.make.txt",
    "builds/mac/FreeType.ppc_classic.make.txt",
    "builds/mac/README",
    "builds/mac/ascii2mpw.py",
    "builds/mac/freetype-Info.plist",
    "builds/mac/ftlib.prj.xml",
    "builds/unix/.gitignore",
    "builds/unix/freetype2.in",
    "builds/vms/apinames_vms.bash",
    "builds/wince/vc2005-ce/freetype.sln",
    "builds/wince/vc2005-ce/freetype.vcproj",
    "builds/wince/vc2005-ce/index.html",
    "builds/wince/vc2008-ce/freetype.sln",
    "builds/wince/vc2008-ce/freetype.vcproj",
    "builds/wince/vc2008-ce/index.html",
    "builds/windows/.gitignore",
    "builds/windows/vc2010/freetype.sln",
    "builds/windows/vc2010/freetype.user.props",
    "builds/windows/vc2010/freetype.vcxproj",
    "builds/windows/vc2010/freetype.vcxproj.filters",
    "builds/windows/vc2010/index.html",
    "builds/windows/visualc/freetype.dsp",
    "builds/windows/visualc/freetype.dsw",
    "builds/windows/visualc/freetype.sln",
    "builds/windows/visualc/freetype.vcproj",
    "builds/windows/visualc/index.html",
    "builds/windows/visualce/freetype.dsp",
    "builds/windows/visualce/freetype.dsw",
    "builds/windows/visualce/freetype.vcproj",
    "builds/windows/visualce/index.html",
    "devel-teeui/OWNERS",
    "devel-teeui/README.md",
    "devel-teeui/ftmodule.h",
    "devel-teeui/rules.json",
    "devel-teeui/rules.mk",
    "docs/.gitignore",
    "docs/CMAKE",
    "docs/INSTALL.MAC",
    "docs/MAKEPP",
    "docs/PROBLEMS",
    "docs/README",
    "docs/freetype-config.1",
    "docs/markdown/images/favico.ico",
    "docs/markdown/javascripts/extra.js",
    "docs/markdown/stylesheets/extra.css",
    "include/freetype/config/ftmodule.h",
    "include/freetype/ftchapters.h",
    "libft2.map.txt",
    "objs/.gitignore",
    "objs/README",
    "src/gzip/README.freetype",
    "src/gzip/crc32.h",
    "src/gzip/inffixed.h",
    "src/tools/apinames.c",
    "src/tools/chktrcmp.py",
    "src/tools/cordic.py",
    "src/tools/ftrandom/Makefile",
    "src/tools/ftrandom/README",
    "src/tools/make_distribution_archives.py",
    "src/tools/no-copyright",
    "src/tools/test_afm.c",
    "src/tools/test_bbox.c",
    "src/tools/test_trig.c",
    "src/tools/update-copyright",
    "subprojects/harfbuzz.wrap",
    "subprojects/libpng.wrap",
    "subprojects/zlib.wrap",
    "tests/README.md",
    "tests/issue-1063/main.c",
    "tests/meson.build",
    "tests/scripts/download-test-fonts.py",
]


class CommentType(Enum):
    """Comment syntax in which a copyright notice is written."""

    C_STYLE_BLOCK = 1  # /* ... */
    C_STYLE_BLOCK_AS_LINE = 2  # /* ... */ but uses multiple lines of block comments.
    C_STYLE_LINE = 3  # // ...
    SCRIPT_STYLE_HASH = 4  # # ...
    SCRIPT_STYLE_DOLLER = 5  # $! ...
    DOC_STYLE = 6  # no comment escape
    UNKNOWN = 10000


def fatal(msg: str):
    """Write ``msg`` and a newline to stderr, then exit with status 1."""
    sys.stderr.write(msg)
    sys.stderr.write("\n")
    sys.exit(1)


def warn(msg: str):
    """Write ``msg`` and a newline to stderr without exiting."""
    sys.stderr.write(msg)
    sys.stderr.write("\n")


def cleanup_and_join(out_lines: Sequence[str]) -> str:
    """Normalize extracted notice lines and join them with newlines.

    Trailing blank lines are dropped, then the indentation shared by every
    line is removed one space at a time (empty lines do not prevent this).

    Args:
        out_lines: Notice lines with comment markers already removed. The
            sequence is not modified.

    Returns:
        The cleaned lines joined with "\\n".

    Exits (via ``fatal``) when no non-blank line is left.
    """
    # Work on a copy so that the caller's sequence is never mutated.
    cleaned = list(out_lines)

    # Check for emptiness on every step; indexing [-1] unconditionally would
    # raise IndexError for empty or all-blank input.
    while cleaned and not cleaned[-1].strip():
        cleaned.pop()

    if not cleaned:
        fatal("Failed to get copyright info")

    # If all lines start with a space, strip it out. The last line is known
    # to contain a non-space character, so this loop terminates.
    while all(not x or x[0] == ' ' for x in cleaned):
        cleaned = [x[1:] for x in cleaned]

    return "\n".join(cleaned)


def get_comment_type(copyright_line: str, path: str) -> CommentType:
    """Decide which comment style surrounds ``copyright_line``.

    Path-based rules are checked first, then the leading characters of the
    line; anything left over is treated as a C block comment.

    Args:
        copyright_line: The line that contains the "Copyright" string.
        path: Path of the file the line was read from.

    Returns:
        The CommentType used to select the extraction routine.
    """
    # vms_make.com contains multiple copyright headers as string constants.
    if path.endswith("/vms_make.com"):
        return CommentType.SCRIPT_STYLE_DOLLER

    if "docs/" in path or "README" in path:
        return CommentType.DOC_STYLE

    if copyright_line.startswith("#"):
        return CommentType.SCRIPT_STYLE_HASH
    if copyright_line.startswith("//"):
        return CommentType.C_STYLE_LINE
    if copyright_line.startswith("$!"):
        return CommentType.SCRIPT_STYLE_DOLLER

    if "/*" in copyright_line and "*/" in copyright_line:
        # ftrandom.c uses a single line block comment for the first Copyright
        # line, and the following license notice is wrapped in a single block
        # comment. This file can be handled by the C_STYLE_BLOCK parser.
        if path.endswith("src/tools/ftrandom/ftrandom.c"):
            return CommentType.C_STYLE_BLOCK
        return CommentType.C_STYLE_BLOCK_AS_LINE

    return CommentType.C_STYLE_BLOCK


# Extract the copyright notice and return the next index.
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract the copyright notice that starts at ``lines[i]``.

    The comment style is determined with ``get_comment_type`` and the matching
    ``extract_from_*_at`` routine is called.

    Args:
        lines: All lines of the file.
        i: Index of the line that was recognized as a copyright line.
        path: Path of the file, used for file specific rules.

    Returns:
        A ``(notice, next_index)`` tuple. ``notice`` is None when the selected
        routine decides that the line does not start a notice.
    """
    comment_type = get_comment_type(lines[i], path)

    if comment_type == CommentType.C_STYLE_BLOCK:
        return extract_from_c_style_block_at(lines, i, path)
    elif comment_type == CommentType.C_STYLE_BLOCK_AS_LINE:
        return extract_from_c_style_block_as_line_at(lines, i, path)
    elif comment_type == CommentType.C_STYLE_LINE:
        return extract_from_c_style_lines_at(lines, i, path)
    elif comment_type == CommentType.SCRIPT_STYLE_HASH:
        return extract_from_script_hash_at(lines, i, path)
    elif comment_type == CommentType.SCRIPT_STYLE_DOLLER:
        return extract_from_script_doller_at(lines, i, path)
    elif comment_type == CommentType.DOC_STYLE:
        return extract_from_doc_style_at(lines, i, path)
    else:
        fatal("Uknown comment style: %s" % lines[i])


def _strip_line_prefix(line: str, marker: str) -> str:
    """Remove a leading line-comment ``marker`` (and one space) from ``line``.

    A line that consists of the marker only becomes an empty string; a line
    that does not start with "<marker> " is returned unchanged.
    """
    if line.startswith(marker + " "):
        return line[len(marker) + 1:]
    if line == marker:
        return ""
    return line


def _strip_c_comment_markers(line: str) -> str:
    """Remove "/*", "*/" and leading " *" decoration from one comment line."""
    clean_line = line

    # Strip beginning "/*" chars
    if clean_line.startswith("/* "):
        clean_line = clean_line[3:]
    if clean_line == "/*":
        clean_line = clean_line[2:]

    # Strip ending "*/" chars
    if clean_line.endswith(" */"):
        clean_line = clean_line[:-3]
    if clean_line.endswith("*/"):
        clean_line = clean_line[:-2]

    # Strip starting " *" chars
    if clean_line.startswith(" * "):
        clean_line = clean_line[3:]
    if clean_line == " *":
        # Slice the already cleaned text; slicing the raw line here would
        # bring back a "*/" that was removed above.
        clean_line = clean_line[2:]

    # Strip trailing spaces
    return clean_line.rstrip()


def extract_from_doc_style_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice from plain text that has no comment markers.

    The notice must start with "Copyright" at column 0. It ends at the first
    pair of consecutive empty lines found more than four lines after the
    start, or at the end of the file.

    Returns:
        ``(notice, next_index)``, or ``(None, i + 1)`` when ``lines[i]`` does
        not start with "Copyright".
    """
    if not lines[i].startswith("Copyright"):
        return (None, i + 1)

    start = i
    total = len(lines)

    def is_copyright_end(idx: int) -> bool:
        # Treat double spacing as end of license header. The bounds check
        # keeps the look-ahead from running past the last line.
        return (idx - start > 4
                and lines[idx] == ""
                and idx + 1 < total
                and lines[idx + 1] == "")

    while i < total:
        if is_copyright_end(i):
            break
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")
    out_lines = lines[start:end]

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_c_style_lines_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive "//" comment lines.

    The notice runs from ``lines[i]`` up to (not including) the first line
    that does not start with "//".

    Returns:
        ``(notice, next_index)``.
    """
    start = i
    while i < len(lines) and lines[i].startswith("//"):
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = [_strip_line_prefix(line, "//") for line in lines[start:end]]

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_script_hash_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as "#" comment lines.

    The notice ends at the first line without a "#" character, or at the
    first of two consecutive bare "#" lines.

    Returns:
        ``(notice, next_index)``, or ``(None, i + 1)`` when the stripped
        ``lines[i]`` does not begin with "#".
    """
    if not lines[i].strip().startswith("#"):
        return (None, i + 1)

    total = len(lines)

    def is_copyright_end(idx: int) -> bool:
        if "#" not in lines[idx]:
            return True
        # Treat double spacing as end of license header. The bounds check
        # keeps the look-ahead from running past the last line.
        return lines[idx] == "#" and idx + 1 < total and lines[idx + 1] == "#"

    start = i
    while i < total:
        if is_copyright_end(i):
            break
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = [_strip_line_prefix(line, "#") for line in lines[start:end]]

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_script_doller_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as "$!" comment lines.

    The scan stops at the first line without "$!", or at the first of two
    consecutive bare "$!" lines.

    Returns:
        ``(notice, next_index)``, or ``(None, i + 1)`` when the stripped
        ``lines[i]`` does not begin with "$!".
    """
    if not lines[i].strip().startswith("$!"):
        return (None, i + 1)

    total = len(lines)

    def is_copyright_end(idx: int) -> bool:
        if "$!" not in lines[idx]:
            return True
        # Treat double spacing as end of license header. The bounds check
        # keeps the look-ahead from running past the last line.
        return lines[idx] == "$!" and idx + 1 < total and lines[idx + 1] == "$!"

    start = i
    while i < total:
        if is_copyright_end(i):
            break
        i += 1
    # NOTE(review): unlike the "#" variant, the terminating line itself is
    # included in the notice here. Kept as is -- confirm this is intended.
    end = i + 1

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = [_strip_line_prefix(line, "$!") for line in lines[start:end]]

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_c_style_block_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice that lives inside one "/* ... */" block comment.

    Scanning starts on the line after ``lines[i]``. The terminating line (the
    one with "*/", one of the known closing phrases, or the first line of a
    doubled blank / " *" line) is included in the notice.

    Returns:
        ``(notice, next_index)``.
    """
    total = len(lines)

    def is_copyright_end(idx: int) -> bool:
        line = lines[idx]
        if "*/" in line:
            return True
        if "understand and accept it fully." in line:
            return True
        if "see copyright notice in zlib.h" in line:
            return True
        # The look-ahead must not run past the last line of the file.
        next_line = lines[idx + 1] if idx + 1 < total else None
        if line == " *" and next_line == " *":
            return True
        if line == "" and next_line == "":
            return True
        return False

    start = i
    i += 1  # include at least one line
    while i < total:
        if is_copyright_end(i):
            break
        i += 1
    end = i + 1

    out_lines = [_strip_c_comment_markers(line) for line in lines[start:end]]

    return (cleanup_and_join(out_lines), i + 1)


def extract_from_c_style_block_as_line_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice made of several one-line "/* ... */" comments.

    Scanning starts on the line after ``lines[i]`` and stops at the first line
    that contains no "*/". That line is included in the slice that is cleaned.
    Lines whose stripped text starts with a "/***/"-like run are skipped.

    Returns:
        ``(notice, next_index)``.
    """
    def is_copyright_end(line: str) -> bool:
        if "*/" in line:
            return False
        if re.match(r'/\*+/', line.strip()):
            return False
        return True

    start = i
    i += 1  # include at least one line
    while i < len(lines):
        if is_copyright_end(lines[i]):
            break
        i += 1
    end = i + 1

    out_lines = []
    for line in lines[start:end]:
        # Skip decoration lines such as "/*****/".
        if re.match(r'/\*+/', line.strip()):
            continue
        out_lines.append(_strip_c_comment_markers(line))

    return (cleanup_and_join(out_lines), i + 1)


def is_copyright_line(line: str, path: str) -> bool:
    """Return True if ``line`` shows the start of a copyright notice.

    A line qualifies when it contains "Copyright", is not one of the known
    quoted occurrences, and passes the file specific rule for ``path``.
    """
    if "Copyright" not in line:
        return False

    # For avoiding unexpected mismatches, exclude quoted Copyright string.
    if "`Copyright'" in line:  # For src/psaux/psobjs.c
        return False
    if "\"Copyright\"" in line:  # For src/cff/cfftoken.h
        return False

    if (path.endswith("src/tools/update-copyright-year") or
            path.endswith("src/tools/glnames.py")):
        # The comment contains string of Copyright. Use only immediate
        # Copyright string followed by "# ".
        return line.startswith("# Copyright ")

    if path.endswith("src/gzip/inftrees.c"):
        # The unused string constant contains word of Copyright. Use only
        # immediate Copyright string followed by " * ".
        return line.startswith(" * Copyright ")

    if path.endswith("src/base/ftver.rc"):
        # Copyright string matches with LegalCopyright key in the RC file.
        return "LegalCopyright" not in line

    return True


# Extract the copyright notice and put it into copyrights arg.
def do_file(path: str, copyrights: dict, no_copyright_files: set):
    """Collect every copyright notice found in the file at ``path``.

    The file is decoded as UTF-8, falling back to ISO-8859-1.

    Args:
        path: File to read.
        copyrights: Mapping from notice text to the list of paths that carry
            it; updated in place.
        no_copyright_files: Paths that are allowed to have no "Copyright"
            string. ``path`` is removed from it once seen.

    Exits (via ``fatal``) when the file has no "Copyright" string and is not
    listed in ``no_copyright_files``, or when no notice could be extracted.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        content = raw.decode("iso-8859-1")

    if "Copyright" not in content:
        if path in no_copyright_files:
            no_copyright_files.remove(path)
        else:
            fatal("%s does not contain Copyright line" % path)
        return

    lines = content.splitlines()

    i = 0
    license_found = False
    while i < len(lines):
        if is_copyright_line(lines[i], path):
            (notice, nexti) = extract_copyright_at(lines, i, path)
            if notice:
                copyrights.setdefault(notice, []).append(path)
                license_found = True

            i = nexti
        else:
            i += 1

    if not license_found:
        fatal("License header could not found: %s" % path)


def do_check(path, format):
    """Walk ``path``, collect all notices and print them in ``format``.

    Args:
        path: Directory to scan; the ".git" directory is skipped.
        format: A ``Format`` member selecting the output style.

    Exits (via ``fatal``) when an entry of IGNORE_FILE_NAME or
    NO_COPYRIGHT_FILES was not consumed during the walk.
    """
    if not path.endswith('/'):  # make sure the path ends with slash
        path = path + '/'

    file_to_ignore = {os.path.join(path, x) for x in IGNORE_FILE_NAME}
    no_copyright_files = {os.path.join(path, x) for x in NO_COPYRIGHT_FILES}
    copyrights = {}

    for directory, sub_directories, filenames in os.walk(path):
        # skip .git directory
        if ".git" in sub_directories:
            sub_directories.remove(".git")

        for fname in filenames:
            fpath = os.path.join(directory, fname)
            if fpath in file_to_ignore:
                file_to_ignore.remove(fpath)
                continue
            do_file(fpath, copyrights, no_copyright_files)

    # Leftover entries are sorted so that the error output is stable.
    if file_to_ignore:
        fatal("Following files are listed in IGNORE_FILE_NAME but doesn't exists,.\n"
              + "\n".join(sorted(file_to_ignore)))

    if no_copyright_files:
        fatal("Following files are listed in NO_COPYRIGHT_FILES but doesn't exists.\n"
              + "\n".join(sorted(no_copyright_files)))

    if format == Format.notice:
        print_notice(copyrights, False)
    elif format == Format.notice_with_filename:
        print_notice(copyrights, True)
    elif format == Format.html:
        print_html(copyrights)


def print_html(copyrights):
    """Print the notices as an HTML table, one row per distinct notice.

    Each row lists the files carrying the notice in the first cell and the
    notice text in the second. File names and notice text are HTML-escaped so
    that characters such as "<", ">" and "&" are shown literally.

    Args:
        copyrights: Mapping from notice text to the list of file paths.
    """
    # Local import: only this function needs the html module.
    import html

    print('<html>')
    print("""
  <head>
    <style>
      table {
        font-family: monospace
      }

      table tr td {
        padding: 10px 10px 10px 10px
      }
    </style>
  </head>
  """)
    print('<body>')
    print('<table border="1" style="border-collapse:collapse">')
    for notice in sorted(copyrights.keys()):
        files = sorted(copyrights[notice])

        print('<tr>')
        print('<td>')
        print('<ul>')
        for file_name in files:
            print('<li>%s</li>' % html.escape(file_name, quote=False))
        print('</ul>')
        print('</td>')
        print('<td>')
        # Escape first, then turn newlines into <br> so the tags survive.
        escaped_notice = html.escape(notice, quote=False)
        print('<p>%s</p>' % escaped_notice.replace('\n', '<br>'))
        print('</td>')

        print('</tr>')

    print('</table>')
    print('</body></html>')


def print_notice(copyrights, print_file):
    """Print the notices as plain text separated by a dashed line.

    Args:
        copyrights: Mapping from notice text to the list of file paths.
        print_file: When true, the sorted file list is printed before each
            notice.
    """
    # print the copyright in sorted order for stable output.
    for notice in sorted(copyrights.keys()):
        if print_file:
            files = sorted(copyrights[notice])
            print("\n".join(files))
            print()
        print(notice)
        print()
        print("-" * 67)
        print()


class Format(Enum):
    """Output formats accepted by the --format command line option."""

    notice = 'notice'
    notice_with_filename = 'notice_with_filename'
    html = 'html'

    def __str__(self):
        # argparse shows choices via str(); use the plain value.
        return self.value


def main():
    """Parse the command line and run the notice collection."""
    parser = argparse.ArgumentParser(description="Collect notice headers.")
    parser.add_argument("--format", dest="format", type=Format, choices=list(Format),
                        default=Format.notice,
                        help="output format of the collected notices")
    parser.add_argument("--target", dest="target", action='store',
                        required=True, help="target directory to collect notice headers")
    res = parser.parse_args()
    do_check(res.target, res.format)


if __name__ == "__main__":
    main()