1#!/usr/bin/perl -w 2# This script was originally based on the script of the same name from 3# the KDE SDK (by dfaure@kde.org) 4# 5# This version is 6# Copyright (C) 2007, 2008 Adam D. Barratt 7# Copyright (C) 2012 Francesco Poli 8# 9# This program is free software; you can redistribute it and/or modify 10# it under the terms of the GNU General Public License as published by 11# the Free Software Foundation; either version 2 of the License, or 12# (at your option) any later version. 13# 14# This program is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License along 20# with this program. If not, see <http://www.gnu.org/licenses/>. 21 22=head1 NAME 23 24licensecheck - simple license checker for source files 25 26=head1 SYNOPSIS 27 28B<licensecheck> B<--help>|B<--version> 29 30B<licensecheck> [B<--no-conf>] [B<--verbose>] [B<--copyright>] 31[B<-l>|B<--lines=>I<N>] [B<-i>|B<--ignore=>I<regex>] [B<-c>|B<--check=>I<regex>] 32[B<-m>|B<--machine>] [B<-r>|B<--recursive>] 33I<list of files and directories to check> 34 35=head1 DESCRIPTION 36 37B<licensecheck> attempts to determine the license that applies to each file 38passed to it, by searching the start of the file for text belonging to 39various licenses. 40 41If any of the arguments passed are directories, B<licensecheck> will add 42the files contained within to the list of files to process. 43 44=head1 OPTIONS 45 46=over 4 47 48=item B<--verbose>, B<--no-verbose> 49 50Specify whether to output the text being processed from each file before 51the corresponding license information. 52 53Default is to be quiet. 54 55=item B<-l=>I<N>, B<--lines=>I<N> 56 57Specify the number of lines of each file's header which should be parsed 58for license information. (Default is 60). 59 60=item B<-i=>I<regex>, B<--ignore=>I<regex> 61 62When processing the list of files and directories, the regular 63expression specified by this option will be used to indicate those which 64should not be considered (e.g. backup files, VCS metadata). 65 66=item B<-r>, B<--recursive> 67 68Specify that the contents of directories should be added 69recursively. 70 71=item B<-c=>I<regex>, B<--check=>I<regex> 72 73Specify a pattern against which filenames will be matched in order to 74decide which files to check the license of. 75 76The default includes common source files. 77 78=item B<--copyright> 79 80Also display copyright text found within the file 81 82=item B<-m>, B<--machine> 83 84Display the information in a machine readable way, i.e. in the form 85<file><tab><license>[<tab><copyright>] so that it can be easily sorted 86and/or filtered, e.g. with the B<awk> and B<sort> commands. 87Note that using the B<--verbose> option will kill the readability. 88 89=item B<--no-conf>, B<--noconf> 90 91Do not read any configuration files. This can only be used as the first 92option given on the command-line. 93 94=back 95 96=head1 CONFIGURATION VARIABLES 97 98The two configuration files F</etc/devscripts.conf> and 99F<~/.devscripts> are sourced by a shell in that order to set 100configuration variables. Command line options can be used to override 101configuration file settings. Environment variable settings are 102ignored for this purpose. The currently recognised variables are: 103 104=over 4 105 106=item B<LICENSECHECK_VERBOSE> 107 108If this is set to I<yes>, then it is the same as the B<--verbose> command 109line parameter being used. The default is I<no>. 110 111=item B<LICENSECHECK_PARSELINES> 112 113If this is set to a positive number then the specified number of lines 114at the start of each file will be read whilst attempting to determine 115the license(s) in use. This is equivalent to the B<--lines> command line 116option. 117 118=back 119 120=head1 LICENSE 121 122This code is copyright by Adam D. Barratt <I<adam@adam-barratt.org.uk>>, 123all rights reserved; based on a script of the same name from the KDE 124SDK, which is copyright by <I<dfaure@kde.org>>. 125This program comes with ABSOLUTELY NO WARRANTY. 126You are free to redistribute this code under the terms of the GNU 127General Public License, version 2 or later. 128 129=head1 AUTHOR 130 131Adam D. Barratt <adam@adam-barratt.org.uk> 132 133=cut 134 135use strict; 136use warnings; 137use Getopt::Long qw(:config gnu_getopt); 138use File::Basename; 139 140sub fatal($); 141sub parse_copyright($); 142sub parselicense($); 143 144my $progname = basename($0); 145 146# From dpkg-source 147my $default_ignore_regex = ' 148# Ignore general backup files 149(?:^|/).*~$| 150# Ignore emacs recovery files 151(?:^|/)\.#.*$| 152# Ignore vi swap files 153(?:^|/)\..*\.swp$| 154# Ignore baz-style junk files or directories 155(?:^|/),,.*(?:$|/.*$)| 156# File-names that should be ignored (never directories) 157(?:^|/)(?:DEADJOE|\.cvsignore|\.arch-inventory|\.bzrignore|\.gitignore)$| 158# File or directory names that should be ignored 159(?:^|/)(?:CVS|RCS|\.deps|\{arch\}|\.arch-ids|\.svn|\.hg|_darcs|\.git| 160\.shelf|_MTN|\.bzr(?:\.backup|tags)?)(?:$|/.*$) 161'; 162 163# Take out comments and newlines 164$default_ignore_regex =~ s/^#.*$//mg; 165$default_ignore_regex =~ s/\n//sg; 166 167my $default_check_regex = '\.(c(c|pp|xx)?|h(h|pp|xx)?|f(77|90)?|p(l|m)|xs|sh|php|py(|x)|rb|java|vala|el|sc(i|e)|cs|pas|inc|dtd|xsl|mod|m|tex|mli?)$'; 168 169my $modified_conf_msg; 170 171my ($opt_verbose, $opt_lines, $opt_noconf, $opt_ignore_regex, $opt_check_regex) 172 = ('', '', '', '', ''); 173my $opt_recursive = 0; 174my $opt_copyright = 0; 175my $opt_machine = 0; 176my ($opt_help, $opt_version); 177my $def_lines = 60; 178 179# Read configuration files and then command line 180# This is boilerplate 181 182if (@ARGV and $ARGV[0] =~ /^--no-?conf$/) { 183 $modified_conf_msg = " (no configuration files read)"; 184 shift; 185} else { 186 my @config_files = ('/etc/devscripts.conf', '~/.devscripts'); 187 my %config_vars = ( 188 'LICENSECHECK_VERBOSE' => 'no', 189 'LICENSECHECK_PARSELINES' => $def_lines, 190 ); 191 my %config_default = %config_vars; 192 193 my $shell_cmd; 194 # Set defaults 195 foreach my $var (keys %config_vars) { 196 $shell_cmd .= qq[$var="$config_vars{$var}";\n]; 197 } 198 $shell_cmd .= 'for file in ' . join(" ", @config_files) . "; do\n"; 199 $shell_cmd .= '[ -f $file ] && . $file; done;' . "\n"; 200 # Read back values 201 foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" } 202 my $shell_out = `/bin/bash -c '$shell_cmd'`; 203 @config_vars{keys %config_vars} = split /\n/, $shell_out, -1; 204 205 # Check validity 206 $config_vars{'LICENSECHECK_VERBOSE'} =~ /^(yes|no)$/ 207 or $config_vars{'LICENSECHECK_VERBOSE'} = 'no'; 208 $config_vars{'LICENSECHECK_PARSELINES'} =~ /^[1-9][0-9]*$/ 209 or $config_vars{'LICENSECHECK_PARSELINES'} = $def_lines; 210 211 foreach my $var (sort keys %config_vars) { 212 if ($config_vars{$var} ne $config_default{$var}) { 213 $modified_conf_msg .= " $var=$config_vars{$var}\n"; 214 } 215 } 216 $modified_conf_msg ||= " (none)\n"; 217 chomp $modified_conf_msg; 218 219 $opt_verbose = $config_vars{'LICENSECHECK_VERBOSE'} eq 'yes' ? 1 : 0; 220 $opt_lines = $config_vars{'LICENSECHECK_PARSELINES'}; 221} 222 223GetOptions("help|h" => \$opt_help, 224 "version|v" => \$opt_version, 225 "verbose!" => \$opt_verbose, 226 "lines|l=i" => \$opt_lines, 227 "ignore|i=s" => \$opt_ignore_regex, 228 "recursive|r" => \$opt_recursive, 229 "check|c=s" => \$opt_check_regex, 230 "copyright" => \$opt_copyright, 231 "machine|m" => \$opt_machine, 232 "noconf" => \$opt_noconf, 233 "no-conf" => \$opt_noconf, 234 ) 235 or die "Usage: $progname [options] filelist\nRun $progname --help for more details\n"; 236 237$opt_lines = $def_lines if $opt_lines !~ /^[1-9][0-9]*$/; 238$opt_ignore_regex = $default_ignore_regex if ! length $opt_ignore_regex; 239$opt_check_regex = $default_check_regex if ! length $opt_check_regex; 240 241if ($opt_noconf) { 242 fatal "--no-conf is only acceptable as the first command-line option!"; 243} 244if ($opt_help) { help(); exit 0; } 245if ($opt_version) { version(); exit 0; } 246 247die "Usage: $progname [options] filelist\nRun $progname --help for more details\n" unless @ARGV; 248 249$opt_lines = $def_lines if not defined $opt_lines; 250 251my @files = (); 252my @find_args = (); 253my $files_count = @ARGV; 254 255push @find_args, qw(-maxdepth 1) unless $opt_recursive; 256push @find_args, qw(-follow -type f -print); 257 258while (@ARGV) { 259 my $file = shift @ARGV; 260 261 if (-d $file) { 262 open FIND, '-|', 'find', $file, @find_args 263 or die "$progname: couldn't exec find: $!\n"; 264 265 while (<FIND>) { 266 chomp; 267 next unless m%$opt_check_regex%; 268 # Skip empty files 269 next if (-z $_); 270 push @files, $_ unless m%$opt_ignore_regex%; 271 } 272 close FIND; 273 } else { 274 next unless ($files_count == 1) or $file =~ m%$opt_check_regex%; 275 push @files, $file unless $file =~ m%$opt_ignore_regex%; 276 } 277} 278 279while (@files) { 280 my $file = shift @files; 281 my $content = ''; 282 my $copyright_match; 283 my $copyright = ''; 284 my $license = ''; 285 my %copyrights; 286 287 open (F, "<$file") or die "Unable to access $file\n"; 288 while (<F>) { 289 last if ($. > $opt_lines); 290 $content .= $_; 291 $copyright_match = parse_copyright($_); 292 if ($copyright_match) { 293 $copyrights{lc("$copyright_match")} = "$copyright_match"; 294 } 295 } 296 close(F); 297 298 $copyright = join(" / ", values %copyrights); 299 300 print qq(----- $file header -----\n$content----- end header -----\n\n) 301 if $opt_verbose; 302 303 # Remove Fortran comments 304 $content =~ s/^[cC] //gm; 305 $content =~ tr/\t\r\n/ /; 306 # Remove C / C++ comments 307 $content =~ s#(\*/|/[/*])##g; 308 $content =~ tr% A-Za-z.,@;0-9\(\)/-%%cd; 309 $content =~ tr/ //s; 310 311 $license = parselicense($content); 312 if ($opt_machine) { 313 print "$file\t$license"; 314 print "\t" . ($copyright or "*No copyright*") if $opt_copyright; 315 print "\n"; 316 } else { 317 print "$file: "; 318 print "*No copyright* " unless $copyright; 319 print $license . "\n"; 320 print " [Copyright: " . $copyright . "]\n" 321 if $copyright and $opt_copyright; 322 print "\n" if $opt_copyright; 323 } 324} 325 326sub parse_copyright($) { 327 my $copyright = ''; 328 my $match; 329 330 my $copyright_indicator_regex = ' 331 (?:copyright # The full word 332 |copr\. # Legally-valid abbreviation 333 |\x{00a9} # Unicode character COPYRIGHT SIGN 334 |\xc2\xa9 # Unicode copyright sign encoded in iso8859 335 |\(c\) # Legally-null representation of sign 336 )'; 337 my $copyright_disindicator_regex = ' 338 \b(?:info(?:rmation)? # Discussing copyright information 339 |notice # Discussing the notice 340 |and|or # Part of a sentence 341 )\b'; 342 343 if (m%$copyright_indicator_regex(?::\s*|\s+)(\S.*)$%ix) { 344 $match = $1; 345 346 # Ignore lines matching "see foo for copyright information" etc. 347 if ($match !~ m%^\s*$copyright_disindicator_regex%ix) { 348 # De-cruft 349 $match =~ s/([,.])?\s*$//; 350 $match =~ s/$copyright_indicator_regex//igx; 351 $match =~ s/^\s+//; 352 $match =~ s/\s{2,}/ /g; 353 $match =~ s/\\@/@/g; 354 $copyright = $match; 355 } 356 } 357 358 return $copyright; 359} 360 361sub help { 362 print <<"EOF"; 363Usage: $progname [options] filename [filename ...] 364Valid options are: 365 --help, -h Display this message 366 --version, -v Display version and copyright info 367 --no-conf, --noconf Don't read devscripts config files; must be 368 the first option given 369 --verbose Display the header of each file before its 370 license information 371 --lines, -l Specify how many lines of the file header 372 should be parsed for license information 373 (Default: $def_lines) 374 --check, -c Specify a pattern indicating which files should 375 be checked 376 (Default: '$default_check_regex') 377 --machine, -m Display in a machine readable way (good for awk) 378 --recursive, -r Add the contents of directories recursively 379 --copyright Also display the file's copyright 380 --ignore, -i Specify that files / directories matching the 381 regular expression should be ignored when 382 checking files 383 (Default: '$default_ignore_regex') 384 385Default settings modified by devscripts configuration files: 386$modified_conf_msg 387EOF 388} 389 390sub version { 391 print <<"EOF"; 392This is $progname, from the Debian devscripts package, version ###VERSION### 393Copyright (C) 2007, 2008 by Adam D. Barratt <adam\@adam-barratt.org.uk>; based 394on a script of the same name from the KDE SDK by <dfaure\@kde.org>. 395 396This program comes with ABSOLUTELY NO WARRANTY. 397You are free to redistribute this code under the terms of the 398GNU General Public License, version 2, or (at your option) any 399later version. 400EOF 401} 402 403sub parselicense($) { 404 my ($licensetext) = @_; 405 406 my $gplver = ""; 407 my $extrainfo = ""; 408 my $license = ""; 409 410 if ($licensetext =~ /version ([^, ]+?)[.,]? (?:\(?only\)?.? )?(?:of the GNU (Affero )?(Lesser |Library )?General Public License )?(as )?published by the Free Software Foundation/i or 411 $licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License (?:as )?published by the Free Software Foundation; version ([^, ]+?)[.,]? /i) { 412 413 $gplver = " (v$1)"; 414 } elsif ($licensetext =~ /GNU (?:Affero )?(?:Lesser |Library )?General Public License, version (\d+(?:\.\d+)?)[ \.]/) { 415 $gplver = " (v$1)"; 416 } elsif ($licensetext =~ /either version ([^ ]+)(?: of the License)?, or \(at your option\) any later version/) { 417 $gplver = " (v$1 or later)"; 418 } 419 420 if ($licensetext =~ /(?:675 Mass Ave|59 Temple Place|51 Franklin Steet|02139|02111-1307)/i) { 421 $extrainfo = " (with incorrect FSF address)$extrainfo"; 422 } 423 424 if ($licensetext =~ /permission (?:is (also granted|given))? to link (the code of )?this program with (any edition of )?(Qt|the Qt library)/i) { 425 $extrainfo = " (with Qt exception)$extrainfo" 426 } 427 428 if ($licensetext =~ /(All changes made in this file will be lost|DO NOT (EDIT|delete this file)|Generated (automatically|by|from)|generated.*file)/i) { 429 $license = "GENERATED FILE"; 430 } 431 432 if ($licensetext =~ /is (free software.? you can redistribute it and\/or modify it|licensed) under the terms of (version [^ ]+ of )?the (GNU (Library |Lesser )General Public License|LGPL)/i) { 433 $license = "LGPL$gplver$extrainfo $license"; 434 } 435 436 if ($licensetext =~ /is free software.? you can redistribute it and\/or modify it under the terms of the (GNU Affero General Public License|AGPL)/i) { 437 $license = "AGPL$gplver$extrainfo $license"; 438 } 439 440 if ($licensetext =~ /is free software.? you (can|may) redistribute it and\/or modify it under the terms of (?:version [^ ]+ (?:\(?only\)? )?of )?the GNU General Public License/i) { 441 $license = "GPL$gplver$extrainfo $license"; 442 } 443 444 if ($licensetext =~ /is distributed under the terms of the GNU General Public License,/ 445 and length $gplver) { 446 $license = "GPL$gplver$extrainfo $license"; 447 } 448 449 if ($licensetext =~ /is distributed.*terms.*GPL/) { 450 $license = "GPL (unversioned/unknown version) $license"; 451 } 452 453 if ($licensetext =~ /This file is part of the .*Qt GUI Toolkit. This file may be distributed under the terms of the Q Public License as defined/) { 454 $license = "QPL (part of Qt) $license"; 455 } elsif ($licensetext =~ /may be distributed under the terms of the Q Public License as defined/) { 456 $license = "QPL $license"; 457 } 458 459 if ($licensetext =~ /opensource\.org\/licenses\/mit-license\.php/) { 460 $license = "MIT/X11 (BSD like) $license"; 461 } elsif ($licensetext =~ /Permission is hereby granted, free of charge, to any person obtaining a copy of this software and(\/or)? associated documentation files \(the (Software|Materials)\), to deal in the (Software|Materials)/) { 462 $license = "MIT/X11 (BSD like) $license"; 463 } elsif ($licensetext =~ /Permission is hereby granted, without written agreement and without license or royalty fees, to use, copy, modify, and distribute this software and its documentation for any purpose/) { 464 $license = "MIT/X11 (BSD like) $license"; 465 } 466 467 if ($licensetext =~ /Permission to use, copy, modify, and(\/or)? distribute this software for any purpose with or without fee is hereby granted, provided.*copyright notice.*permission notice.*all copies/) { 468 $license = "ISC $license"; 469 } 470 471 if ($licensetext =~ /THIS SOFTWARE IS PROVIDED .*AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY/) { 472 if ($licensetext =~ /All advertising materials mentioning features or use of this software must display the following acknowledge?ment.*This product includes software developed by/i) { 473 $license = "BSD (4 clause) $license"; 474 } elsif ($licensetext =~ /(The name .*? may not|Neither the names? .*? nor the names of (its|their) contributors may) be used to endorse or promote products derived from this software/i) { 475 $license = "BSD (3 clause) $license"; 476 } elsif ($licensetext =~ /Redistributions of source code must retain the above copyright notice/i) { 477 $license = "BSD (2 clause) $license"; 478 } else { 479 $license = "BSD $license"; 480 } 481 } 482 483 if ($licensetext =~ /Mozilla Public License Version ([^ ]+)/) { 484 $license = "MPL (v$1) $license"; 485 } 486 487 if ($licensetext =~ /Released under the terms of the Artistic License ([^ ]+)/) { 488 $license = "Artistic (v$1) $license"; 489 } 490 491 if ($licensetext =~ /is free software under the Artistic [Ll]icense/) { 492 $license = "Artistic $license"; 493 } 494 495 if ($licensetext =~ /This program is free software; you can redistribute it and\/or modify it under the same terms as Perl itself/) { 496 $license = "Perl $license"; 497 } 498 499 if ($licensetext =~ /under the Apache License, Version ([^ ]+)/) { 500 $license = "Apache (v$1) $license"; 501 } 502 503 if ($licensetext =~ /(THE BEER-WARE LICENSE)/i) { 504 $license = "Beerware $license"; 505 } 506 507 if ($licensetext =~ /This source file is subject to version ([^ ]+) of the PHP license/) { 508 $license = "PHP (v$1) $license"; 509 } 510 511 if ($licensetext =~ /under the terms of the CeCILL /) { 512 $license = "CeCILL $license"; 513 } 514 515 if ($licensetext =~ /under the terms of the CeCILL-([^ ]+) /) { 516 $license = "CeCILL-$1 $license"; 517 } 518 519 if ($licensetext =~ /under the SGI Free Software License B/) { 520 $license = "SGI Free Software License B $license"; 521 } 522 523 if ($licensetext =~ /is in the public domain/i) { 524 $license = "Public domain $license"; 525 } 526 527 if ($licensetext =~ /terms of the Common Development and Distribution License(, Version ([^(]+))? \(the License\)/) { 528 $license = "CDDL " . ($1 ? "(v$2) " : '') . $license; 529 } 530 531 if ($licensetext =~ /Microsoft Permissive License \(Ms-PL\)/) { 532 $license = "Ms-PL $license"; 533 } 534 535 if ($licensetext =~ /Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license \(the \"Software\"\)/ or 536 $licensetext =~ /Boost Software License([ ,-]+Version ([^ ]+)?(\.))/i) { 537 $license = "BSL " . ($1 ? "(v$2) " : '') . $license; 538 } 539 540 if ($licensetext =~ /PYTHON SOFTWARE FOUNDATION LICENSE (VERSION ([^ ]+))/i) { 541 $license = "PSF " . ($1 ? "(v$2) " : '') . $license; 542 } 543 544 if ($licensetext =~ /The origin of this software must not be misrepresented.*Altered source versions must be plainly marked as such.*This notice may not be removed or altered from any source distribution/ or 545 $licensetext =~ /see copyright notice in zlib\.h/) { 546 $license = "zlib/libpng $license"; 547 } elsif ($licensetext =~ /This code is released under the libpng license/) { 548 $license = "libpng $license"; 549 } 550 551 if ($licensetext =~ /Do What The Fuck You Want To Public License, Version ([^, ]+)/i) { 552 $license = "WTFPL (v$1) $license"; 553 } 554 555 if ($licensetext =~ /Do what The Fuck You Want To Public License/i) { 556 $license = "WTFPL $license"; 557 } 558 559 if ($licensetext =~ /(License WTFPL|Under (the|a) WTFPL)/i) { 560 $license = "WTFPL $license"; 561 } 562 563 $license = "UNKNOWN" if (!length($license)); 564 565 # Remove trailing spaces. 566 $license =~ s/\s+$//; 567 568 return $license; 569} 570 571sub fatal($) { 572 my ($pack,$file,$line); 573 ($pack,$file,$line) = caller(); 574 (my $msg = "$progname: fatal error at line $line:\n@_\n") =~ tr/\0//d; 575 $msg =~ s/\n\n$/\n/; 576 die $msg; 577} 578