1# 2# Copyright (C) 2017 and later: Unicode, Inc. and others. 3# License & terms of use: http://www.unicode.org/copyright.html 4# 5# *********************************************************************** 6# * COPYRIGHT: 7# * Copyright (c) 2011, International Business Machines Corporation 8# * and others. All Rights Reserved. 9# *********************************************************************** 10# 11# Common functionality between cpysearch.pl and cpyscan.pl 12# 13 14package Cpy; 15use strict; 16use warnings; 17use base 'Exporter'; 18 19our @EXPORT = qw(any glob_to_regex should_ignore); 20 21# any(CODE, LIST) 22# Evaluate CODE for each element of LIST till CODE($_) returns 1. Return 0 if 23# not found. 24sub any(&@) { 25 my $code = shift; 26 local $_; 27 &$code && return 1 for @_; 28 0; 29} 30 31# Perl doesn't have fnmatch. Closest thing is File::FnMatch but that's 32# UNIX-only (see its caveats). So, as a workaround, convert globs to regular 33# expressions. Translated from Python's fnmatch module. 34sub glob_to_regex($) { 35 my ($glob, $i, $len, $regex); 36 $glob = shift; 37 $i = 0; 38 $len = length($glob); 39 $regex = ""; 40 41 # charat(STR, IDX) 42 # Return the character in the argument at the given index. 43 my $charat = sub($$) { return substr(shift, shift, 1) }; 44 45 while ($i < $len) { 46 my ($c, $out); 47 $c = &$charat($glob, $i++); 48 if ($c eq '*') { $out = '.*' } 49 elsif ($c eq '?') { $out = '.' } 50 elsif ($c eq '[') { # glob classes 51 my $j = $i; 52 53 # Get the closing index of the class. ] appearing here is part 54 # of the class. 55 if ($j < $len && &$charat($glob, $j) eq '!') { $j++ } 56 if ($j < $len && &$charat($glob, $j) eq ']') { $j++ } 57 while ($j < $len && &$charat($glob, $j) ne ']') { $j++ } 58 59 # Didn't find closing brace. Use literal [ 60 if ($j >= $len) { $out = "\\[" } 61 62 else { 63 # The complete class contents (except the braces) 64 my $s = substr($regex, $i, $j - $i); 65 $s =~ s/\\/\\\\/g; 66 $i = $j + 1; # change position to outside class 67 68 # Negation 69 if (&$charat($s, 0) eq '!') { $s = '^'.substr($s, 1); } 70 # Literal ^ 71 elsif (&$charat($s, 0) eq '^') { $s = '\\'.$s; } 72 73 $out = "[$s]"; 74 } 75 } 76 else { $out = quotemeta($c) } 77 $regex .= $out; 78 } 79 return $regex; 80} 81 82# Load cpyskip.txt contents. 83# Try local .cpyskip.txt 84# no support for HTTP fetch. 85our $cpyskip_file = ".cpyskip.txt"; 86our @cpyskip_lines; 87if (open(our $cpyskip_fh, "<", $cpyskip_file)) { 88 @cpyskip_lines = <$cpyskip_fh>; 89 close $cpyskip_fh; 90 # print "Using local cpyskip.txt\n"; 91} else { 92 die "Could not open $cpyskip_file"; 93} 94our @ignore_globs = map { chomp; glob_to_regex($_) } 95 grep { /^\s*[^#\s]+/ } 96 @cpyskip_lines; 97 98#for my $rgx (@ignore_globs) {print $rgx . "\n"} 99#exit(0); 100 101# list of file extensions to ignore 102our @ignore_extensions = qw(svn dll ilk idb pdb dsp dsw opt ncb vcproj sln suo 103 cvsignore cnv res icu exe obj bin exp lib out plg jar spp stub policy ttf 104 TTF otf); 105our $ignore_exts = join '|', 106 map { "\\.$_" } 107 @ignore_extensions; 108 109# ignore regex 110our $ignore_regex = "data/out/build|CVS|\\~|\\#|Debug|Release|positions|unidata|sources\.txt|$ignore_exts"; 111 112# Check if this file should be ignored. 113sub should_ignore($) { 114 my $filename = shift; 115 return 1 if $filename eq $cpyskip_file; 116 return 1 if $filename =~ /$ignore_regex/; 117 for my $r (@ignore_globs) { return 1 if $filename =~ /$r/ } 118 0; 119} 120 1211; 122