"""Scan a directory tree and verify encoding, BOM and CRLF rules of sources."""
import os
import re
import sys


def check_encoding(encoding, scan_dir, regex_pattern):
    """Validate every matching file below *scan_dir*.

    A file passes when its bytes decode strictly with *encoding*, it does
    not start with a UTF-8 byte-order mark (checked for 'utf-8' only), and
    every line ending is exactly CRLF (no lone LF and no lone CR).

    Scanning stops at the first problem, which is printed to stdout together
    with the path being processed at that moment.

    Args:
        encoding: Expected encoding; must be 'ascii' or 'utf-8'.
        scan_dir: Root directory that is walked recursively.
        regex_pattern: Regular expression applied with ``search`` to each
            bare file name (not the full path) to select files to check.

    Returns:
        0 when all selected files pass, -1 on the first violation or on any
        other error (unsupported encoding, bad regex, unreadable file).
    """
    # Tracks what is being processed so the error line can name it; stays
    # None when the failure happens before the directory walk starts.
    current = None
    try:
        # Raise explicitly rather than assert: asserts vanish under -O.
        if encoding not in ('ascii', 'utf-8'):
            raise ValueError("unexpected encoding")
        name_filter = re.compile(regex_pattern)
        for root, _dirs, files in os.walk(scan_dir):
            current = root
            for name in files:
                if name_filter.search(name) is None:
                    continue
                current = os.path.join(root, name)
                # Read raw bytes so that line endings and a possible BOM
                # are seen exactly as stored on disk.
                with open(current, mode='rb') as source_file:
                    content = source_file.read()
                # check encoding (raises UnicodeDecodeError on bad bytes)
                content.decode(encoding=encoding, errors="strict")
                if encoding == "utf-8" and content.startswith(b'\xEF\xBB\xBF'):
                    raise ValueError("unexpected BOM in file")
                # check strict CRLF line-ending: every LF must belong to a
                # CRLF pair, and so must every CR.
                crlf_count = content.count(b'\r\n')
                stray_lf = content.count(b'\n') - crlf_count
                stray_cr = content.count(b'\r') - crlf_count
                if stray_lf:
                    raise ValueError(
                        "CRLF violation: found {} LF characters".format(stray_lf))
                if stray_cr:
                    raise ValueError(
                        "CRLF violation: found {} CR characters".format(stray_cr))
    except Exception as err:
        # Top-level boundary of the checker: any failure is reported and
        # converted into the -1 status that callers test for.
        print("ERROR with [{}]: {}".format(current, err))
        return -1
    return 0


if __name__ == "__main__":
    # python check-sources.sh.py 'ascii' '.' '.*\.(cpp|h)$'
    if len(sys.argv) < 4:
        print("usage: {} ENCODING SCAN_DIR REGEX_PATTERN".format(sys.argv[0]))
        sys.exit(-1)
    res = check_encoding(sys.argv[1], sys.argv[2], sys.argv[3])
    sys.exit(0 if res == 0 else -1)