• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import os
2import re
3import sys
4
def check_encoding(encoding, scan_dir, regex_pattern):
  """Check encoding and CRLF line endings of matching files below a directory.

  Walks ``scan_dir`` recursively and inspects every file whose base name
  matches ``regex_pattern`` (tested with ``re.search``). Each file is read
  as bytes and must

  * decode strictly with ``encoding``,
  * not start with a UTF-8 byte order mark when ``encoding`` is 'utf-8',
  * use CRLF line endings exclusively (no bare LF and no bare CR).

  The scan stops at the first problem found, which is printed to stdout
  together with the path being processed at that moment.

  Args:
    encoding: Expected encoding; must be 'ascii' or 'utf-8'.
    scan_dir: Root directory of the recursive scan.
    regex_pattern: Regular expression applied to each file name.

  Returns:
    0 if every matching file passed, -1 if any check (or any other
    operation such as compiling the pattern or reading a file) failed.
  """
  fname = None
  try:
    # Raise explicitly instead of asserting: assert statements are removed
    # under `python -O`, which would silently disable this validation.
    if encoding not in ('ascii', 'utf-8'):
      raise ValueError("unexpected encoding")
    name_filter = re.compile(regex_pattern)
    for root, _dirs, files in os.walk(scan_dir):
      # Report the directory if something fails before a file is selected.
      fname = root
      for f in files:
        if name_filter.search(f) is None:
          continue
        fname = os.path.join(root, f)
        with open(fname, mode='rb') as test_file:
          btext = test_file.read()
        # check encoding (raises UnicodeDecodeError on invalid bytes)
        btext.decode(encoding=encoding, errors="strict")
        if encoding == "utf-8" and btext.startswith(b'\xEF\xBB\xBF'):
          raise ValueError("unexpected BOM in file")
        # check strict CRLF line-ending: every LF and every CR must be part
        # of a CRLF pair, so both counts must equal the CRLF count.
        crlf_count = btext.count(b'\r\n')
        bare_lf = btext.count(b'\n') - crlf_count
        bare_cr = btext.count(b'\r') - crlf_count
        if bare_lf:
          raise ValueError("CRLF violation: found {} LF characters".format(bare_lf))
        if bare_cr:
          raise ValueError("CRLF violation: found {} CR characters".format(bare_cr))
  except Exception as err:
    # Top-level boundary of the checker: report and convert to a status code.
    print("ERROR with [{}]: {}".format(fname, err))
    return -1
  else:
    return 0
32
if __name__ == "__main__":
  # Usage: python check-sources.sh.py 'ascii' '.' '.*\.(cpp|h)$'
  #   argv[1]: expected encoding, argv[2]: directory to scan,
  #   argv[3]: regular expression selecting the file names to check.
  if len(sys.argv) < 4:
    # Fail with a usage hint instead of an IndexError traceback.
    sys.stderr.write(
      "usage: {} ENCODING SCAN_DIR REGEX_PATTERN\n".format(sys.argv[0]))
    sys.exit(-1)
  res = check_encoding(sys.argv[1], sys.argv[2], sys.argv[3])
  # Propagate the result as the process exit status (non-zero on failure).
  sys.exit(0 if res == 0 else -1)
37