Lines Matching +full:bom +full:- +full:path
4 # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
8 including incorrect file permissions, presence of tabs, non-Unix line endings,
9 trailing whitespace, and presence of UTF-8 BOM.
21 from typing import FrozenSet, Optional, Pattern # pylint: disable=unused-import
25 import scripts_path # pylint: disable=unused-import
30 """Base class for file-wide issue tracking.
38 ``path_exemptions``: files whose path (relative to the root of the source
40 ``None`` to match no path. Paths are normalized and converted to ``/``
43 ``heading``: human-readable description of the issue
49 # pylint: disable=no-member
57 filepath = os.path.normpath(filepath)
60 seps = os.path.sep
61 if os.path.altsep is not None:
62 seps += os.path.altsep
69 or whose path matches ``self.path_exemptions`` will not be checked.
118 """Base class for line-by-line issue tracking.
149 _root, ext = os.path.splitext(filepath)
183 _shebang_re = re.compile(rb'^#! ?(?:/bin/(bash|sh)(?: -[^\n ]*)?'
209 # Shebang on a non-executable file
229 f.seek(-1, 2)
240 """Track files that start with a UTF-8 BOM.
241 Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""
243 heading = "UTF-8 BOM present:"
257 heading = "Invalid UTF-8 or forbidden character:"
259 # Only allow valid UTF-8, and only other explicitly allowed characters.
260 # We deliberately exclude all characters that aren't a simple non-blank,
261 # non-zero-width glyph, apart from a very small set (tab, ordinary space,
262 # line breaks, "basic" no-break space and soft hyphen). In particular,
263 # non-ASCII control characters, combining characters, and Unicode state
264 # changes (e.g. right-to-left text) are forbidden.
266 # for example '-' (U+002D HYPHEN-MINUS) vs '' (U+00AD SOFT HYPHEN) vs
270 '\t\n\r -~', # ASCII (tabs and line endings are checked separately)
271 '\u00A0-\u00FF', # Latin-1 Supplement (for NO-BREAK SPACE and punctuation)
272 '\u2010-\u2027\u2030-\u205E', # General Punctuation (printable)
273 '\u2070\u2071\u2074-\u208E\u2090-\u209C', # Superscripts and Subscripts
274 '\u2190-\u21FF', # Arrows
275 '\u2200-\u22FF', # Mathematical Symbols
276 '\u2500-\u257F' # Box Drawings characters used in markdown trees
284 text = line.decode('utf-8')
288 # Strip BOM (U+FEFF ZERO WIDTH NO-BREAK SPACE) at the beginning.
289 # Which files are allowed to have a BOM is handled in
295 """Track files with non-Unix line endings (i.e. files with CR)."""
297 heading = "Non-Unix line endings:"
309 """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""
311 heading = "Non-Windows line endings:"
319 return not line.endswith(b"\r\n") or b"\r" in line[:-2]
367 """Sanity-check files under the current directory."""
401 bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
402 bytes_filepaths = bytes_output.split(b'\0')[:-1]
404 # Prepend './' to files in the top-level directory so that
405 # something like `'/Makefile' in fp` matches in the top-level
407 return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
428 "-l", "--log_file", type=str, help="path to optional output log",