#!/usr/bin/env python3

# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
#
# This file is provided under the Apache License 2.0, or the
# GNU General Public License v2.0 or later.
#
# **********
# Apache License 2.0:
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# **********
#
# **********
# GNU General Public License v2.0 or later:
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# **********

"""
This script checks the current state of the source code for minor issues,
including incorrect file permissions, presence of tabs, non-Unix line endings,
trailing whitespace, and presence of UTF-8 BOM.
Note: requires python 3, must be run from Mbed TLS root.
"""

import os
import argparse
import logging
import codecs
import re
import subprocess
import sys


class FileIssueTracker:
    """Base class for file-wide issue tracking.

    To implement a checker that processes a file as a whole, inherit from
    this class and implement `check_file_for_issue` and define ``heading``.

    ``suffix_exemptions``: files whose name ends with a string in this set
     will not be checked.

    ``path_exemptions``: files whose path (relative to the root of the source
    tree) matches this regular expression will not be checked. This can be
    ``None`` to match no path. Paths are normalized and converted to ``/``
    separators before matching.

    ``heading``: human-readable description of the issue
    """

    suffix_exemptions = frozenset()
    path_exemptions = None
    # heading must be defined in derived classes.
    # pylint: disable=no-member

    def __init__(self):
        # Maps a file path to a list of line numbers, or to None when the
        # issue applies to the file as a whole.
        self.files_with_issues = {}

    @staticmethod
    def normalize_path(filepath):
        """Normalize ``filepath`` with / as the directory separator."""
        filepath = os.path.normpath(filepath)
        # On Windows, we may have backslashes to separate directories.
        # We need slashes to match exemption lists.
        # Replace each platform separator individually: splitting on the
        # concatenation of sep and altsep would only match that exact
        # two-character sequence and leave the path unconverted.
        for sep in (os.path.sep, os.path.altsep):
            if sep is not None and sep != '/':
                filepath = filepath.replace(sep, '/')
        return filepath

    def should_check_file(self, filepath):
        """Whether the given file name should be checked.

        Files whose name ends with a string listed in ``self.suffix_exemptions``
        or whose path matches ``self.path_exemptions`` will not be checked.
        """
        # str.endswith accepts a tuple of candidates; an empty tuple never
        # matches, so an empty exemption set exempts nothing.
        if filepath.endswith(tuple(self.suffix_exemptions)):
            return False
        if self.path_exemptions and \
           re.match(self.path_exemptions, self.normalize_path(filepath)):
            return False
        return True

    def check_file_for_issue(self, filepath):
        """Check the specified file for the issue that this class is for.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def record_issue(self, filepath, line_number):
        """Record that an issue was found at the specified location."""
        self.files_with_issues.setdefault(filepath, []).append(line_number)

    def output_file_issues(self, logger):
        """Log all the locations where the issue was found.

        Nothing is logged when no issue was recorded. Otherwise the heading
        is logged first, then one entry per file in sorted order (with the
        line numbers when there are any), then an empty separator line.
        """
        if self.files_with_issues:
            logger.info(self.heading)
            for filename, lines in sorted(self.files_with_issues.items()):
                if lines:
                    logger.info(
                        "%s: %s", filename, ", ".join(str(x) for x in lines)
                    )
                else:
                    # Whole-file issues are stored without line numbers.
                    logger.info(filename)
            logger.info("")


# Patterns for paths (with / separators, relative to the source root) that
# the line-oriented and text-oriented checkers treat as binary and skip.
BINARY_FILE_PATH_RE_LIST = [
    r'docs/.*\.pdf\Z',
    r'programs/fuzz/corpuses/[^.]+\Z',
    r'tests/data_files/[^.]+\Z',
    r'tests/data_files/.*\.(crt|csr|db|der|key|pubkey)\Z',
    r'tests/data_files/.*\.req\.[^/]+\Z',
    r'tests/data_files/.*malformed[^/]+\Z',
    r'tests/data_files/format_pkcs12\.fmt\Z',
]
BINARY_FILE_PATH_RE = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST))


class LineIssueTracker(FileIssueTracker):
    """Base class for line-by-line issue tracking.

    To implement a checker that processes files line by line, inherit from
    this class and implement `issue_with_line`.
    """

    # Exclude binary files.
    path_exemptions = BINARY_FILE_PATH_RE

    def issue_with_line(self, line, filepath):
        """Check the specified line for the issue that this class is for.

        ``line`` is a bytes object including its line terminator, if any.

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def check_file_line(self, filepath, line, line_number):
        """Record ``line_number`` of ``filepath`` if ``line`` has the issue."""
        if self.issue_with_line(line, filepath):
            self.record_issue(filepath, line_number)

    def check_file_for_issue(self, filepath):
        """Check the lines of the specified file.

        Subclasses must implement the ``issue_with_line`` method.
        """
        # Read in binary mode so that line endings and arbitrary bytes
        # reach the checkers unmodified. Line numbers start at 1.
        with open(filepath, "rb") as f:
            for line_number, line in enumerate(f, start=1):
                self.check_file_line(filepath, line, line_number)


def is_windows_file(filepath):
    """Whether ``filepath`` has an extension listed as a Windows file type."""
    _root, ext = os.path.splitext(filepath)
    return ext in ('.bat', '.dsp', '.dsw', '.sln', '.vcxproj')


class PermissionIssueTracker(FileIssueTracker):
    """Track files with bad permissions.

    Files that are not executable scripts must not be executable."""

    heading = "Incorrect permissions:"

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its executable bit disagrees with its suffix.

        Files ending in ``.sh``, ``.pl`` or ``.py`` must be executable;
        all other files must not be.
        """
        is_executable = os.access(filepath, os.X_OK)
        should_be_executable = filepath.endswith((".sh", ".pl", ".py"))
        if is_executable != should_be_executable:
            self.files_with_issues[filepath] = None


class EndOfFileNewlineIssueTracker(FileIssueTracker):
    """Track files that end with an incomplete line
    (no newline character at the end of the last line)."""

    heading = "Missing newline at end of file:"

    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if its last byte is not a newline."""
        with open(filepath, "rb") as f:
            try:
                f.seek(-1, os.SEEK_END)
            except OSError:
                # This script only works on regular files. If we can't seek
                # 1 before the end, it means that this position is before
                # the beginning of the file, i.e. that the file is empty.
                return
            if f.read(1) != b"\n":
                self.files_with_issues[filepath] = None


class Utf8BomIssueTracker(FileIssueTracker):
    """Track files that start with a UTF-8 BOM.
    Files should be ASCII or UTF-8. Valid UTF-8 does not start with a BOM."""

    heading = "UTF-8 BOM present:"

    suffix_exemptions = frozenset([".vcxproj", ".sln"])
    path_exemptions = BINARY_FILE_PATH_RE

    def check_file_for_issue(self, filepath):
        """Flag ``filepath`` if it begins with the UTF-8 byte order mark."""
        with open(filepath, "rb") as f:
            # Only the leading bytes matter: don't load the whole file.
            if f.read(len(codecs.BOM_UTF8)) == codecs.BOM_UTF8:
                self.files_with_issues[filepath] = None


class UnixLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Unix line endings (i.e. files with CR)."""

    heading = "Non-Unix line endings:"

    def should_check_file(self, filepath):
        """Check every non-exempt file that is not a Windows file."""
        if not super().should_check_file(filepath):
            return False
        return not is_windows_file(filepath)

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` contains a carriage return anywhere."""
        return b"\r" in line


class WindowsLineEndingIssueTracker(LineIssueTracker):
    """Track files with non-Windows line endings (i.e. CR or LF not in CRLF)."""

    heading = "Non-Windows line endings:"

    def should_check_file(self, filepath):
        """Check only non-exempt files that are Windows files."""
        if not super().should_check_file(filepath):
            return False
        return is_windows_file(filepath)

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` lacks a CRLF terminator or has a stray CR."""
        return not line.endswith(b"\r\n") or b"\r" in line[:-2]


class TrailingWhitespaceIssueTracker(LineIssueTracker):
    """Track lines with trailing whitespace."""

    heading = "Trailing whitespace:"
    suffix_exemptions = frozenset([".dsp", ".md"])

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` has whitespace before its line terminator."""
        # Stripping only CR/LF and stripping all trailing whitespace give
        # different results exactly when other whitespace precedes the
        # terminator.
        return line.rstrip(b"\r\n") != line.rstrip()


class TabIssueTracker(LineIssueTracker):
    """Track lines with tabs."""

    heading = "Tabs present:"
    suffix_exemptions = frozenset([
        ".pem",  # some openssl dumps have tabs
        ".sln",
        "/Makefile",
        "/generate_visualc_files.pl",
    ])

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` contains a tab character."""
        return b"\t" in line


class MergeArtifactIssueTracker(LineIssueTracker):
    """Track lines with merge artifacts.
    These are leftovers from a ``git merge`` that wasn't fully edited."""

    heading = "Merge artifact:"

    def issue_with_line(self, line, _filepath):
        """Whether ``line`` looks like a leftover git conflict marker."""
        # Detect leftover git conflict markers.
        # '||||||| ' comes from merge.conflictStyle=diff3.
        if line.startswith((b'<<<<<<< ', b'>>>>>>> ', b'||||||| ')):
            return True
        # A bare '=======' line is not reported in .md files.
        if line.rstrip(b'\r\n') == b'=======' and \
           not _filepath.endswith('.md'):
            return True
        return False


class IntegrityChecker:
    """Sanity-check files under the current directory."""

    def __init__(self, log_file):
        """Instantiate the sanity checker.
        Check files under the current directory.
        Write a report of issues to log_file."""
        self.check_repo_path()
        self.logger = None
        self.setup_logger(log_file)
        self.issues_to_check = [
            PermissionIssueTracker(),
            EndOfFileNewlineIssueTracker(),
            Utf8BomIssueTracker(),
            UnixLineEndingIssueTracker(),
            WindowsLineEndingIssueTracker(),
            TrailingWhitespaceIssueTracker(),
            TabIssueTracker(),
            MergeArtifactIssueTracker(),
        ]

    @staticmethod
    def check_repo_path():
        """Raise an exception unless the current directory contains the
        ``include``, ``library`` and ``tests`` directories."""
        if not all(os.path.isdir(d) for d in ["include", "library", "tests"]):
            raise Exception("Must be run from Mbed TLS root")

    def setup_logger(self, log_file, level=logging.INFO):
        """Configure the root logger to write to ``log_file``.

        If ``log_file`` is empty or ``None``, attach a default
        ``logging.StreamHandler`` instead.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        if log_file:
            handler = logging.FileHandler(log_file)
            self.logger.addHandler(handler)
        else:
            console = logging.StreamHandler()
            self.logger.addHandler(console)

    @staticmethod
    def collect_files():
        """Return the paths listed by ``git ls-files`` as ASCII strings.

        Raises ``subprocess.CalledProcessError`` if git fails and
        ``UnicodeDecodeError`` if a file name is not ASCII.
        """
        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
        # The output is NUL-terminated, so drop the final empty field.
        bytes_filepaths = bytes_output.split(b'\0')[:-1]
        ascii_filepaths = [fp.decode('ascii') for fp in bytes_filepaths]
        # Prepend './' to files in the top-level directory so that
        # something like `'/Makefile' in fp` matches in the top-level
        # directory as well as in subdirectories.
        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
                for fp in ascii_filepaths]

    def check_files(self):
        """Run every issue tracker on every file it applies to."""
        # List the files once rather than once per tracker: each call to
        # collect_files() spawns a git subprocess.
        filepaths = self.collect_files()
        for issue_to_check in self.issues_to_check:
            for filepath in filepaths:
                if issue_to_check.should_check_file(filepath):
                    issue_to_check.check_file_for_issue(filepath)

    def output_issues(self):
        """Log all recorded issues.

        Return 1 if any tracker recorded an issue, 0 otherwise.
        """
        integrity_return_code = 0
        for issue_to_check in self.issues_to_check:
            if issue_to_check.files_with_issues:
                integrity_return_code = 1
            issue_to_check.output_file_issues(self.logger)
        return integrity_return_code


def run_main():
    """Parse the command line, run all checks and exit with their status."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-l", "--log_file", type=str, help="path to optional output log",
    )
    check_args = parser.parse_args()
    integrity_check = IntegrityChecker(check_args.log_file)
    integrity_check.check_files()
    return_code = integrity_check.output_issues()
    sys.exit(return_code)


if __name__ == "__main__":
    run_main()