# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Checks Java files for illegal imports."""



import codecs
import os
import re

import results
from rules import Rule


class JavaChecker(object):
  """Import checker for Java files.

  The CheckFile method uses real filesystem paths, but Java imports work in
  terms of package names. To deal with this, we have an extra "prescan" pass
  that reads all the .java files and builds a mapping of class name -> filepath.
  In CheckFile, we convert each import statement into a real filepath, and check
  that against the rules in the DEPS files.

  Note that in Java you can always use classes in the same directory without an
  explicit import statement, so these imports can't be blocked with DEPS files.
  But that shouldn't be a problem, because same-package imports are pretty much
  always correct by definition. (If we find a case where this is *not* correct,
  it probably means the package is too big and needs to be split up.)

  Properties:
    _classmap: dict of fully-qualified Java class name -> filepath
  """

  EXTENSIONS = ['.java']

  # This regular expression will be used to extract filenames from import
  # statements.
  _EXTRACT_IMPORT_PATH = re.compile(r'^import\s+(?:static\s+)?([\w\.]+)\s*;')

  # This regular expression will be used to extract the package name from a
  # package declaration. Like _EXTRACT_IMPORT_PATH, it tolerates whitespace
  # before the terminating semicolon.
  _EXTRACT_PACKAGE_NAME = re.compile(r'^package\s+([\w\.]+)\s*;')

  def __init__(self, base_directory, verbose, added_imports=None,
               allow_multiple_definitions=None):
    """Initializes the checker and, if requested, prescans the source tree.

    Args:
      base_directory: root directory that is walked during the prescan and
        against which import paths are made relative.
      verbose: if true, progress and extra warnings are printed.
      added_imports: optional iterable of (file_path, import_lines) pairs, in
        the form accepted by _PrescanImportFiles.
      allow_multiple_definitions: optional list of regular expressions; a file
        whose path matches one of them (via re.match) does not trigger the
        "multiple definitions" warning.
    """
    self._base_directory = base_directory
    self._verbose = verbose
    self._classmap = {}
    self._allow_multiple_definitions = allow_multiple_definitions or []
    # The tree is only prescanned when added_imports is non-empty. Otherwise
    # _classmap stays empty and CheckLine treats every import as coming from
    # outside the tree.
    if added_imports:
      added_classset = self._PrescanImportFiles(added_imports)
      self._PrescanFiles(added_classset)

  def _GetClassFullName(self, filepath):
    """Get the full class name of a file with package name.

    The class name is taken from the file's base name (without extension) and
    the package from the first line of the file that starts with a package
    declaration.

    Args:
      filepath: path of the Java file to inspect.

    Returns:
      '<package>.<base name>', or None if filepath is not an existing file or
      no package declaration is found.
    """
    if not os.path.isfile(filepath):
      return None
    short_class_name, _ = os.path.splitext(os.path.basename(filepath))
    with codecs.open(filepath, encoding='utf-8') as f:
      for line in f:
        found_item = self._EXTRACT_PACKAGE_NAME.match(line)
        if found_item:
          return found_item.group(1) + '.' + short_class_name
    return None

  def _IgnoreDir(self, d):
    """Returns True if directory name |d| must be skipped by the prescan."""
    # Skip hidden directories.
    if d.startswith('.'):
      return True
    # Skip the "out" directory, as dealing with generated files is awkward.
    # We don't want paths like "out/Release/lib.java" in our DEPS files.
    # TODO(husky): We need some way of determining the "real" path to
    # a generated file -- i.e., where it would be in source control if
    # it weren't generated.
    if d.startswith('out') or d in ('xcodebuild', 'AndroidStudioDefault',
                                    'libassistant',):
      return True
    # Skip third-party directories.
    if d in ('third_party', 'ThirdParty'):
      return True
    return False

  def _PrescanFiles(self, added_classset):
    """Walks the base directory and prescans every .java file found.

    Args:
      added_classset: set of fully-qualified class names, forwarded to
        _PrescanFile for each file.
    """
    for root, dirs, files in os.walk(self._base_directory):
      # Skip unwanted subdirectories. TODO(husky): it would be better to do
      # this via the skip_child_includes flag in DEPS files. Maybe hoist this
      # prescan logic into checkdeps.py itself?
      # Modify dirs in-place with slice assignment to avoid recursing into them.
      dirs[:] = [d for d in dirs if not self._IgnoreDir(d)]
      for f in files:
        if f.endswith('.java'):
          self._PrescanFile(os.path.join(root, f), added_classset)

  def _PrescanImportFiles(self, added_imports):
    """Build a set of fully-qualified classes affected by this patch.

    Prescan imported files and build classset to collect full class names
    with package name. This includes both changed files as well as changed
    imports.

    Args:
      added_imports: ((file_path, (import_line, import_line, ...)), ...)

    Return:
      A set of full class names with package name of imported files.
    """
    classset = set()
    for filepath, changed_lines in (added_imports or []):
      if not self.ShouldCheck(filepath):
        continue
      full_class_name = self._GetClassFullName(filepath)
      if full_class_name:
        classset.add(full_class_name)
      for line in changed_lines:
        found_item = self._EXTRACT_IMPORT_PATH.match(line)
        if found_item:
          classset.add(found_item.group(1))
    return classset

  def _PrescanFile(self, filepath, added_classset):
    """Records the class defined by |filepath| in _classmap.

    Args:
      filepath: path of the Java file to record.
      added_classset: set of fully-qualified class names; a duplicate
        definition of one of these is reported even when not verbose.
    """
    if self._verbose:
      print('Prescanning: ' + filepath)
    full_class_name = self._GetClassFullName(filepath)
    if not full_class_name:
      if self._verbose:
        print('WARNING: no package definition found in %s' % filepath)
      return

    if full_class_name not in self._classmap:
      self._classmap[full_class_name] = filepath
      return

    # The class is already mapped to another file.
    if self._verbose or full_class_name in added_classset:
      if not any(re.match(i, filepath) for i in
                 self._allow_multiple_definitions):
        print('WARNING: multiple definitions of %s:' % full_class_name)
        print(' ' + filepath)
        print(' ' + self._classmap[full_class_name])
        print()
    # Prefer the public repo when multiple matches are found.
    if self._classmap[full_class_name].startswith(
        os.path.join(self._base_directory, 'clank')):
      self._classmap[full_class_name] = filepath

  def CheckLine(self, rules, line, filepath, fail_on_temp_allow=False):
    """Checks the given line with the given rule set.

    Returns a tuple (is_import, dependency_violation) where
    is_import is True only if the line is an import
    statement, and dependency_violation is an instance of
    results.DependencyViolation if the line violates a rule, or None
    if it does not.
    """
    found_item = self._EXTRACT_IMPORT_PATH.match(line)
    if not found_item:
      return False, None  # Not a match
    clazz = found_item.group(1)
    if clazz not in self._classmap:
      # Importing a class from outside the Chromium tree. That's fine --
      # it's probably a Java or Android system class.
      return True, None
    import_path = os.path.relpath(
        self._classmap[clazz], self._base_directory)
    # Convert Windows paths to Unix style, as used in DEPS files.
    import_path = import_path.replace(os.path.sep, '/')
    rule = rules.RuleApplyingTo(import_path, filepath)
    if (rule.allow == Rule.DISALLOW or
        (fail_on_temp_allow and rule.allow == Rule.TEMP_ALLOW)):
      return True, results.DependencyViolation(import_path, rule, rules)
    return True, None

  def CheckFile(self, rules, filepath):
    """Checks the import statements of |filepath| against |rules|.

    Lines are read up to and including the first one that contains '{'.

    Returns:
      A results.DependeeStatus for filepath holding every violation found.
    """
    if self._verbose:
      print('Checking: ' + filepath)

    dependee_status = results.DependeeStatus(filepath)
    with codecs.open(filepath, encoding='utf-8') as f:
      for line in f:
        _, violation = self.CheckLine(rules, line, filepath)
        if violation:
          dependee_status.AddViolation(violation)
        if '{' in line:
          # This is code, so we're finished reading imports for this file.
          break

    return dependee_status

  @staticmethod
  def IsJavaFile(filepath):
    """Returns True if the given path ends in the extensions
    handled by this checker.
    """
    return os.path.splitext(filepath)[1] in JavaChecker.EXTENSIONS

  def ShouldCheck(self, file_path):
    """Check if the new import file path should be presubmit checked.

    Args:
      file_path: file path to be checked

    Return:
      bool: True if the file should be checked; False otherwise.
    """
    return self.IsJavaFile(file_path)