# Copyright (C) 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Contains filter-related code."""


def validate_filter_rules(filter_rules, all_categories):
    """Validate the given filter rules, and raise a ValueError if not valid.

    Args:
      filter_rules: A list of boolean filter rules, for example--
                    ["-whitespace", "+whitespace/braces"]
      all_categories: A list of all available category names, for example--
                      ["whitespace/tabs", "whitespace/braces"]

    Raises:
      ValueError: An error occurs if a filter rule does not begin
                  with "+" or "-" or if a filter rule does not match
                  the beginning of some category name in the list
                  of all available categories.

    """
    for rule in filter_rules:
        if not rule.startswith(('+', '-')):
            raise ValueError('Invalid filter rule "%s": every rule '
                             "must start with + or -." % rule)

        # Everything after the leading sign must be a prefix of at
        # least one known category name.
        prefix = rule[1:]
        if not any(category.startswith(prefix)
                   for category in all_categories):
            raise ValueError('Suspected incorrect filter rule "%s": '
                             "the rule does not match the beginning "
                             "of any category name." % rule)


class _CategoryFilter(object):

    """Filters whether to check style categories."""

    def __init__(self, filter_rules=None):
        """Create a category filter.

        Args:
          filter_rules: A list of strings that are filter rules, which
                        are strings beginning with the plus or minus
                        symbol (+/-).  The list should include any
                        default filter rules at the beginning.
                        Defaults to the empty list.

        The rules are stored as given and are not validated here; use
        validate_filter_rules() beforehand if validation is needed.

        """
        if filter_rules is None:
            filter_rules = []

        self._filter_rules = filter_rules
        # Cached dictionary of category to True/False.
        self._should_check_category = {}

    def __str__(self):
        return ",".join(self._filter_rules)

    # Useful for unit testing.
    def __eq__(self, other):
        """Return whether this CategoryFilter instance is equal to another."""
        return self._filter_rules == other._filter_rules

    # Useful for unit testing.
    def __ne__(self, other):
        # Python 2 does not automatically deduce this from __eq__().
        return not (self == other)

    def should_check(self, category):
        """Return whether the category should be checked.

        The rules for determining whether a category should be checked
        are as follows.  By default all categories should be checked.
        Then apply the filter rules in order from first to last, with
        later flags taking precedence.

        A filter rule applies to a category if the string after the
        leading plus/minus (+/-) matches the beginning of the category
        name.  A plus (+) means the category should be checked, while a
        minus (-) means the category should not be checked.

        """
        if category in self._should_check_category:
            return self._should_check_category[category]

        should_check = True  # All categories checked by default.
        for rule in self._filter_rules:
            # Only rules whose text after the sign prefixes the category
            # apply; the last applicable rule wins.
            if category.startswith(rule[1:]):
                should_check = rule.startswith('+')

        self._should_check_category[category] = should_check  # Update cache.
        return should_check


class FilterConfiguration(object):

    """Supports filtering with path-specific and user-specified rules."""

    def __init__(self, base_rules=None, path_specific=None, user_rules=None):
        """Create a FilterConfiguration instance.

        Args:
          base_rules: The starting list of filter rules to use for
                      processing.  The default is the empty list, which
                      by itself would mean that all categories should be
                      checked.

          path_specific: A list of (sub_paths, path_rules) pairs
                         that stores the path-specific filter rules for
                         appending to the base rules.
                             The "sub_paths" value is a list of path
                         substrings.  If a file path contains one of the
                         substrings, then the corresponding path rules
                         are appended.  The first substring match takes
                         precedence, i.e. only the first match triggers
                         an append.
                             The "path_rules" value is a list of filter
                         rules that can be appended to the base rules.

          user_rules: A list of filter rules that is always appended
                      to the base rules and any path rules.  In other
                      words, the user rules take precedence over
                      everything.  In practice, the user rules are
                      provided by the user from the command line.

        """
        if base_rules is None:
            base_rules = []
        if path_specific is None:
            path_specific = []
        if user_rules is None:
            user_rules = []

        self._base_rules = base_rules
        self._path_specific = path_specific
        # The backing store for self._get_path_specific_lower().
        self._path_specific_lower = None

        self._user_rules = user_rules

        # Cached dictionary of path rules to CategoryFilter instance.
        self._path_rules_to_filter = {}

        # Cached dictionary of file path to CategoryFilter instance.
        #
        # The same CategoryFilter instance can be shared across
        # multiple keys in this dictionary.  This allows us to take
        # greater advantage of the caching done by
        # CategoryFilter.should_check().
        self._path_to_filter = {}

    # Useful for unit testing.
    def __eq__(self, other):
        """Return whether this FilterConfiguration is equal to another."""
        return (self._base_rules == other._base_rules and
                self._path_specific == other._path_specific and
                self._user_rules == other._user_rules)

    # Useful for unit testing.
    def __ne__(self, other):
        # Python 2 does not automatically deduce this from __eq__().
        return not self.__eq__(other)

    # We use the prefix "_get" since the name "_path_specific_lower"
    # is already taken up by the data attribute backing store.
    def _get_path_specific_lower(self):
        """Return a copy of self._path_specific with the paths lower-cased.

        The result is computed once and cached on the instance.

        """
        if self._path_specific_lower is None:
            # Build real lists: the cached value is iterated on every
            # lookup, so a one-shot iterator (what map() returns on
            # Python 3) would be exhausted after the first use.
            self._path_specific_lower = [
                ([sub_path.lower() for sub_path in sub_paths], path_rules)
                for (sub_paths, path_rules) in self._path_specific
            ]
        return self._path_specific_lower

    def _path_rules_from_path(self, path):
        """Determine the path-specific rules to use, and return as a tuple.

        This method returns a tuple rather than a list so the return
        value can be passed to _filter_from_path_rules() without change.

        """
        path = path.lower()
        for (sub_paths, path_rules) in self._get_path_specific_lower():
            for sub_path in sub_paths:
                if sub_path in path:
                    # Only the first matching entry contributes rules.
                    return tuple(path_rules)
        return ()  # Default to the empty tuple.

    def _filter_from_path_rules(self, path_rules):
        """Return the CategoryFilter associated to the given path rules.

        Args:
          path_rules: A tuple of path rules.  We require a tuple rather
                      than a list so the value can be used as a dictionary
                      key in self._path_rules_to_filter.

        """
        # We reuse the same CategoryFilter where possible to take
        # advantage of the caching they do.
        if path_rules not in self._path_rules_to_filter:
            rules = list(self._base_rules)  # Make a copy
            rules.extend(path_rules)
            rules.extend(self._user_rules)
            self._path_rules_to_filter[path_rules] = _CategoryFilter(rules)

        return self._path_rules_to_filter[path_rules]

    def _filter_from_path(self, path):
        """Return the CategoryFilter associated to a path."""
        if path not in self._path_to_filter:
            path_rules = self._path_rules_from_path(path)
            category_filter = self._filter_from_path_rules(path_rules)
            self._path_to_filter[path] = category_filter

        return self._path_to_filter[path]

    def should_check(self, category, path):
        """Return whether the given category should be checked.

        This method determines whether a category should be checked
        by checking the category name against the filter rules for
        the given path.

        For a given path, the filter rules are the combination of
        the base rules, the path-specific rules, and the user-provided
        rules -- in that order.  As we will describe below, later rules
        in the list take precedence.  The path-specific rules are the
        rules corresponding to the first element of the "path_specific"
        parameter that contains a string case-insensitively matching
        some substring of the path.  If there is no such element,
        there are no path-specific rules for that path.

        Given a list of filter rules, the logic for determining whether
        a category should be checked is as follows.  By default all
        categories should be checked.  Then apply the filter rules in
        order from first to last, with later flags taking precedence.

        A filter rule applies to a category if the string after the
        leading plus/minus (+/-) matches the beginning of the category
        name.  A plus (+) means the category should be checked, while a
        minus (-) means the category should not be checked.

        Args:
          category: The category name.
          path: The path of the file being checked.

        """
        return self._filter_from_path(path).should_check(category)