1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.loganalysis.util; 17 18 import java.util.ArrayList; 19 import java.util.Arrays; 20 import java.util.LinkedHashMap; 21 import java.util.List; 22 import java.util.Map; 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 25 26 /** 27 * The RegexTrie is a trie where each _stored_ segment of the key is a regex {@link Pattern}. Thus, 28 * the full _stored_ key is a List<Pattern> rather than a String as in a standard trie. Note that 29 * the {@link #get(Object key)} method requires a List<String>, which will be matched against the 30 * {@link Pattern}s, rather than checked for equality as in a standard trie. It will likely perform 31 * poorly for large datasets. 32 * <p /> 33 * One can also use a {@code null} entry in the {@code Pattern} sequence to serve as a wildcard. If 34 * a {@code null} is encountered, all subsequent entries in the sequence will be ignored. 35 * When the retrieval code encounters a {@code null} {@code Pattern}, it will first wait to see if a 36 * more-specific entry matches the sequence. If one does, that more-specific entry will proceed, 37 * even if it subsequently fails to match. 38 * <p /> 39 * If no more-specific entry matches, the wildcard match will add all remaining {@code String}s 40 * to the list of captures (if enabled) and return the value associated with the wildcard. 41 * <p /> 42 * A short sample of the wildcard functionality: 43 * <pre> 44 * List<List<String>> captures = new LinkedList<List<String>>(); 45 * RegexTrie<Integer> trie = new RegexTrie<Integer>(); 46 * trie.put(2, "a", null); 47 * trie.put(4, "a", "b"); 48 * trie.retrieve(captures, "a", "c", "e"); 49 * // returns 2. captures is now [[], ["c"], ["e"]] 50 * trie.retrieve(captures, "a", "b"); 51 * // returns 4. captures is now [[], []] 52 * trie.retrieve(captures, "a", "b", "c"); 53 * // returns null. captures is now [[], []] 54 * </pre> 55 */ 56 //TODO: Use libTF once this is copied over. 57 public class RegexTrie<V> { 58 private V mValue = null; 59 private Map<CompPattern, RegexTrie<V>> mChildren = 60 new LinkedHashMap<CompPattern, RegexTrie<V>>(); 61 62 /** 63 * Patterns aren't comparable by default, which prevents you from retrieving them from a 64 * HashTable. This is a simple stub class that makes a Pattern with a working 65 * {@link CompPattern#equals()} method. 66 */ 67 static class CompPattern { 68 protected final Pattern mPattern; 69 CompPattern(Pattern pattern)70 CompPattern(Pattern pattern) { 71 if (pattern == null) { 72 throw new NullPointerException(); 73 } 74 mPattern = pattern; 75 } 76 77 @Override equals(Object other)78 public boolean equals(Object other) { 79 Pattern otherPat; 80 if (other instanceof Pattern) { 81 otherPat = (Pattern) other; 82 } else if (other instanceof CompPattern) { 83 CompPattern otherCPat = (CompPattern) other; 84 otherPat = otherCPat.mPattern; 85 } else { 86 return false; 87 } 88 return mPattern.toString().equals(otherPat.toString()); 89 } 90 91 @Override hashCode()92 public int hashCode() { 93 return mPattern.toString().hashCode(); 94 } 95 96 @Override toString()97 public String toString() { 98 return String.format("CP(%s)", mPattern.toString()); 99 } 100 matcher(String string)101 public Matcher matcher(String string) { 102 return mPattern.matcher(string); 103 } 104 } 105 clear()106 public void clear() { 107 mValue = null; 108 for (RegexTrie<V> child : mChildren.values()) { 109 child.clear(); 110 } 111 mChildren.clear(); 112 } 113 containsKey(String... strings)114 boolean containsKey(String... strings) { 115 return retrieve(strings) != null; 116 } 117 recursivePut(V value, List<CompPattern> patterns)118 V recursivePut(V value, List<CompPattern> patterns) { 119 // Cases: 120 // 1) patterns is empty -- set our value 121 // 2) patterns is non-empty -- recurse downward, creating a child if necessary 122 if (patterns.isEmpty()) { 123 V oldValue = mValue; 124 mValue = value; 125 return oldValue; 126 } else { 127 CompPattern curKey = patterns.get(0); 128 List<CompPattern> nextKeys = patterns.subList(1, patterns.size()); 129 130 // Create a new child to handle 131 RegexTrie<V> nextChild = mChildren.get(curKey); 132 if (nextChild == null) { 133 nextChild = new RegexTrie<V>(); 134 mChildren.put(curKey, nextChild); 135 } 136 return nextChild.recursivePut(value, nextKeys); 137 } 138 } 139 140 /** 141 * A helper method to consolidate validation before adding an entry to the trie. 142 * 143 * @param value The value to set 144 * @param patterns The sequence of {@link CompPattern}s that must be sequentially matched to 145 * retrieve the associated {@code value} 146 */ validateAndPut(V value, List<CompPattern> pList)147 private V validateAndPut(V value, List<CompPattern> pList) { 148 if (pList.size() == 0) { 149 throw new IllegalArgumentException("pattern list must be non-empty."); 150 } 151 return recursivePut(value, pList); 152 } 153 154 /** 155 * Add an entry to the trie. 156 * 157 * @param value The value to set 158 * @param patterns The sequence of {@link Pattern}s that must be sequentially matched to 159 * retrieve the associated {@code value} 160 */ put(V value, Pattern... patterns)161 public V put(V value, Pattern... patterns) { 162 List<CompPattern> pList = new ArrayList<CompPattern>(patterns.length); 163 for (Pattern pat : patterns) { 164 if (pat == null) { 165 pList.add(null); 166 break; 167 } 168 pList.add(new CompPattern(pat)); 169 } 170 return validateAndPut(value, pList); 171 } 172 173 /** 174 * This helper method takes a list of regular expressions as {@link String}s and compiles them 175 * on-the-fly before adding the subsequent {@link Pattern}s to the trie 176 * 177 * @param value The value to set 178 * @param patterns The sequence of regular expressions (as {@link String}s) that must be 179 * sequentially matched to retrieve the associated {@code value}. Each String will be 180 * compiled as a {@link Pattern} before invoking {@link #put(V, Pattern...)}. 181 */ put(V value, String... regexen)182 public V put(V value, String... regexen) { 183 List<CompPattern> pList = new ArrayList<CompPattern>(regexen.length); 184 for (String regex : regexen) { 185 if (regex == null) { 186 pList.add(null); 187 break; 188 } 189 Pattern pat = Pattern.compile(regex); 190 pList.add(new CompPattern(pat)); 191 } 192 return validateAndPut(value, pList); 193 } 194 recursiveRetrieve(List<List<String>> captures, List<String> strings)195 V recursiveRetrieve(List<List<String>> captures, List<String> strings) { 196 // Cases: 197 // 1) strings is empty -- return our value 198 // 2) strings is non-empty -- find the first child that matches, recurse downward 199 if (strings.isEmpty()) { 200 return mValue; 201 } else { 202 boolean wildcardMatch = false; 203 V wildcardValue = null; 204 String curKey = strings.get(0); 205 List<String> nextKeys = strings.subList(1, strings.size()); 206 207 for (Map.Entry<CompPattern, RegexTrie<V>> child : mChildren.entrySet()) { 208 CompPattern pattern = child.getKey(); 209 if (pattern == null) { 210 wildcardMatch = true; 211 wildcardValue = child.getValue().getValue(); 212 continue; 213 } 214 215 Matcher matcher = pattern.matcher(curKey); 216 if (matcher.matches()) { 217 if (captures != null) { 218 List<String> curCaptures = new ArrayList<String>(matcher.groupCount()); 219 for (int i = 0; i < matcher.groupCount(); i++) { 220 // i+1 since group 0 is the entire matched string 221 curCaptures.add(matcher.group(i+1)); 222 } 223 captures.add(curCaptures); 224 } 225 226 return child.getValue().recursiveRetrieve(captures, nextKeys); 227 } 228 } 229 230 if (wildcardMatch) { 231 // Stick the rest of the query string into the captures list and return 232 if (captures != null) { 233 for (String str : strings) { 234 captures.add(Arrays.asList(str)); 235 } 236 } 237 return wildcardValue; 238 } 239 240 // no match 241 return null; 242 } 243 } 244 245 /** 246 * Fetch a value from the trie, by matching the provided sequence of {@link String}s to a 247 * sequence of {@link Pattern}s stored in the trie. 248 * 249 * @param strings A sequence of {@link String}s to match 250 * @return The associated value, or {@code null} if no value was found 251 */ retrieve(String... strings)252 public V retrieve(String... strings) { 253 return retrieve(null, strings); 254 } 255 256 /** 257 * Fetch a value from the trie, by matching the provided sequence of {@link String}s to a 258 * sequence of {@link Pattern}s stored in the trie. This version of the method also returns 259 * a {@link List} of capture groups for each {@link Pattern} that was matched. 260 * <p /> 261 * Each entry in the outer List corresponds to one level of {@code Pattern} in the trie. 262 * For each level, the list of capture groups will be stored. If there were no captures 263 * for a particular level, an empty list will be stored. 264 * <p /> 265 * Note that {@code captures} will be {@link List#clear()}ed before the retrieval begins. 266 * Also, if the retrieval fails after a partial sequence of matches, {@code captures} will 267 * still reflect the capture groups from the partial match. 268 * 269 * @param captures A {@code List<List<String>>} through which capture groups will be returned. 270 * @param strings A sequence of {@link String}s to match 271 * @return The associated value, or {@code null} if no value was found 272 */ retrieve(List<List<String>> captures, String... strings)273 public V retrieve(List<List<String>> captures, String... strings) { 274 if (strings.length == 0) { 275 throw new IllegalArgumentException("string list must be non-empty"); 276 } 277 List<String> sList = Arrays.asList(strings); 278 if (captures != null) { 279 captures.clear(); 280 } 281 return recursiveRetrieve(captures, sList); 282 } 283 getValue()284 private V getValue() { 285 return mValue; 286 } 287 288 @Override toString()289 public String toString() { 290 return String.format("{V: %s, C: %s}", mValue, mChildren); 291 } 292 } 293 294