1 package org.unicode.cldr.util; 2 3 import java.util.ArrayList; 4 import java.util.Collections; 5 import java.util.Comparator; 6 import java.util.HashMap; 7 import java.util.Iterator; 8 import java.util.LinkedHashMap; 9 import java.util.List; 10 import java.util.Map; 11 import java.util.Map.Entry; 12 import java.util.Set; 13 import java.util.TreeSet; 14 import java.util.regex.Matcher; 15 import java.util.regex.Pattern; 16 17 import org.unicode.cldr.util.CldrUtility.VariableReplacer; 18 import org.unicode.cldr.util.RegexFileParser.RegexLineParser; 19 import org.unicode.cldr.util.RegexFileParser.VariableProcessor; 20 import org.unicode.cldr.util.RegexLookup.Finder; 21 import org.unicode.cldr.util.RegexLookup.Finder.Info; 22 23 import com.google.common.base.Splitter; 24 import com.ibm.icu.text.Transform; 25 import com.ibm.icu.util.Output; 26 27 /** 28 * Lookup items according to a set of regex patterns. Returns the value according to the first pattern that matches. Not 29 * thread-safe. 30 * 31 * @param <T> 32 */ 33 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> { 34 protected static final String SEPARATOR = "; "; 35 private VariableReplacer variables = new VariableReplacer(); 36 private StorageInterfaceBase<T> storage; 37 private Map<Finder, T> MEntries; 38 private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform; 39 private Transform<String, ? extends T> valueTransform; 40 private Merger<T> valueMerger; 41 private final boolean allowNull = false; 42 private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 43 44 public enum LookupType { 45 STAR_PATTERN_LOOKUP, OPTIMIZED_DIRECTORY_PATTERN_LOOKUP, STANDARD 46 } 47 48 private LookupType _lookupType; 49 50 /* 51 * STAR_PATTERN_LOOKUP 52 * 53 * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below. 54 * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur 55 * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression, 56 * thus greatly reducing the number of actual regex matches that need to occur. 57 */ 58 59 /* 60 * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP 61 * 62 * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored. 63 * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/' 64 * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"] 65 * 66 * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored. 67 * However regex's no longer need to follow any specific format (Slower but more versatile). 68 */ 69 RegexLookup(LookupType type)70 public RegexLookup(LookupType type) { 71 _lookupType = type; 72 switch (type) { 73 case STAR_PATTERN_LOOKUP: 74 // SPEntries = new StarPatternMap<T>(); 75 storage = new StarPatternMap<>(); 76 break; 77 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 78 // RTEntries = new RegexTree<T>(); 79 storage = new RegexTree<>(); 80 break; 81 default: 82 MEntries = new LinkedHashMap<>(); 83 break; 84 } 85 } 86 RegexLookup()87 public RegexLookup() { 88 this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP); 89 // _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP; 90 // RTEntries = new RegexTree<T>(); 91 } 92 93 public abstract static class Finder { 94 public static class Info { 95 public String[] value; 96 } 97 98 // abstract public String[] getInfo(); 99 100 // abstract public boolean find(String item, Object context); 101 find(String item, Object context, Info info)102 abstract public boolean find(String item, Object context, Info info); 103 matches(String item, Object context, Info info)104 abstract public boolean matches(String item, Object context, Info info); 105 getFailPoint(String source)106 public int getFailPoint(String source) { 107 return -1; 108 } 109 // must also define toString 110 } 111 112 public static class RegexFinder extends Finder { 113 /** 114 * The matcher used by this RegexFinder 115 */ 116 private final Matcher matcher; 117 118 /** 119 * The Pattern used by this RegexFinder 120 */ 121 protected final Pattern pattern; 122 RegexFinder(String pattern)123 public RegexFinder(String pattern) { 124 this.pattern = Pattern.compile(pattern, Pattern.COMMENTS); 125 matcher = this.pattern.matcher(""); 126 } 127 128 /** 129 * Call Matches on the pattern, returning additional information in the Info field, 130 * if it is non null 131 */ 132 @Override matches(String item, Object context, Info info)133 public boolean matches(String item, Object context, Info info) { 134 synchronized (matcher) { 135 try { 136 boolean result = matcher.reset(item).matches(); 137 extractInfo(info, result); 138 return result; 139 } catch (StringIndexOutOfBoundsException e) { 140 // We don't know what causes this error (cldrbug 5051) so 141 // make the exception message more detailed. 142 throw new IllegalArgumentException("Matching error caused by pattern: [" 143 + matcher.toString() + "] on text: [" + item + "]", e); 144 } 145 } 146 } 147 148 /** 149 * Extract match related information into the info field, if result is true, and info 150 * is not null. 151 * @param info 152 * @param result 153 */ extractInfo(Info info, boolean result)154 private void extractInfo(Info info, boolean result) { 155 if (result && info != null) { 156 int limit = matcher.groupCount() + 1; 157 String[] value = new String[limit]; 158 for (int i = 0; i < limit; ++i) { 159 value[i] = matcher.group(i); 160 } 161 info.value = value; 162 } 163 } 164 165 /** 166 * Call find() on the pattern, returning additional information in the info field, 167 * if it is non-null 168 */ 169 @Override find(String item, Object context, Info info)170 public boolean find(String item, Object context, Info info) { 171 synchronized (matcher) { 172 try { 173 boolean result = matcher.reset(item).find(); 174 extractInfo(info, result); 175 return result; 176 } catch (StringIndexOutOfBoundsException e) { 177 // We don't know what causes this error (cldrbug 5051) so 178 // make the exception message more detailed. 179 throw new IllegalArgumentException("Matching error caused by pattern: [" 180 + matcher.toString() + "] on text: [" + item + "]", e); 181 } 182 } 183 } 184 185 @Override toString()186 public String toString() { 187 // Use pattern here, to avoid having to synchronize on matcher 188 return pattern.pattern(); 189 } 190 191 @Override equals(Object obj)192 public boolean equals(Object obj) { 193 if (obj == null) { 194 return false; 195 } else { 196 return toString().equals(obj.toString()); 197 } 198 } 199 200 @Override hashCode()201 public int hashCode() { 202 return toString().hashCode(); 203 } 204 205 @Override getFailPoint(String source)206 public int getFailPoint(String source) { 207 synchronized (matcher) { 208 return RegexUtilities.findMismatch(matcher, source); 209 } 210 } 211 } 212 213 private static interface StorageInterfaceBase<T> { entrySet()214 Set<Entry<Finder, T>> entrySet(); 215 get(Finder finder)216 T get(Finder finder); 217 get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)218 T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound); 219 getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)220 List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo); 221 put(Finder pattern, T value)222 void put(Finder pattern, T value); 223 size()224 int size(); 225 } 226 227 // private static class FinderWithInfo { 228 // Finder _finder; 229 // Info _info; 230 // public FinderWithInfo(Finder finder,Info info) { 231 // _info=info; 232 // _finder=finder; 233 // } 234 // } 235 private static class RegexTree<T> implements StorageInterfaceBase<T> { 236 private RTNode root; 237 private int _size; 238 private RTNodeRankComparator rankComparator = new RTNodeRankComparator(); 239 RegexTree()240 public RegexTree() { 241 root = new RTNode("", null); 242 _size = 0; 243 } 244 245 @Override size()246 public int size() { 247 return _size; 248 } 249 250 @Override put(Finder pattern, T value)251 public void put(Finder pattern, T value) { 252 root.put(new RTNode(pattern, value, _size)); 253 _size++; 254 } 255 256 @Override get(Finder finder)257 public T get(Finder finder) { 258 return root.get(finder); 259 } 260 261 @Override getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)262 public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) { 263 List<RTNode> list = new ArrayList<>(); 264 List<T> retList = new ArrayList<>(); 265 266 root.addToList(pattern, context, list); 267 Collections.sort(list, rankComparator); 268 269 boolean isFirst = true; 270 if (firstInfo != null && !list.isEmpty()) { 271 RTNode firstNode = list.get(0); 272 if (firstNode._info != null) { 273 firstInfo.value = firstNode._info.value; 274 } 275 } 276 277 for (RTNode n : list) { 278 if (isFirst) { 279 firstInfo.value = n._info.value; 280 isFirst = false; 281 } 282 } 283 284 for (RTNode n : list) { 285 retList.add(n._val); 286 if (matcherList != null) { 287 matcherList.add(n._finder); 288 } 289 } 290 291 return retList; 292 } 293 294 @Override get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)295 public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) { 296 List<Finder> matcherList = new ArrayList<>(); 297 Output<String[]> firstInfo = new Output<>(); 298 List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first 299 if (arguments != null) { 300 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 301 arguments.value = firstInfo.value; 302 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 303 arguments.value = firstInfo.value; 304 } 305 if (matcherFound != null) { 306 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 307 } 308 return (matches.size() > 0) ? matches.get(0) : null; 309 } 310 311 @Override entrySet()312 public Set<Entry<Finder, T>> entrySet() { 313 LinkedHashMap<Finder, T> ret = new LinkedHashMap<>(); 314 TreeSet<RTNode> s = new TreeSet<>(rankComparator); 315 root.addToEntrySet(s); 316 317 for (RTNode n : s) { 318 ret.put(n._finder, n._val); 319 } 320 321 return ret.entrySet(); 322 } 323 324 public class RTNode extends NodeBase<T> { 325 // Finder _finder; 326 // T _val; 327 List<RTNode> _children = new ArrayList<>(); 328 int _rank = -1; //rank -1 means the node was not inserted, but only used for structural purposes 329 330 //constructor for regular nodes with a Finder RTNode(Finder finder, T val, int rank)331 public RTNode(Finder finder, T val, int rank) { 332 super(finder, val); 333 // _finder = finder; 334 // _val = val; 335 _rank = rank; 336 } 337 338 //constructors for nodes without a Finder RTNode(String key, T val)339 public RTNode(String key, T val) { 340 super(new RegexFinder(key), val); 341 // _finder = new RegexFinder(key); 342 // _val = val; 343 // _rank = -1; 344 _info = new Info(); 345 } 346 put(RTNode node)347 public void put(RTNode node) { 348 if (_children.size() == 0) { 349 _children.add(node); 350 } else { 351 String maxSimilarChars = ""; //most similar characters up to the last similar directory 352 int insertIndex = 0; 353 for (int i = 0; i < _children.size(); i++) { 354 RTNode child = _children.get(i); 355 String childFinderPattern = child._finder.toString(); 356 if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') { 357 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$" 358 } else if (child._rank == -1) { 359 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*" 360 } 361 362 //check if child has the same Finder as node to insert, then replace it 363 if (node._finder.equals(child._finder)) { 364 child._finder = node._finder; 365 child._val = node._val; 366 if (child._rank == -1) { 367 child._rank = node._rank; 368 } else { 369 _size--; 370 } 371 return; 372 } 373 374 //check if child is the parent of node 375 if (child._rank == -1 && node._finder.toString().startsWith(childFinderPattern)) { 376 child.put(node); 377 return; 378 } 379 380 //if not parent then check if candidate for most similar RTNode 381 String gcp = greatestCommonPrefix(childFinderPattern, node._finder.toString()); 382 gcp = removeExtraChars(gcp); 383 if (gcp.length() > maxSimilarChars.length()) { 384 maxSimilarChars = gcp; 385 insertIndex = i; 386 } 387 } 388 389 String finderPattern = this._finder.toString(); 390 if (finderPattern.length() > 0 && finderPattern.charAt(finderPattern.length() - 1) == '$') { 391 finderPattern = finderPattern.substring(0, finderPattern.length() - 1); //takes into account the added "$" 392 } else if (!(finderPattern.equals("")) && this._rank == -1) { 393 finderPattern = finderPattern.substring(0, finderPattern.length() - 2); //takes into account the added ".*" 394 } 395 396 if ((maxSimilarChars).equals(finderPattern)) { //add under this if no similar children 397 _children.add(node); 398 } else { 399 //create the common parent of the chosen candidate above and node, then add to the insert index 400 RTNode newParent = new RTNode(maxSimilarChars + ".*", null); 401 newParent._children.add(this._children.get(insertIndex)); 402 newParent._children.add(node); 403 this._children.remove(insertIndex); 404 this._children.add(insertIndex, newParent); 405 } 406 } 407 } 408 409 //takes a string in a directory regex form and removes all characters up to the last full directory removeExtraChars(String input)410 private String removeExtraChars(String input) { 411 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/'))); 412 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')')) || 413 (ret.lastIndexOf('[') != -1 && ret.lastIndexOf('[') > ret.lastIndexOf(']')) || 414 (ret.lastIndexOf('{') != -1 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) { 415 ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/'))); 416 } 417 return ret; 418 } 419 420 //traverse tree to get value get(Finder finder)421 public T get(Finder finder) { 422 T ret = null; //return value 423 424 if (_children.size() == 0) { 425 return null; 426 } else { 427 for (RTNode child : _children) { 428 429 //check if child is the node 430 if (child._rank != -1 && finder.equals(child._finder)) { 431 return child._val; 432 } 433 434 String childFinderPattern = child._finder.toString(); 435 436 if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') { 437 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$" 438 } else if (child._rank == -1) { 439 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*" 440 } 441 442 //check if child is the parent of node 443 if (finder.toString().startsWith(childFinderPattern)) { 444 ret = child.get(finder); 445 if (ret != null) { 446 break; 447 } 448 } 449 } 450 451 return ret; 452 } 453 } 454 455 //traverse tree to get an entry set addToEntrySet(TreeSet<RTNode> s)456 public void addToEntrySet(TreeSet<RTNode> s) { 457 if (_children.size() == 0) { 458 return; 459 } else { 460 for (RTNode child : _children) { 461 if (child._rank != -1) { 462 s.add(child); 463 } 464 child.addToEntrySet(s); 465 } 466 } 467 } 468 469 //traverse tree to get list of all values who's key matcher matches pattern addToList(String pattern, Object context, List<RTNode> list)470 public void addToList(String pattern, Object context, List<RTNode> list) { 471 if (_children.size() == 0) { 472 return; 473 } else { 474 Info firstInfo = new Info(); 475 for (RTNode child : _children) { 476 boolean found; 477 synchronized (child._finder) { 478 found = child._finder.find(pattern, context, firstInfo); 479 } 480 481 //check if child matches pattern 482 if (found) { 483 if (child._rank != -1) { 484 list.add(child); 485 } 486 // if this node's info value is unset, set it to the result of the 487 // lookup 488 // if (child._info!=null && child._info.value==null) { 489 if (child._info != null) { 490 // set the value to the result of the last find 491 child._info.value = firstInfo.value; 492 } else { 493 // for some reason, child._info is null, so simply initialize it. 494 child._info = new Info(); 495 // set the value to the result of the last find 496 child._info.value = firstInfo.value; 497 } 498 //check if child is the parent of node then enter that node 499 child.addToList(pattern, context, list); 500 } 501 } 502 } 503 } 504 505 @Override toString()506 public String toString() { 507 return toString("", new StringBuilder()).toString(); 508 } 509 toString(String prefix, StringBuilder result)510 private StringBuilder toString(String prefix, StringBuilder result) { 511 result.append(prefix).append(this._finder.toString()).append("\n"); 512 for (RegexTree<T>.RTNode child : _children) { 513 child.toString(prefix + "\t", result); 514 } 515 return result; 516 } 517 518 //greatest common prefix between two strings greatestCommonPrefix(String a, String b)519 public String greatestCommonPrefix(String a, String b) { 520 int minLength = Math.min(a.length(), b.length()); 521 for (int i = 0; i < minLength; i++) { 522 if (a.charAt(i) != b.charAt(i)) { 523 return a.substring(0, i); 524 } 525 } 526 return a.substring(0, minLength); 527 } 528 } 529 530 class RTNodeRankComparator implements Comparator<RTNode> { 531 @Override compare(RTNode a, RTNode b)532 public int compare(RTNode a, RTNode b) { 533 if (a == b) { 534 return 0; 535 } else if (a == null) { 536 return -1; 537 } else if (b == null) { 538 return 1; 539 } else if (a._rank == b._rank) { 540 return 0; 541 } else if (a._rank > b._rank) { 542 return 1; 543 } else { 544 return -1; 545 } 546 } 547 } 548 549 @Override toString()550 public String toString() { 551 return root.toString(); 552 } 553 } 554 555 private static class StarPatternMap<T> implements StorageInterfaceBase<T> { 556 private Map<String, List<SPNode>> _spmap; 557 private int _size = 0; 558 StarPatternMap()559 public StarPatternMap() { 560 _spmap = new HashMap<>(); 561 // _size = 0; 562 } 563 564 @Override size()565 public int size() { 566 return _size; 567 } 568 569 @Override put(Finder pattern, T value)570 public void put(Finder pattern, T value) { 571 //System.out.println("pattern.toString() is => "+pattern.toString()); 572 String starPattern = pathStarrer.transform2(pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*")); 573 //System.out.println("Putting => "+starPattern); 574 List<SPNode> candidates = _spmap.get(starPattern); 575 if (candidates == null) { 576 candidates = new ArrayList<>(); 577 } 578 SPNode newNode = new SPNode(pattern, value); 579 candidates.add(newNode); 580 _spmap.put(starPattern, candidates); 581 _size++; 582 } 583 584 @Override get(Finder finder)585 public T get(Finder finder) { 586 String starPattern = pathStarrer.transform2(finder.toString()); 587 List<SPNode> candidates = _spmap.get(starPattern); 588 if (candidates == null) { 589 return null; 590 } 591 for (SPNode cand : candidates) { 592 if (cand._finder.equals(finder)) { 593 return cand._val; 594 } 595 } 596 return null; 597 } 598 599 @Override getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)600 public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) { 601 List<SPNode> list = new ArrayList<>(); 602 List<T> retList = new ArrayList<>(); 603 604 String starPattern = pathStarrer.transform2(pattern); 605 List<SPNode> candidates = _spmap.get(starPattern); 606 if (candidates == null) { 607 return retList; 608 } 609 for (SPNode cand : candidates) { 610 Info info = new Info(); 611 if (cand._finder.find(pattern, context, info)) { 612 list.add(cand); 613 if (firstInfo != null) { 614 firstInfo.value = info.value; 615 } 616 } 617 } 618 619 for (SPNode n : list) { 620 retList.add(n._val); 621 if (matcherList != null) { 622 matcherList.add(n._finder); 623 } 624 } 625 626 return retList; 627 } 628 629 @Override get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)630 public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) { 631 List<Finder> matcherList = new ArrayList<>(); 632 Output<String[]> firstInfo = new Output<>(); 633 List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first 634 if (arguments != null && firstInfo.value != null) { 635 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 636 arguments.value = matcherList.isEmpty() ? null : firstInfo.value; 637 } 638 if (matcherFound != null) { 639 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 640 } 641 return (matches.size() > 0) ? matches.get(0) : null; 642 } 643 644 @Override entrySet()645 public Set<Entry<Finder, T>> entrySet() { 646 LinkedHashMap<Finder, T> ret = new LinkedHashMap<>(); 647 648 for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) { 649 List<SPNode> candidates = entry.getValue(); 650 for (SPNode node : candidates) { 651 ret.put(node._finder, node._val); 652 } 653 } 654 return ret.entrySet(); 655 } 656 657 /** 658 * A Node of a StarPatternMap 659 * @author ribnitz 660 * 661 */ 662 public class SPNode extends NodeBase<T> { 663 // Finder _finder; 664 // T _val; 665 SPNode(Finder finder, T val)666 public SPNode(Finder finder, T val) { 667 // _finder = finder; 668 // _val = val; 669 super(finder, val); 670 } 671 672 @Override toString()673 public String toString() { 674 return this._finder.toString(); 675 } 676 } 677 } 678 679 /** 680 * The basic class of an information node, featuring a Finder, a value and an Info 681 * 682 * @author ribnitz 683 * 684 * @param <T> 685 */ 686 private static class NodeBase<T> { 687 Finder _finder; 688 T _val; 689 Info _info = new Info(); 690 NodeBase(Finder finder, T value)691 public NodeBase(Finder finder, T value) { 692 this._finder = finder; 693 this._val = value; 694 } 695 } 696 697 public static Transform<String, RegexFinder> RegexFinderTransform = new Transform<String, RegexFinder>() { 698 @Override 699 public RegexFinder transform(String source) { 700 return new RegexFinder(source); 701 } 702 }; 703 704 /** 705 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //. To work better 706 * with XPaths. 707 */ 708 public static Transform<String, RegexFinder> RegexFinderTransformPath = new Transform<String, RegexFinder>() { 709 @Override 710 public RegexFinder transform(String source) { 711 final String newSource = source.replace("[@", "\\[@"); 712 return new RegexFinder(newSource.startsWith("//") 713 ? "^" + newSource 714 : newSource); 715 } 716 }; 717 718 /** 719 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //, and ' is changed to ". 720 * To work better with XPaths. 721 */ 722 public static Transform<String, RegexFinder> RegexFinderTransformPath2 = new Transform<String, RegexFinder>() { 723 @Override 724 public RegexFinder transform(String source) { 725 final String newSource = source.replace("[@", "\\[@").replace('\'', '"'); 726 return new RegexFinder(newSource.startsWith("//") 727 ? "^" + newSource 728 : newSource); 729 } 730 }; 731 732 /** 733 * Allows for merging items of the same type. 734 * 735 * @param <T> 736 */ 737 public interface Merger<T> { merge(T a, T into)738 T merge(T a, T into); 739 } 740 741 /** 742 * Returns the result of a regex lookup. 743 * 744 * @param source 745 * @return 746 */ get(String source)747 public final T get(String source) { 748 return get(source, null, null, null, null); 749 } 750 751 /** 752 * Returns the result of a regex lookup, with the group arguments that matched. 753 * 754 * @param source 755 * @param context 756 * TODO 757 * @return 758 */ get(String source, Object context, Output<String[]> arguments)759 public T get(String source, Object context, Output<String[]> arguments) { 760 return get(source, context, arguments, null, null); 761 } 762 763 /** 764 * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure cases for 765 * debugging. 766 * 767 * @param source 768 * @param context 769 * @return 770 */ get(String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)771 public T get(String source, Object context, Output<String[]> arguments, 772 Output<Finder> matcherFound, List<String> failures) { 773 774 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 775 // T ret = SPEntries.get(source, context, arguments, matcherFound); 776 T ret = storage.get(source, context, arguments, matcherFound); 777 if (ret != null) { 778 return ret; 779 } 780 781 if (failures != null) { 782 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 783 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 784 Finder matcher = entry.getKey(); 785 synchronized (matcher) { 786 int failPoint = matcher.getFailPoint(source); 787 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 788 + matcher.toString(); 789 failures.add(show); 790 } 791 } 792 } 793 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 794 // T ret = RTEntries.get(source, context, arguments, matcherFound); 795 T ret = storage.get(source, context, arguments, matcherFound); 796 if (ret != null) { 797 return ret; 798 } 799 800 if (failures != null) { 801 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 802 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 803 Finder matcher = entry.getKey(); 804 synchronized (matcher) { 805 int failPoint = matcher.getFailPoint(source); 806 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 807 + matcher.toString(); 808 failures.add(show); 809 } 810 } 811 } 812 } else { 813 //slow but versatile implementation 814 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 815 Finder matcher = entry.getKey(); 816 synchronized (matcher) { 817 Info firstInfo = new Info(); 818 if (matcher.find(source, context, firstInfo)) { 819 if (arguments != null) { 820 // arguments.value = matcher.getInfo(); 821 arguments.value = firstInfo.value; 822 } 823 if (matcherFound != null) { 824 matcherFound.value = matcher; 825 } 826 return entry.getValue(); 827 } else if (failures != null) { 828 int failPoint = matcher.getFailPoint(source); 829 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 830 + matcher.toString(); 831 failures.add(show); 832 } 833 } 834 } 835 } 836 837 // not really necessary, but makes debugging easier. 838 if (arguments != null) { 839 arguments.value = null; 840 } 841 if (matcherFound != null) { 842 matcherFound.value = null; 843 } 844 return null; 845 } 846 847 /** 848 * Returns all results of a regex lookup, with the group arguments that matched. Supplies failure cases for 849 * debugging. 850 * 851 * @param source 852 * @param context 853 * TODO 854 * @return 855 */ getAll(String source, Object context, List<Finder> matcherList, List<String> failures)856 public List<T> getAll(String source, Object context, List<Finder> matcherList, List<String> failures) { 857 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 858 Output<String[]> firstInfo = new Output<>(); 859 // List<T> matches = SPEntries.getAll(source, context, matcherList,firstInfo); 860 List<T> matches = storage.getAll(source, context, matcherList, firstInfo); 861 if (matches != null) { 862 return matches; 863 } 864 865 if (failures != null) { 866 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 867 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 868 Finder matcher = entry.getKey(); 869 synchronized (matcher) { 870 int failPoint = matcher.getFailPoint(source); 871 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 872 + matcher.toString(); 873 failures.add(show); 874 } 875 } 876 } 877 return null; 878 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 879 Output<String[]> info = new Output<>(); 880 // List<T> matches = RTEntries.getAll(source, context, matcherList,info); 881 List<T> matches = storage.getAll(source, context, matcherList, info); 882 if (matches != null) { 883 return matches; 884 } 885 886 if (failures != null) { 887 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 888 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 889 Finder matcher = entry.getKey(); 890 synchronized (matcher) { 891 int failPoint = matcher.getFailPoint(source); 892 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 893 + matcher.toString(); 894 failures.add(show); 895 } 896 } 897 } 898 return null; 899 } else { 900 //slow but versatile implementation 901 List<T> matches = new ArrayList<>(); 902 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 903 Finder matcher = entry.getKey(); 904 Info firstInfo = new Info(); 905 if (matcher.find(source, context, firstInfo)) { 906 if (matcherList != null) { 907 matcherList.add(matcher); 908 } 909 matches.add(entry.getValue()); 910 } else if (failures != null) { 911 int failPoint = matcher.getFailPoint(source); 912 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 913 + matcher.toString(); 914 failures.add(show); 915 } 916 } 917 return matches; 918 } 919 } 920 921 /** 922 * Find the patterns that haven't been matched. Requires the caller to collect the patterns that have, using 923 * matcherFound. 924 * 925 * @return outputUnmatched 926 */ getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched)927 public Map<String, T> getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched) { 928 outputUnmatched.clear(); 929 930 Set<Map.Entry<Finder, T>> entrySet; 931 switch (_lookupType) { 932 case STAR_PATTERN_LOOKUP: 933 // entrySet = SPEntries.entrySet(); 934 entrySet = storage.entrySet(); 935 break; 936 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 937 // entrySet = RTEntries.entrySet(); 938 entrySet = storage.entrySet(); 939 break; 940 default: 941 entrySet = MEntries.entrySet(); 942 break; 943 } 944 945 for (Map.Entry<Finder, T> entry : entrySet) { 946 String pattern = entry.getKey().toString(); 947 if (!matched.contains(pattern)) { 948 outputUnmatched.put(pattern, entry.getValue()); 949 } 950 } 951 return outputUnmatched; 952 } 953 954 /** 955 * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex pattern and the value is 956 * what gets returned. 957 * 958 * @param patternTransform 959 * Used to transform string patterns into a Pattern. Can be used to process replacements (like 960 * variables). 961 * @param valueTransform 962 * Used to transform string values into another form. 963 * @param valueMerger 964 * Used to merge values with the same key. 965 */ of(Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)966 public static <T, U> RegexLookup<T> of(Transform<String, Finder> patternTransform, 967 Transform<String, T> valueTransform, Merger<T> valueMerger) { 968 return new RegexLookup<T>().setPatternTransform(patternTransform).setValueTransform(valueTransform) 969 .setValueMerger(valueMerger); 970 } 971 of(Transform<String, T> valueTransform)972 public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) { 973 return new RegexLookup<T>().setValueTransform(valueTransform).setPatternTransform(RegexFinderTransform); 974 } 975 of()976 public static <T> RegexLookup<T> of() { 977 return new RegexLookup<T>().setPatternTransform(RegexFinderTransform); 978 } 979 980 /** 981 * @deprecated Use {@link #of(LookupType,Transform)} instead 982 */ 983 @Deprecated of(LookupType lookupType)984 public static <T> RegexLookup<T> of(LookupType lookupType) { 985 return of(lookupType, RegexFinderTransform); 986 } 987 of(LookupType lookupType, Transform<String, RegexFinder> transform)988 public static <T> RegexLookup<T> of(LookupType lookupType, Transform<String, RegexFinder> transform) { 989 return new RegexLookup<T>(lookupType).setPatternTransform(transform); 990 } 991 setValueTransform(Transform<String, ? extends T> valueTransform)992 public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) { 993 this.valueTransform = valueTransform; 994 return this; 995 } 996 setPatternTransform(Transform<String, ? extends Finder> patternTransform)997 public RegexLookup<T> setPatternTransform(Transform<String, ? extends Finder> patternTransform) { 998 this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform; 999 return this; 1000 } 1001 setValueMerger(Merger<T> valueMerger)1002 public RegexLookup<T> setValueMerger(Merger<T> valueMerger) { 1003 this.valueMerger = valueMerger; 1004 return this; 1005 } 1006 1007 /** 1008 * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated by "; ". Lines 1009 * starting with # are comments. 1010 */ loadFromFile(Class<?> baseClass, String filename)1011 public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) { 1012 RegexFileParser parser = setupRegexFileParser(); 1013 parser.parse(baseClass, filename); 1014 return this; 1015 } 1016 1017 /** 1018 * Load a RegexLookup from a list of strings, as if they came from a file. See loadFromFile 1019 */ loadFromString(String source)1020 public RegexLookup<T> loadFromString(String source) { 1021 RegexFileParser parser = setupRegexFileParser(); 1022 parser.parseStrings("string", Splitter.on('\n').split(source)); 1023 return this; 1024 } 1025 setupRegexFileParser()1026 private RegexFileParser setupRegexFileParser() { 1027 RegexFileParser parser = new RegexFileParser(); 1028 parser.setLineParser(new RegexLineParser() { 1029 @Override 1030 public void parse(String line) { 1031 int pos = line.indexOf(RegexLookup.SEPARATOR); 1032 if (pos < 0) { 1033 throw new IllegalArgumentException("Illegal line, doesn't contain semicolon: " + line); 1034 } 1035 String source = line.substring(0, pos).trim(); 1036 String target = line.substring(pos + 2).trim(); 1037 try { 1038 @SuppressWarnings("unchecked") 1039 T result = valueTransform == null ? (T) target : valueTransform.transform(target); 1040 add(source, result); 1041 } catch (Exception e) { 1042 throw new IllegalArgumentException("Failed to add <" + source + "> => <" + target + ">", e); 1043 } 1044 } 1045 }); 1046 parser.setVariableProcessor(new VariableProcessor() { 1047 @Override 1048 public void add(String variable, String variableName) { 1049 addVariable(variable, variableName); 1050 } 1051 1052 @Override 1053 public String replace(String str) { 1054 return variables.replace(str); 1055 } 1056 }); 1057 return parser; 1058 } 1059 addVariable(String variable, String variableValue)1060 public RegexLookup<T> addVariable(String variable, String variableValue) { 1061 if (!variable.startsWith("%")) { 1062 throw new IllegalArgumentException("Variables must start with %"); 1063 } 1064 variables.add(variable.trim(), variableValue.trim()); 1065 return this; 1066 } 1067 1068 /** 1069 * Add a pattern/value pair. 1070 * 1071 * @param stringPattern 1072 * @param target 1073 * @return this, for chaining 1074 */ add(String stringPattern, T target)1075 public RegexLookup<T> add(String stringPattern, T target) { 1076 if (stringPattern.contains("%")) { 1077 stringPattern = variables.replace(stringPattern); 1078 } 1079 Finder pattern0 = patternTransform.transform(stringPattern); 1080 return add(pattern0, target); 1081 } 1082 1083 /** 1084 * Add a pattern/value pair. 1085 * 1086 * @param pattern 1087 * @param target 1088 * @return this, for chaining 1089 */ add(Finder pattern, T target)1090 public RegexLookup<T> add(Finder pattern, T target) { 1091 if (!allowNull && target == null) { 1092 throw new NullPointerException("null disallowed, unless allowNull(true) is called."); 1093 } 1094 1095 T old; 1096 switch (_lookupType) { 1097 case STAR_PATTERN_LOOKUP: // fallthrough 1098 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1099 old = storage.get(pattern); 1100 // old = SPEntries.get(pattern); 1101 break; 1102 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1103 // old = storage.get(pattern); 1104 // old = RTEntries.get(pattern); 1105 // break; 1106 default: 1107 old = MEntries.get(pattern); 1108 break; 1109 } 1110 1111 if (old == null) { 1112 switch (_lookupType) { 1113 case STAR_PATTERN_LOOKUP: // fallthrough 1114 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1115 storage.put(pattern, target); 1116 // SPEntries.put(pattern, target); 1117 break; 1118 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1119 // storage.put(pattern, target); 1120 // RTEntries.put(pattern, target); 1121 // break; 1122 default: 1123 MEntries.put(pattern, target); 1124 break; 1125 } 1126 } else if (valueMerger != null) { 1127 valueMerger.merge(target, old); 1128 } else { 1129 throw new IllegalArgumentException("Duplicate matcher without Merger defined " + pattern + "; old: " + old 1130 + "; new: " + target); 1131 } 1132 return this; 1133 } 1134 1135 @Override iterator()1136 public Iterator<Map.Entry<Finder, T>> iterator() { 1137 switch (_lookupType) { 1138 case STAR_PATTERN_LOOKUP: // fall through 1139 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1140 // return Collections.unmodifiableCollection(SPEntries.entrySet()).iterator(); 1141 return Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1142 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1143 // return Collections.unmodifiableCollection(RTEntries.entrySet()).iterator(); 1144 // return Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1145 default: 1146 return Collections.unmodifiableCollection(MEntries.entrySet()).iterator(); 1147 } 1148 } 1149 replace(String lookup, String... arguments)1150 public static String replace(String lookup, String... arguments) { 1151 StringBuilder result = new StringBuilder(); 1152 int last = 0; 1153 while (true) { 1154 int pos = lookup.indexOf("$", last); 1155 if (pos < 0) { 1156 result.append(lookup.substring(last, lookup.length())); 1157 break; 1158 } 1159 result.append(lookup.substring(last, pos)); 1160 final int arg = lookup.charAt(pos + 1) - '0'; 1161 try { 1162 result.append(arguments[arg]); 1163 } catch (Exception e) { 1164 throw new IllegalArgumentException("Replacing $" + arg + " in <" + lookup 1165 + ">, but too few arguments supplied."); 1166 } 1167 last = pos + 2; 1168 } 1169 return result.toString(); 1170 } 1171 1172 /** 1173 * @return the number of entries 1174 */ size()1175 public int size() { 1176 switch (_lookupType) { 1177 case STAR_PATTERN_LOOKUP: // fall through 1178 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1179 // return SPEntries.size(); 1180 return storage.size(); 1181 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1182 // return storage.size(); 1183 // return RTEntries.size(); 1184 default: 1185 return MEntries.size(); 1186 } 1187 } 1188 1189 @Override toString()1190 public String toString() { 1191 return storage.toString(); 1192 } 1193 }