1 package org.unicode.cldr.util; 2 3 import java.util.ArrayList; 4 import java.util.Collections; 5 import java.util.Comparator; 6 import java.util.HashMap; 7 import java.util.Iterator; 8 import java.util.LinkedHashMap; 9 import java.util.List; 10 import java.util.Map; 11 import java.util.Map.Entry; 12 import java.util.Set; 13 import java.util.TreeSet; 14 import java.util.regex.Matcher; 15 import java.util.regex.Pattern; 16 17 import org.unicode.cldr.util.CldrUtility.VariableReplacer; 18 import org.unicode.cldr.util.RegexFileParser.RegexLineParser; 19 import org.unicode.cldr.util.RegexFileParser.VariableProcessor; 20 import org.unicode.cldr.util.RegexLookup.Finder; 21 import org.unicode.cldr.util.RegexLookup.Finder.Info; 22 23 import com.google.common.base.Splitter; 24 import com.ibm.icu.text.Transform; 25 import com.ibm.icu.util.Output; 26 27 /** 28 * Lookup items according to a set of regex patterns. Returns the value according to the first pattern that matches. Not 29 * thread-safe. 30 * 31 * @param <T> 32 */ 33 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> { 34 private VariableReplacer variables = new VariableReplacer(); 35 private StorageInterfaceBase<T> storage; 36 // private StarPatternMap<T> SPEntries; 37 // private RegexTree<T> RTEntries; 38 private Map<Finder, T> MEntries; 39 private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform; 40 private Transform<String, ? extends T> valueTransform; 41 private Merger<T> valueMerger; 42 private final boolean allowNull = false; 43 private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 44 45 public enum LookupType { 46 STAR_PATTERN_LOOKUP, OPTIMIZED_DIRECTORY_PATTERN_LOOKUP, STANDARD 47 } 48 49 private LookupType _lookupType; 50 51 /* 52 * STAR_PATTERN_LOOKUP 53 * 54 * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below. 55 * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur 56 * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression, 57 * thus greatly reducing the number of actual regex matches that need to occur. 58 */ 59 60 /* 61 * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP 62 * 63 * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored. 64 * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/' 65 * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"] 66 * 67 * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored. 68 * However regex's no longer need to follow any specific format (Slower but more versatile). 69 */ 70 RegexLookup(LookupType type)71 public RegexLookup(LookupType type) { 72 _lookupType = type; 73 switch (type) { 74 case STAR_PATTERN_LOOKUP: 75 // SPEntries = new StarPatternMap<T>(); 76 storage = new StarPatternMap<>(); 77 break; 78 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 79 // RTEntries = new RegexTree<T>(); 80 storage = new RegexTree<>(); 81 break; 82 default: 83 MEntries = new LinkedHashMap<>(); 84 break; 85 } 86 } 87 RegexLookup()88 public RegexLookup() { 89 this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP); 90 // _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP; 91 // RTEntries = new RegexTree<T>(); 92 } 93 94 public abstract static class Finder { 95 public static class Info { 96 public String[] value; 97 } 98 99 // abstract public String[] getInfo(); 100 101 // abstract public boolean find(String item, Object context); 102 find(String item, Object context, Info info)103 abstract public boolean find(String item, Object context, Info info); 104 matches(String item, Object context, Info info)105 abstract public boolean matches(String item, Object context, Info info); 106 getFailPoint(String source)107 public int getFailPoint(String source) { 108 return -1; 109 } 110 // must also define toString 111 } 112 113 public static class RegexFinder extends Finder { 114 /** 115 * The matcher used by this RegexFinder 116 */ 117 private final Matcher matcher; 118 119 /** 120 * The Pattern used by this RegexFinder 121 */ 122 protected final Pattern pattern; 123 RegexFinder(String pattern)124 public RegexFinder(String pattern) { 125 this.pattern = Pattern.compile(pattern, Pattern.COMMENTS); 126 matcher = this.pattern.matcher(""); 127 } 128 129 /** 130 * Call Matches on the pattern, returning additional information in the Info field, 131 * if it is non null 132 */ 133 @Override matches(String item, Object context, Info info)134 public boolean matches(String item, Object context, Info info) { 135 synchronized (matcher) { 136 try { 137 boolean result = matcher.reset(item).matches(); 138 extractInfo(info, result); 139 return result; 140 } catch (StringIndexOutOfBoundsException e) { 141 // We don't know what causes this error (cldrbug 5051) so 142 // make the exception message more detailed. 143 throw new IllegalArgumentException("Matching error caused by pattern: [" 144 + matcher.toString() + "] on text: [" + item + "]", e); 145 } 146 } 147 } 148 149 /** 150 * Extract match related information into the info field, if result is true, and info 151 * is not null. 152 * @param info 153 * @param result 154 */ extractInfo(Info info, boolean result)155 private void extractInfo(Info info, boolean result) { 156 if (result && info != null) { 157 int limit = matcher.groupCount() + 1; 158 String[] value = new String[limit]; 159 for (int i = 0; i < limit; ++i) { 160 value[i] = matcher.group(i); 161 } 162 info.value = value; 163 } 164 } 165 166 /** 167 * Call find() on the pattern, returning additional information in the info field, 168 * if it is non-null 169 */ 170 @Override find(String item, Object context, Info info)171 public boolean find(String item, Object context, Info info) { 172 synchronized (matcher) { 173 try { 174 boolean result = matcher.reset(item).find(); 175 extractInfo(info, result); 176 return result; 177 } catch (StringIndexOutOfBoundsException e) { 178 // We don't know what causes this error (cldrbug 5051) so 179 // make the exception message more detailed. 180 throw new IllegalArgumentException("Matching error caused by pattern: [" 181 + matcher.toString() + "] on text: [" + item + "]", e); 182 } 183 } 184 } 185 186 @Override toString()187 public String toString() { 188 // Use pattern here, to avoid having to synchronize on matcher 189 return pattern.pattern(); 190 } 191 192 @Override equals(Object obj)193 public boolean equals(Object obj) { 194 if (obj == null) { 195 return false; 196 } else { 197 return toString().equals(obj.toString()); 198 } 199 } 200 201 @Override hashCode()202 public int hashCode() { 203 return toString().hashCode(); 204 } 205 206 @Override getFailPoint(String source)207 public int getFailPoint(String source) { 208 synchronized (matcher) { 209 return RegexUtilities.findMismatch(matcher, source); 210 } 211 } 212 } 213 214 private static interface StorageInterfaceBase<T> { entrySet()215 Set<Entry<Finder, T>> entrySet(); 216 get(Finder finder)217 T get(Finder finder); 218 get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)219 T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound); 220 getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)221 List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo); 222 put(Finder pattern, T value)223 void put(Finder pattern, T value); 224 size()225 int size(); 226 } 227 228 // private static class FinderWithInfo { 229 // Finder _finder; 230 // Info _info; 231 // public FinderWithInfo(Finder finder,Info info) { 232 // _info=info; 233 // _finder=finder; 234 // } 235 // } 236 private static class RegexTree<T> implements StorageInterfaceBase<T> { 237 private RTNode root; 238 private int _size; 239 private RTNodeRankComparator rankComparator = new RTNodeRankComparator(); 240 RegexTree()241 public RegexTree() { 242 root = new RTNode("", null); 243 _size = 0; 244 } 245 246 @Override size()247 public int size() { 248 return _size; 249 } 250 251 @Override put(Finder pattern, T value)252 public void put(Finder pattern, T value) { 253 root.put(new RTNode(pattern, value, _size)); 254 _size++; 255 } 256 257 @Override get(Finder finder)258 public T get(Finder finder) { 259 return root.get(finder); 260 } 261 262 @Override getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)263 public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) { 264 List<RTNode> list = new ArrayList<>(); 265 List<T> retList = new ArrayList<>(); 266 267 root.addToList(pattern, context, list); 268 Collections.sort(list, rankComparator); 269 270 boolean isFirst = true; 271 if (firstInfo != null && !list.isEmpty()) { 272 RTNode firstNode = list.get(0); 273 if (firstNode._info != null) { 274 firstInfo.value = firstNode._info.value; 275 } 276 } 277 278 for (RTNode n : list) { 279 if (isFirst) { 280 firstInfo.value = n._info.value; 281 isFirst = false; 282 } 283 } 284 285 for (RTNode n : list) { 286 retList.add(n._val); 287 if (matcherList != null) { 288 matcherList.add(n._finder); 289 } 290 } 291 292 return retList; 293 } 294 295 @Override get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)296 public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) { 297 List<Finder> matcherList = new ArrayList<>(); 298 Output<String[]> firstInfo = new Output<>(); 299 List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first 300 if (arguments != null) { 301 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 302 arguments.value = firstInfo.value; 303 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 304 arguments.value = firstInfo.value; 305 } 306 if (matcherFound != null) { 307 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 308 } 309 return (matches.size() > 0) ? matches.get(0) : null; 310 } 311 312 @Override entrySet()313 public Set<Entry<Finder, T>> entrySet() { 314 LinkedHashMap<Finder, T> ret = new LinkedHashMap<>(); 315 TreeSet<RTNode> s = new TreeSet<>(rankComparator); 316 root.addToEntrySet(s); 317 318 for (RTNode n : s) { 319 ret.put(n._finder, n._val); 320 } 321 322 return ret.entrySet(); 323 } 324 325 public class RTNode extends NodeBase<T> { 326 // Finder _finder; 327 // T _val; 328 List<RTNode> _children = new ArrayList<>(); 329 int _rank = -1; //rank -1 means the node was not inserted, but only used for structural purposes 330 331 //constructor for regular nodes with a Finder RTNode(Finder finder, T val, int rank)332 public RTNode(Finder finder, T val, int rank) { 333 super(finder, val); 334 // _finder = finder; 335 // _val = val; 336 _rank = rank; 337 } 338 339 //constructors for nodes without a Finder RTNode(String key, T val)340 public RTNode(String key, T val) { 341 super(new RegexFinder(key), val); 342 // _finder = new RegexFinder(key); 343 // _val = val; 344 // _rank = -1; 345 _info = new Info(); 346 } 347 put(RTNode node)348 public void put(RTNode node) { 349 if (_children.size() == 0) { 350 _children.add(node); 351 } else { 352 String maxSimilarChars = ""; //most similar characters up to the last similar directory 353 int insertIndex = 0; 354 for (int i = 0; i < _children.size(); i++) { 355 RTNode child = _children.get(i); 356 String childFinderPattern = child._finder.toString(); 357 if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') { 358 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$" 359 } else if (child._rank == -1) { 360 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*" 361 } 362 363 //check if child has the same Finder as node to insert, then replace it 364 if (node._finder.equals(child._finder)) { 365 child._finder = node._finder; 366 child._val = node._val; 367 if (child._rank == -1) { 368 child._rank = node._rank; 369 } else { 370 _size--; 371 } 372 return; 373 } 374 375 //check if child is the parent of node 376 if (child._rank == -1 && node._finder.toString().startsWith(childFinderPattern)) { 377 child.put(node); 378 return; 379 } 380 381 //if not parent then check if candidate for most similar RTNode 382 String gcp = greatestCommonPrefix(childFinderPattern, node._finder.toString()); 383 gcp = removeExtraChars(gcp); 384 if (gcp.length() > maxSimilarChars.length()) { 385 maxSimilarChars = gcp; 386 insertIndex = i; 387 } 388 } 389 390 String finderPattern = this._finder.toString(); 391 if (finderPattern.length() > 0 && finderPattern.charAt(finderPattern.length() - 1) == '$') { 392 finderPattern = finderPattern.substring(0, finderPattern.length() - 1); //takes into account the added "$" 393 } else if (!(finderPattern.equals("")) && this._rank == -1) { 394 finderPattern = finderPattern.substring(0, finderPattern.length() - 2); //takes into account the added ".*" 395 } 396 397 if ((maxSimilarChars).equals(finderPattern)) { //add under this if no similar children 398 _children.add(node); 399 } else { 400 //create the common parent of the chosen candidate above and node, then add to the insert index 401 RTNode newParent = new RTNode(maxSimilarChars + ".*", null); 402 newParent._children.add(this._children.get(insertIndex)); 403 newParent._children.add(node); 404 this._children.remove(insertIndex); 405 this._children.add(insertIndex, newParent); 406 } 407 } 408 } 409 410 //takes a string in a directory regex form and removes all characters up to the last full directory removeExtraChars(String input)411 private String removeExtraChars(String input) { 412 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/'))); 413 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')')) || 414 (ret.lastIndexOf('[') != -1 && ret.lastIndexOf('[') > ret.lastIndexOf(']')) || 415 (ret.lastIndexOf('{') != -1 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) { 416 ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/'))); 417 } 418 return ret; 419 } 420 421 //traverse tree to get value get(Finder finder)422 public T get(Finder finder) { 423 T ret = null; //return value 424 425 if (_children.size() == 0) { 426 return null; 427 } else { 428 for (RTNode child : _children) { 429 430 //check if child is the node 431 if (child._rank != -1 && finder.equals(child._finder)) { 432 return child._val; 433 } 434 435 String childFinderPattern = child._finder.toString(); 436 437 if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') { 438 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$" 439 } else if (child._rank == -1) { 440 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*" 441 } 442 443 //check if child is the parent of node 444 if (finder.toString().startsWith(childFinderPattern)) { 445 ret = child.get(finder); 446 if (ret != null) { 447 break; 448 } 449 } 450 } 451 452 return ret; 453 } 454 } 455 456 //traverse tree to get an entry set addToEntrySet(TreeSet<RTNode> s)457 public void addToEntrySet(TreeSet<RTNode> s) { 458 if (_children.size() == 0) { 459 return; 460 } else { 461 for (RTNode child : _children) { 462 if (child._rank != -1) { 463 s.add(child); 464 } 465 child.addToEntrySet(s); 466 } 467 } 468 } 469 470 //traverse tree to get list of all values who's key matcher matches pattern addToList(String pattern, Object context, List<RTNode> list)471 public void addToList(String pattern, Object context, List<RTNode> list) { 472 if (_children.size() == 0) { 473 return; 474 } else { 475 Info firstInfo = new Info(); 476 for (RTNode child : _children) { 477 boolean found; 478 synchronized (child._finder) { 479 found = child._finder.find(pattern, context, firstInfo); 480 } 481 482 //check if child matches pattern 483 if (found) { 484 if (child._rank != -1) { 485 list.add(child); 486 } 487 // if this node's info value is unset, set it to the result of the 488 // lookup 489 // if (child._info!=null && child._info.value==null) { 490 if (child._info != null) { 491 // set the value to the result of the last find 492 child._info.value = firstInfo.value; 493 } else { 494 // for some reason, child._info is null, so simply initialize it. 495 child._info = new Info(); 496 // set the value to the result of the last find 497 child._info.value = firstInfo.value; 498 } 499 //check if child is the parent of node then enter that node 500 child.addToList(pattern, context, list); 501 } 502 } 503 } 504 } 505 506 @Override toString()507 public String toString() { 508 return toString("", new StringBuilder()).toString(); 509 } 510 toString(String prefix, StringBuilder result)511 private StringBuilder toString(String prefix, StringBuilder result) { 512 result.append(prefix).append(this._finder.toString()).append("\n"); 513 for (RegexTree<T>.RTNode child : _children) { 514 child.toString(prefix + "\t", result); 515 } 516 return result; 517 } 518 519 //greatest common prefix between two strings greatestCommonPrefix(String a, String b)520 public String greatestCommonPrefix(String a, String b) { 521 int minLength = Math.min(a.length(), b.length()); 522 for (int i = 0; i < minLength; i++) { 523 if (a.charAt(i) != b.charAt(i)) { 524 return a.substring(0, i); 525 } 526 } 527 return a.substring(0, minLength); 528 } 529 } 530 531 class RTNodeRankComparator implements Comparator<RTNode> { 532 @Override compare(RTNode a, RTNode b)533 public int compare(RTNode a, RTNode b) { 534 if (a == b) { 535 return 0; 536 } else if (a == null) { 537 return -1; 538 } else if (b == null) { 539 return 1; 540 } else if (a._rank == b._rank) { 541 return 0; 542 } else if (a._rank > b._rank) { 543 return 1; 544 } else { 545 return -1; 546 } 547 } 548 } 549 550 @Override toString()551 public String toString() { 552 return root.toString(); 553 } 554 } 555 556 private static class StarPatternMap<T> implements StorageInterfaceBase<T> { 557 private Map<String, List<SPNode>> _spmap; 558 private int _size = 0; 559 StarPatternMap()560 public StarPatternMap() { 561 _spmap = new HashMap<>(); 562 // _size = 0; 563 } 564 565 @Override size()566 public int size() { 567 return _size; 568 } 569 570 @Override put(Finder pattern, T value)571 public void put(Finder pattern, T value) { 572 //System.out.println("pattern.toString() is => "+pattern.toString()); 573 String starPattern = pathStarrer.transform2(pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*")); 574 //System.out.println("Putting => "+starPattern); 575 List<SPNode> candidates = _spmap.get(starPattern); 576 if (candidates == null) { 577 candidates = new ArrayList<>(); 578 } 579 SPNode newNode = new SPNode(pattern, value); 580 candidates.add(newNode); 581 _spmap.put(starPattern, candidates); 582 _size++; 583 } 584 585 @Override get(Finder finder)586 public T get(Finder finder) { 587 String starPattern = pathStarrer.transform2(finder.toString()); 588 List<SPNode> candidates = _spmap.get(starPattern); 589 if (candidates == null) { 590 return null; 591 } 592 for (SPNode cand : candidates) { 593 if (cand._finder.equals(finder)) { 594 return cand._val; 595 } 596 } 597 return null; 598 } 599 600 @Override getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)601 public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) { 602 List<SPNode> list = new ArrayList<>(); 603 List<T> retList = new ArrayList<>(); 604 605 String starPattern = pathStarrer.transform2(pattern); 606 List<SPNode> candidates = _spmap.get(starPattern); 607 if (candidates == null) { 608 return retList; 609 } 610 for (SPNode cand : candidates) { 611 Info info = new Info(); 612 if (cand._finder.find(pattern, context, info)) { 613 list.add(cand); 614 if (firstInfo != null) { 615 firstInfo.value = info.value; 616 } 617 } 618 } 619 620 for (SPNode n : list) { 621 retList.add(n._val); 622 if (matcherList != null) { 623 matcherList.add(n._finder); 624 } 625 } 626 627 return retList; 628 } 629 630 @Override get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)631 public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) { 632 List<Finder> matcherList = new ArrayList<>(); 633 Output<String[]> firstInfo = new Output<>(); 634 List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first 635 if (arguments != null && firstInfo.value != null) { 636 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 637 arguments.value = matcherList.isEmpty() ? null : firstInfo.value; 638 } 639 if (matcherFound != null) { 640 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 641 } 642 return (matches.size() > 0) ? matches.get(0) : null; 643 } 644 645 @Override entrySet()646 public Set<Entry<Finder, T>> entrySet() { 647 LinkedHashMap<Finder, T> ret = new LinkedHashMap<>(); 648 649 for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) { 650 List<SPNode> candidates = entry.getValue(); 651 for (SPNode node : candidates) { 652 ret.put(node._finder, node._val); 653 } 654 } 655 return ret.entrySet(); 656 } 657 658 /** 659 * A Node of a StarPatternMap 660 * @author ribnitz 661 * 662 */ 663 public class SPNode extends NodeBase<T> { 664 // Finder _finder; 665 // T _val; 666 SPNode(Finder finder, T val)667 public SPNode(Finder finder, T val) { 668 // _finder = finder; 669 // _val = val; 670 super(finder, val); 671 } 672 673 @Override toString()674 public String toString() { 675 return this._finder.toString(); 676 } 677 } 678 } 679 680 /** 681 * The basic class of an information node, featuring a Finder, a value and an Info 682 * 683 * @author ribnitz 684 * 685 * @param <T> 686 */ 687 private static class NodeBase<T> { 688 Finder _finder; 689 T _val; 690 Info _info = new Info(); 691 NodeBase(Finder finder, T value)692 public NodeBase(Finder finder, T value) { 693 this._finder = finder; 694 this._val = value; 695 } 696 } 697 698 public static Transform<String, RegexFinder> RegexFinderTransform = new Transform<String, RegexFinder>() { 699 @Override 700 public RegexFinder transform(String source) { 701 return new RegexFinder(source); 702 } 703 }; 704 705 /** 706 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //. To work better 707 * with XPaths. 708 */ 709 public static Transform<String, RegexFinder> RegexFinderTransformPath = new Transform<String, RegexFinder>() { 710 @Override 711 public RegexFinder transform(String source) { 712 final String newSource = source.replace("[@", "\\[@"); 713 return new RegexFinder(newSource.startsWith("//") 714 ? "^" + newSource 715 : newSource); 716 } 717 }; 718 719 /** 720 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //, and ' is changed to ". 721 * To work better with XPaths. 722 */ 723 public static Transform<String, RegexFinder> RegexFinderTransformPath2 = new Transform<String, RegexFinder>() { 724 @Override 725 public RegexFinder transform(String source) { 726 final String newSource = source.replace("[@", "\\[@").replace('\'', '"'); 727 return new RegexFinder(newSource.startsWith("//") 728 ? "^" + newSource 729 : newSource); 730 } 731 }; 732 733 /** 734 * Allows for merging items of the same type. 735 * 736 * @param <T> 737 */ 738 public interface Merger<T> { merge(T a, T into)739 T merge(T a, T into); 740 } 741 742 /** 743 * Returns the result of a regex lookup. 744 * 745 * @param source 746 * @return 747 */ get(String source)748 public final T get(String source) { 749 return get(source, null, null, null, null); 750 } 751 752 /** 753 * Returns the result of a regex lookup, with the group arguments that matched. 754 * 755 * @param source 756 * @param context 757 * TODO 758 * @return 759 */ get(String source, Object context, Output<String[]> arguments)760 public T get(String source, Object context, Output<String[]> arguments) { 761 return get(source, context, arguments, null, null); 762 } 763 764 /** 765 * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure cases for 766 * debugging. 767 * 768 * @param source 769 * @param context 770 * @return 771 */ get(String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)772 public T get(String source, Object context, Output<String[]> arguments, 773 Output<Finder> matcherFound, List<String> failures) { 774 775 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 776 // T ret = SPEntries.get(source, context, arguments, matcherFound); 777 T ret = storage.get(source, context, arguments, matcherFound); 778 if (ret != null) { 779 return ret; 780 } 781 782 if (failures != null) { 783 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 784 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 785 Finder matcher = entry.getKey(); 786 synchronized (matcher) { 787 int failPoint = matcher.getFailPoint(source); 788 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 789 + matcher.toString(); 790 failures.add(show); 791 } 792 } 793 } 794 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 795 // T ret = RTEntries.get(source, context, arguments, matcherFound); 796 T ret = storage.get(source, context, arguments, matcherFound); 797 if (ret != null) { 798 return ret; 799 } 800 801 if (failures != null) { 802 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 803 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 804 Finder matcher = entry.getKey(); 805 synchronized (matcher) { 806 int failPoint = matcher.getFailPoint(source); 807 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 808 + matcher.toString(); 809 failures.add(show); 810 } 811 } 812 } 813 } else { 814 //slow but versatile implementation 815 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 816 Finder matcher = entry.getKey(); 817 synchronized (matcher) { 818 Info firstInfo = new Info(); 819 if (matcher.find(source, context, firstInfo)) { 820 if (arguments != null) { 821 // arguments.value = matcher.getInfo(); 822 arguments.value = firstInfo.value; 823 } 824 if (matcherFound != null) { 825 matcherFound.value = matcher; 826 } 827 return entry.getValue(); 828 } else if (failures != null) { 829 int failPoint = matcher.getFailPoint(source); 830 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 831 + matcher.toString(); 832 failures.add(show); 833 } 834 } 835 } 836 } 837 838 // not really necessary, but makes debugging easier. 839 if (arguments != null) { 840 arguments.value = null; 841 } 842 if (matcherFound != null) { 843 matcherFound.value = null; 844 } 845 return null; 846 } 847 848 /** 849 * Returns all results of a regex lookup, with the group arguments that matched. Supplies failure cases for 850 * debugging. 851 * 852 * @param source 853 * @param context 854 * TODO 855 * @return 856 */ getAll(String source, Object context, List<Finder> matcherList, List<String> failures)857 public List<T> getAll(String source, Object context, List<Finder> matcherList, List<String> failures) { 858 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 859 Output<String[]> firstInfo = new Output<>(); 860 // List<T> matches = SPEntries.getAll(source, context, matcherList,firstInfo); 861 List<T> matches = storage.getAll(source, context, matcherList, firstInfo); 862 if (matches != null) { 863 return matches; 864 } 865 866 if (failures != null) { 867 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 868 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 869 Finder matcher = entry.getKey(); 870 synchronized (matcher) { 871 int failPoint = matcher.getFailPoint(source); 872 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 873 + matcher.toString(); 874 failures.add(show); 875 } 876 } 877 } 878 return null; 879 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 880 Output<String[]> info = new Output<>(); 881 // List<T> matches = RTEntries.getAll(source, context, matcherList,info); 882 List<T> matches = storage.getAll(source, context, matcherList, info); 883 if (matches != null) { 884 return matches; 885 } 886 887 if (failures != null) { 888 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 889 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 890 Finder matcher = entry.getKey(); 891 synchronized (matcher) { 892 int failPoint = matcher.getFailPoint(source); 893 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 894 + matcher.toString(); 895 failures.add(show); 896 } 897 } 898 } 899 return null; 900 } else { 901 //slow but versatile implementation 902 List<T> matches = new ArrayList<>(); 903 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 904 Finder matcher = entry.getKey(); 905 Info firstInfo = new Info(); 906 if (matcher.find(source, context, firstInfo)) { 907 if (matcherList != null) { 908 matcherList.add(matcher); 909 } 910 matches.add(entry.getValue()); 911 } else if (failures != null) { 912 int failPoint = matcher.getFailPoint(source); 913 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 914 + matcher.toString(); 915 failures.add(show); 916 } 917 } 918 return matches; 919 } 920 } 921 922 /** 923 * Find the patterns that haven't been matched. Requires the caller to collect the patterns that have, using 924 * matcherFound. 925 * 926 * @return outputUnmatched 927 */ getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched)928 public Map<String, T> getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched) { 929 outputUnmatched.clear(); 930 931 Set<Map.Entry<Finder, T>> entrySet; 932 switch (_lookupType) { 933 case STAR_PATTERN_LOOKUP: 934 // entrySet = SPEntries.entrySet(); 935 entrySet = storage.entrySet(); 936 break; 937 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 938 // entrySet = RTEntries.entrySet(); 939 entrySet = storage.entrySet(); 940 break; 941 default: 942 entrySet = MEntries.entrySet(); 943 break; 944 } 945 946 for (Map.Entry<Finder, T> entry : entrySet) { 947 String pattern = entry.getKey().toString(); 948 if (!matched.contains(pattern)) { 949 outputUnmatched.put(pattern, entry.getValue()); 950 } 951 } 952 return outputUnmatched; 953 } 954 955 /** 956 * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex pattern and the value is 957 * what gets returned. 958 * 959 * @param patternTransform 960 * Used to transform string patterns into a Pattern. Can be used to process replacements (like 961 * variables). 962 * @param valueTransform 963 * Used to transform string values into another form. 964 * @param valueMerger 965 * Used to merge values with the same key. 966 */ of(Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)967 public static <T, U> RegexLookup<T> of(Transform<String, Finder> patternTransform, 968 Transform<String, T> valueTransform, Merger<T> valueMerger) { 969 return new RegexLookup<T>().setPatternTransform(patternTransform).setValueTransform(valueTransform) 970 .setValueMerger(valueMerger); 971 } 972 of(Transform<String, T> valueTransform)973 public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) { 974 return new RegexLookup<T>().setValueTransform(valueTransform).setPatternTransform(RegexFinderTransform); 975 } 976 of()977 public static <T> RegexLookup<T> of() { 978 return new RegexLookup<T>().setPatternTransform(RegexFinderTransform); 979 } 980 981 /** 982 * @deprecated Use {@link #of(LookupType,Transform)} instead 983 */ 984 @Deprecated of(LookupType lookupType)985 public static <T> RegexLookup<T> of(LookupType lookupType) { 986 return of(lookupType, RegexFinderTransform); 987 } 988 of(LookupType lookupType, Transform<String, RegexFinder> transform)989 public static <T> RegexLookup<T> of(LookupType lookupType, Transform<String, RegexFinder> transform) { 990 return new RegexLookup<T>(lookupType).setPatternTransform(transform); 991 } 992 setValueTransform(Transform<String, ? extends T> valueTransform)993 public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) { 994 this.valueTransform = valueTransform; 995 return this; 996 } 997 setPatternTransform(Transform<String, ? extends Finder> patternTransform)998 public RegexLookup<T> setPatternTransform(Transform<String, ? extends Finder> patternTransform) { 999 this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform; 1000 return this; 1001 } 1002 setValueMerger(Merger<T> valueMerger)1003 public RegexLookup<T> setValueMerger(Merger<T> valueMerger) { 1004 this.valueMerger = valueMerger; 1005 return this; 1006 } 1007 1008 /** 1009 * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated by "; ". Lines 1010 * starting with # are comments. 1011 */ loadFromFile(Class<?> baseClass, String filename)1012 public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) { 1013 RegexFileParser parser = setupRegexFileParser(); 1014 parser.parse(baseClass, filename); 1015 return this; 1016 } 1017 1018 /** 1019 * Load a RegexLookup from a list of strings, as if they came from a file. See loadFromFile 1020 */ loadFromString(String source)1021 public RegexLookup<T> loadFromString(String source) { 1022 RegexFileParser parser = setupRegexFileParser(); 1023 parser.parseStrings("string", Splitter.on('\n').split(source)); 1024 return this; 1025 } 1026 setupRegexFileParser()1027 private RegexFileParser setupRegexFileParser() { 1028 RegexFileParser parser = new RegexFileParser(); 1029 parser.setLineParser(new RegexLineParser() { 1030 @Override 1031 public void parse(String line) { 1032 int pos = line.indexOf("; "); 1033 if (pos < 0) { 1034 throw new IllegalArgumentException("Illegal line, doesn't contain semicolon: " + line); 1035 } 1036 String source = line.substring(0, pos).trim(); 1037 String target = line.substring(pos + 2).trim(); 1038 try { 1039 @SuppressWarnings("unchecked") 1040 T result = valueTransform == null ? (T) target : valueTransform.transform(target); 1041 add(source, result); 1042 } catch (Exception e) { 1043 throw new IllegalArgumentException("Failed to add <" + source + "> => <" + target + ">", e); 1044 } 1045 } 1046 }); 1047 parser.setVariableProcessor(new VariableProcessor() { 1048 @Override 1049 public void add(String variable, String variableName) { 1050 addVariable(variable, variableName); 1051 } 1052 1053 @Override 1054 public String replace(String str) { 1055 return variables.replace(str); 1056 } 1057 }); 1058 return parser; 1059 } 1060 addVariable(String variable, String variableValue)1061 public RegexLookup<T> addVariable(String variable, String variableValue) { 1062 if (!variable.startsWith("%")) { 1063 throw new IllegalArgumentException("Variables must start with %"); 1064 } 1065 variables.add(variable.trim(), variableValue.trim()); 1066 return this; 1067 } 1068 1069 /** 1070 * Add a pattern/value pair. 1071 * 1072 * @param stringPattern 1073 * @param target 1074 * @return this, for chaining 1075 */ add(String stringPattern, T target)1076 public RegexLookup<T> add(String stringPattern, T target) { 1077 if (stringPattern.contains("%")) { 1078 stringPattern = variables.replace(stringPattern); 1079 } 1080 Finder pattern0 = patternTransform.transform(stringPattern); 1081 return add(pattern0, target); 1082 } 1083 1084 /** 1085 * Add a pattern/value pair. 1086 * 1087 * @param pattern 1088 * @param target 1089 * @return this, for chaining 1090 */ add(Finder pattern, T target)1091 public RegexLookup<T> add(Finder pattern, T target) { 1092 if (!allowNull && target == null) { 1093 throw new NullPointerException("null disallowed, unless allowNull(true) is called."); 1094 } 1095 1096 T old; 1097 switch (_lookupType) { 1098 case STAR_PATTERN_LOOKUP: // fallthrough 1099 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1100 old = storage.get(pattern); 1101 // old = SPEntries.get(pattern); 1102 break; 1103 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1104 // old = storage.get(pattern); 1105 // old = RTEntries.get(pattern); 1106 // break; 1107 default: 1108 old = MEntries.get(pattern); 1109 break; 1110 } 1111 1112 if (old == null) { 1113 switch (_lookupType) { 1114 case STAR_PATTERN_LOOKUP: // fallthrough 1115 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1116 storage.put(pattern, target); 1117 // SPEntries.put(pattern, target); 1118 break; 1119 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1120 // storage.put(pattern, target); 1121 // RTEntries.put(pattern, target); 1122 // break; 1123 default: 1124 MEntries.put(pattern, target); 1125 break; 1126 } 1127 } else if (valueMerger != null) { 1128 valueMerger.merge(target, old); 1129 } else { 1130 throw new IllegalArgumentException("Duplicate matcher without Merger defined " + pattern + "; old: " + old 1131 + "; new: " + target); 1132 } 1133 return this; 1134 } 1135 1136 @Override iterator()1137 public Iterator<Map.Entry<Finder, T>> iterator() { 1138 switch (_lookupType) { 1139 case STAR_PATTERN_LOOKUP: // fall through 1140 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1141 // return Collections.unmodifiableCollection(SPEntries.entrySet()).iterator(); 1142 return Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1143 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1144 // return Collections.unmodifiableCollection(RTEntries.entrySet()).iterator(); 1145 // return Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1146 default: 1147 return Collections.unmodifiableCollection(MEntries.entrySet()).iterator(); 1148 } 1149 } 1150 replace(String lookup, String... arguments)1151 public static String replace(String lookup, String... arguments) { 1152 StringBuilder result = new StringBuilder(); 1153 int last = 0; 1154 while (true) { 1155 int pos = lookup.indexOf("$", last); 1156 if (pos < 0) { 1157 result.append(lookup.substring(last, lookup.length())); 1158 break; 1159 } 1160 result.append(lookup.substring(last, pos)); 1161 final int arg = lookup.charAt(pos + 1) - '0'; 1162 try { 1163 result.append(arguments[arg]); 1164 } catch (Exception e) { 1165 throw new IllegalArgumentException("Replacing $" + arg + " in <" + lookup 1166 + ">, but too few arguments supplied."); 1167 } 1168 last = pos + 2; 1169 } 1170 return result.toString(); 1171 } 1172 1173 /** 1174 * @return the number of entries 1175 */ size()1176 public int size() { 1177 switch (_lookupType) { 1178 case STAR_PATTERN_LOOKUP: // fall through 1179 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1180 // return SPEntries.size(); 1181 return storage.size(); 1182 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1183 // return storage.size(); 1184 // return RTEntries.size(); 1185 default: 1186 return MEntries.size(); 1187 } 1188 } 1189 1190 @Override toString()1191 public String toString() { 1192 return storage.toString(); 1193 } 1194 }