1 package org.unicode.cldr.util; 2 3 import java.util.ArrayList; 4 import java.util.Collections; 5 import java.util.Comparator; 6 import java.util.HashMap; 7 import java.util.Iterator; 8 import java.util.LinkedHashMap; 9 import java.util.List; 10 import java.util.Map; 11 import java.util.Map.Entry; 12 import java.util.Set; 13 import java.util.TreeSet; 14 import java.util.regex.Matcher; 15 import java.util.regex.Pattern; 16 17 import org.unicode.cldr.util.CldrUtility.VariableReplacer; 18 import org.unicode.cldr.util.RegexFileParser.RegexLineParser; 19 import org.unicode.cldr.util.RegexFileParser.VariableProcessor; 20 import org.unicode.cldr.util.RegexLookup.Finder; 21 import org.unicode.cldr.util.RegexLookup.Finder.Info; 22 23 import com.ibm.icu.text.Transform; 24 import com.ibm.icu.util.Output; 25 26 /** 27 * Lookup items according to a set of regex patterns. Returns the value according to the first pattern that matches. Not 28 * thread-safe. 29 * 30 * @param <T> 31 */ 32 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> { 33 private VariableReplacer variables = new VariableReplacer(); 34 private StorageInterfaceBase<T> storage; 35 // private StarPatternMap<T> SPEntries; 36 // private RegexTree<T> RTEntries; 37 private Map<Finder, T> MEntries; 38 private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform; 39 private Transform<String, ? extends T> valueTransform; 40 private Merger<T> valueMerger; 41 private final boolean allowNull = false; 42 private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 43 44 public enum LookupType { 45 STAR_PATTERN_LOOKUP, OPTIMIZED_DIRECTORY_PATTERN_LOOKUP, STANDARD 46 }; 47 48 private LookupType _lookupType; 49 50 /* 51 * STAR_PATTERN_LOOKUP 52 * 53 * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below. 54 * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur 55 * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression, 56 * thus greatly reducing the number of actual regex matches that need to occur. 57 */ 58 59 /* 60 * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP 61 * 62 * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored. 63 * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/' 64 * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"] 65 * 66 * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored. 67 * However regex's no longer need to follow any specific format (Slower but more versatile). 68 */ 69 RegexLookup(LookupType type)70 public RegexLookup(LookupType type) { 71 _lookupType = type; 72 switch (type) { 73 case STAR_PATTERN_LOOKUP: 74 // SPEntries = new StarPatternMap<T>(); 75 storage = new StarPatternMap<T>(); 76 break; 77 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 78 // RTEntries = new RegexTree<T>(); 79 storage = new RegexTree<T>(); 80 break; 81 default: 82 MEntries = new LinkedHashMap<Finder, T>(); 83 break; 84 } 85 } 86 RegexLookup()87 public RegexLookup() { 88 this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP); 89 // _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP; 90 // RTEntries = new RegexTree<T>(); 91 } 92 93 public abstract static class Finder { 94 public static class Info { 95 public String[] value; 96 } 97 98 // abstract public String[] getInfo(); 99 100 // abstract public boolean find(String item, Object context); 101 find(String item, Object context, Info info)102 abstract public boolean find(String item, Object context, Info info); 103 matches(String item, Object context, Info info)104 abstract public boolean matches(String item, Object context, Info info); 105 getFailPoint(String source)106 public int getFailPoint(String source) { 107 return -1; 108 } 109 // must also define toString 110 } 111 112 public static class RegexFinder extends Finder { 113 /** 114 * The matcher used by this RegexFinder 115 */ 116 private final Matcher matcher; 117 118 /** 119 * The Pattern used by this RegexFinder 120 */ 121 protected final Pattern pattern; 122 RegexFinder(String pattern)123 public RegexFinder(String pattern) { 124 this.pattern = Pattern.compile(pattern, Pattern.COMMENTS); 125 matcher = this.pattern.matcher(""); 126 } 127 128 /** 129 * Call Matches on the pattern, returning additional information in the Info field, 130 * if it is non null 131 */ matches(String item, Object context, Info info)132 public boolean matches(String item, Object context, Info info) { 133 synchronized (matcher) { 134 try { 135 boolean result = matcher.reset(item).matches(); 136 extractInfo(info, result); 137 return result; 138 } catch (StringIndexOutOfBoundsException e) { 139 // We don't know what causes this error (cldrbug 5051) so 140 // make the exception message more detailed. 141 throw new IllegalArgumentException("Matching error caused by pattern: [" 142 + matcher.toString() + "] on text: [" + item + "]", e); 143 } 144 } 145 } 146 147 /** 148 * Extract match related information into the info field, if result is true, and info 149 * is not null. 150 * @param info 151 * @param result 152 */ extractInfo(Info info, boolean result)153 private void extractInfo(Info info, boolean result) { 154 if (result && info != null) { 155 int limit = matcher.groupCount() + 1; 156 String[] value = new String[limit]; 157 for (int i = 0; i < limit; ++i) { 158 value[i] = matcher.group(i); 159 } 160 info.value = value; 161 } 162 } 163 164 /** 165 * Call find() on the pattern, returning additional information in the info field, 166 * if it is non-null 167 */ find(String item, Object context, Info info)168 public boolean find(String item, Object context, Info info) { 169 synchronized (matcher) { 170 try { 171 boolean result = matcher.reset(item).find(); 172 extractInfo(info, result); 173 return result; 174 } catch (StringIndexOutOfBoundsException e) { 175 // We don't know what causes this error (cldrbug 5051) so 176 // make the exception message more detailed. 177 throw new IllegalArgumentException("Matching error caused by pattern: [" 178 + matcher.toString() + "] on text: [" + item + "]", e); 179 } 180 } 181 } 182 toString()183 public String toString() { 184 // Use pattern here, to avoid having to synchronize on matcher 185 return pattern.pattern(); 186 } 187 188 @Override equals(Object obj)189 public boolean equals(Object obj) { 190 if (obj == null) { 191 return false; 192 } else { 193 return toString().equals(obj.toString()); 194 } 195 } 196 197 @Override hashCode()198 public int hashCode() { 199 return toString().hashCode(); 200 } 201 202 @Override getFailPoint(String source)203 public int getFailPoint(String source) { 204 synchronized (matcher) { 205 return RegexUtilities.findMismatch(matcher, source); 206 } 207 } 208 } 209 210 private static interface StorageInterfaceBase<T> { entrySet()211 Set<Entry<Finder, T>> entrySet(); 212 get(Finder finder)213 T get(Finder finder); 214 get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)215 T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound); 216 getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)217 List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo); 218 put(Finder pattern, T value)219 void put(Finder pattern, T value); 220 size()221 int size(); 222 } 223 224 // private static class FinderWithInfo { 225 // Finder _finder; 226 // Info _info; 227 // public FinderWithInfo(Finder finder,Info info) { 228 // _info=info; 229 // _finder=finder; 230 // } 231 // } 232 private static class RegexTree<T> implements StorageInterfaceBase<T> { 233 private RTNode root; 234 private int _size; 235 private RTNodeRankComparator rankComparator = new RTNodeRankComparator(); 236 RegexTree()237 public RegexTree() { 238 root = new RTNode("", null); 239 _size = 0; 240 } 241 242 @Override size()243 public int size() { 244 return _size; 245 } 246 247 @Override put(Finder pattern, T value)248 public void put(Finder pattern, T value) { 249 root.put(new RTNode(pattern, value, _size)); 250 _size++; 251 } 252 253 @Override get(Finder finder)254 public T get(Finder finder) { 255 return root.get(finder); 256 } 257 258 @Override getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)259 public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) { 260 List<RTNode> list = new ArrayList<RTNode>(); 261 List<T> retList = new ArrayList<T>(); 262 263 root.addToList(pattern, context, list); 264 Collections.sort(list, rankComparator); 265 266 boolean isFirst = true; 267 if (firstInfo != null && !list.isEmpty()) { 268 RTNode firstNode = list.get(0); 269 if (firstNode._info != null) { 270 firstInfo.value = firstNode._info.value; 271 } 272 } 273 274 for (RTNode n : list) { 275 if (isFirst) { 276 firstInfo.value = n._info.value; 277 isFirst = false; 278 } 279 } 280 281 for (RTNode n : list) { 282 retList.add(n._val); 283 if (matcherList != null) { 284 matcherList.add(n._finder); 285 } 286 } 287 288 return retList; 289 } 290 get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)291 public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) { 292 List<Finder> matcherList = new ArrayList<Finder>(); 293 Output<String[]> firstInfo = new Output<>(); 294 List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first 295 if (arguments != null) { 296 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 297 arguments.value = firstInfo.value; 298 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 299 arguments.value = firstInfo.value; 300 } 301 if (matcherFound != null) { 302 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 303 } 304 return (matches.size() > 0) ? matches.get(0) : null; 305 } 306 entrySet()307 public Set<Entry<Finder, T>> entrySet() { 308 LinkedHashMap<Finder, T> ret = new LinkedHashMap<Finder, T>(); 309 TreeSet<RTNode> s = new TreeSet<RTNode>(rankComparator); 310 root.addToEntrySet(s); 311 312 for (RTNode n : s) { 313 ret.put(n._finder, n._val); 314 } 315 316 return ret.entrySet(); 317 } 318 319 public class RTNode extends NodeBase<T> { 320 // Finder _finder; 321 // T _val; 322 List<RTNode> _children = new ArrayList<RTNode>(); 323 int _rank = -1; //rank -1 means the node was not inserted, but only used for structural purposes 324 325 //constructor for regular nodes with a Finder RTNode(Finder finder, T val, int rank)326 public RTNode(Finder finder, T val, int rank) { 327 super(finder, val); 328 // _finder = finder; 329 // _val = val; 330 _rank = rank; 331 } 332 333 //constructors for nodes without a Finder RTNode(String key, T val)334 public RTNode(String key, T val) { 335 super(new RegexFinder(key), val); 336 // _finder = new RegexFinder(key); 337 // _val = val; 338 // _rank = -1; 339 _info = new Info(); 340 } 341 put(RTNode node)342 public void put(RTNode node) { 343 if (_children.size() == 0) { 344 _children.add(node); 345 } else { 346 String maxSimilarChars = ""; //most similar characters up to the last similar directory 347 int insertIndex = 0; 348 for (int i = 0; i < _children.size(); i++) { 349 RTNode child = _children.get(i); 350 String childFinderPattern = child._finder.toString(); 351 if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') { 352 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$" 353 } else if (child._rank == -1) { 354 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*" 355 } 356 357 //check if child has the same Finder as node to insert, then replace it 358 if (node._finder.equals(child._finder)) { 359 child._finder = node._finder; 360 child._val = node._val; 361 if (child._rank == -1) { 362 child._rank = node._rank; 363 } else { 364 _size--; 365 } 366 return; 367 } 368 369 //check if child is the parent of node 370 if (child._rank == -1 && node._finder.toString().startsWith(childFinderPattern)) { 371 child.put(node); 372 return; 373 } 374 375 //if not parent then check if candidate for most similar RTNode 376 String gcp = greatestCommonPrefix(childFinderPattern, node._finder.toString()); 377 gcp = removeExtraChars(gcp); 378 if (gcp.length() > maxSimilarChars.length()) { 379 maxSimilarChars = gcp; 380 insertIndex = i; 381 } 382 } 383 384 String finderPattern = this._finder.toString(); 385 if (finderPattern.length() > 0 && finderPattern.charAt(finderPattern.length() - 1) == '$') { 386 finderPattern = finderPattern.substring(0, finderPattern.length() - 1); //takes into account the added "$" 387 } else if (!(finderPattern.equals("")) && this._rank == -1) { 388 finderPattern = finderPattern.substring(0, finderPattern.length() - 2); //takes into account the added ".*" 389 } 390 391 if ((maxSimilarChars).equals(finderPattern)) { //add under this if no similar children 392 _children.add(node); 393 } else { 394 //create the common parent of the chosen candidate above and node, then add to the insert index 395 RTNode newParent = new RTNode(maxSimilarChars + ".*", null); 396 newParent._children.add(this._children.get(insertIndex)); 397 newParent._children.add(node); 398 this._children.remove(insertIndex); 399 this._children.add(insertIndex, newParent); 400 } 401 } 402 } 403 404 //takes a string in a directory regex form and removes all characters up to the last full directory removeExtraChars(String input)405 private String removeExtraChars(String input) { 406 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/'))); 407 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')')) || 408 (ret.lastIndexOf('[') != -1 && ret.lastIndexOf('[') > ret.lastIndexOf(']')) || 409 (ret.lastIndexOf('{') != -1 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) { 410 ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/'))); 411 } 412 return ret; 413 } 414 415 //traverse tree to get value get(Finder finder)416 public T get(Finder finder) { 417 T ret = null; //return value 418 419 if (_children.size() == 0) { 420 return null; 421 } else { 422 for (RTNode child : _children) { 423 424 //check if child is the node 425 if (child._rank != -1 && finder.equals(child._finder)) { 426 return child._val; 427 } 428 429 String childFinderPattern = child._finder.toString(); 430 431 if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') { 432 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$" 433 } else if (child._rank == -1) { 434 childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*" 435 } 436 437 //check if child is the parent of node 438 if (finder.toString().startsWith(childFinderPattern)) { 439 ret = child.get(finder); 440 if (ret != null) { 441 break; 442 } 443 } 444 } 445 446 return ret; 447 } 448 } 449 450 //traverse tree to get an entry set addToEntrySet(TreeSet<RTNode> s)451 public void addToEntrySet(TreeSet<RTNode> s) { 452 if (_children.size() == 0) { 453 return; 454 } else { 455 for (RTNode child : _children) { 456 if (child._rank != -1) { 457 s.add(child); 458 } 459 child.addToEntrySet(s); 460 } 461 } 462 } 463 464 //traverse tree to get list of all values who's key matcher matches pattern addToList(String pattern, Object context, List<RTNode> list)465 public void addToList(String pattern, Object context, List<RTNode> list) { 466 if (_children.size() == 0) { 467 return; 468 } else { 469 Info firstInfo = new Info(); 470 for (RTNode child : _children) { 471 boolean found; 472 synchronized (child._finder) { 473 found = child._finder.find(pattern, context, firstInfo); 474 } 475 476 //check if child matches pattern 477 if (found) { 478 if (child._rank != -1) { 479 list.add(child); 480 } 481 // if this node's info value is unset, set it to the result of the 482 // lookup 483 // if (child._info!=null && child._info.value==null) { 484 if (child._info != null) { 485 // set the value to the result of the last find 486 child._info.value = firstInfo.value; 487 } else { 488 // for some reason, child._info is null, so simply initialize it. 489 child._info = new Info(); 490 // set the value to the result of the last find 491 child._info.value = firstInfo.value; 492 } 493 //check if child is the parent of node then enter that node 494 child.addToList(pattern, context, list); 495 } 496 } 497 } 498 } 499 toString()500 public String toString() { 501 return toString("", new StringBuilder()).toString(); 502 } 503 toString(String prefix, StringBuilder result)504 private StringBuilder toString(String prefix, StringBuilder result) { 505 result.append(prefix).append(this._finder.toString()).append("\n"); 506 for (RegexTree<T>.RTNode child : _children) { 507 child.toString(prefix + "\t", result); 508 } 509 return result; 510 } 511 512 //greatest common prefix between two strings greatestCommonPrefix(String a, String b)513 public String greatestCommonPrefix(String a, String b) { 514 int minLength = Math.min(a.length(), b.length()); 515 for (int i = 0; i < minLength; i++) { 516 if (a.charAt(i) != b.charAt(i)) { 517 return a.substring(0, i); 518 } 519 } 520 return a.substring(0, minLength); 521 } 522 } 523 524 class RTNodeRankComparator implements Comparator<RTNode> { compare(RTNode a, RTNode b)525 public int compare(RTNode a, RTNode b) { 526 if (a == b) { 527 return 0; 528 } else if (a == null) { 529 return -1; 530 } else if (b == null) { 531 return 1; 532 } else if (a._rank == b._rank) { 533 return 0; 534 } else if (a._rank > b._rank) { 535 return 1; 536 } else { 537 return -1; 538 } 539 } 540 } 541 toString()542 public String toString() { 543 return root.toString(); 544 } 545 } 546 547 private static class StarPatternMap<T> implements StorageInterfaceBase<T> { 548 private Map<String, List<SPNode>> _spmap; 549 private int _size = 0; 550 StarPatternMap()551 public StarPatternMap() { 552 _spmap = new HashMap<String, List<SPNode>>(); 553 // _size = 0; 554 } 555 size()556 public int size() { 557 return _size; 558 } 559 put(Finder pattern, T value)560 public void put(Finder pattern, T value) { 561 //System.out.println("pattern.toString() is => "+pattern.toString()); 562 String starPattern = pathStarrer.transform2(pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*")); 563 //System.out.println("Putting => "+starPattern); 564 List<SPNode> candidates = _spmap.get(starPattern); 565 if (candidates == null) { 566 candidates = new ArrayList<SPNode>(); 567 } 568 SPNode newNode = new SPNode(pattern, value); 569 candidates.add(newNode); 570 _spmap.put(starPattern, candidates); 571 _size++; 572 } 573 get(Finder finder)574 public T get(Finder finder) { 575 String starPattern = pathStarrer.transform2(finder.toString()); 576 List<SPNode> candidates = _spmap.get(starPattern); 577 if (candidates == null) { 578 return null; 579 } 580 for (SPNode cand : candidates) { 581 if (cand._finder.equals(finder)) { 582 return cand._val; 583 } 584 } 585 return null; 586 } 587 getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)588 public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) { 589 List<SPNode> list = new ArrayList<SPNode>(); 590 List<T> retList = new ArrayList<T>(); 591 592 String starPattern = pathStarrer.transform2(pattern); 593 List<SPNode> candidates = _spmap.get(starPattern); 594 if (candidates == null) { 595 return retList; 596 } 597 for (SPNode cand : candidates) { 598 Info info = new Info(); 599 if (cand._finder.find(pattern, context, info)) { 600 list.add(cand); 601 if (firstInfo != null) { 602 firstInfo.value = info.value; 603 } 604 } 605 } 606 607 for (SPNode n : list) { 608 retList.add(n._val); 609 if (matcherList != null) { 610 matcherList.add(n._finder); 611 } 612 } 613 614 return retList; 615 } 616 get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)617 public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) { 618 List<Finder> matcherList = new ArrayList<Finder>(); 619 Output<String[]> firstInfo = new Output<>(); 620 List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first 621 if (arguments != null && firstInfo.value != null) { 622 // arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null; 623 arguments.value = matcherList.isEmpty() ? null : firstInfo.value; 624 } 625 if (matcherFound != null) { 626 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 627 } 628 return (matches.size() > 0) ? matches.get(0) : null; 629 } 630 entrySet()631 public Set<Entry<Finder, T>> entrySet() { 632 LinkedHashMap<Finder, T> ret = new LinkedHashMap<Finder, T>(); 633 634 for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) { 635 List<SPNode> candidates = entry.getValue(); 636 for (SPNode node : candidates) { 637 ret.put(node._finder, node._val); 638 } 639 } 640 return ret.entrySet(); 641 } 642 643 /** 644 * A Node of a StarPatternMap 645 * @author ribnitz 646 * 647 */ 648 public class SPNode extends NodeBase<T> { 649 // Finder _finder; 650 // T _val; 651 SPNode(Finder finder, T val)652 public SPNode(Finder finder, T val) { 653 // _finder = finder; 654 // _val = val; 655 super(finder, val); 656 } 657 toString()658 public String toString() { 659 return this._finder.toString(); 660 } 661 } 662 } 663 664 /** 665 * The basic class of an information node, featuring a Finder, a value and an Info 666 * 667 * @author ribnitz 668 * 669 * @param <T> 670 */ 671 private static class NodeBase<T> { 672 Finder _finder; 673 T _val; 674 Info _info = new Info(); 675 NodeBase(Finder finder, T value)676 public NodeBase(Finder finder, T value) { 677 this._finder = finder; 678 this._val = value; 679 } 680 } 681 682 public static Transform<String, RegexFinder> RegexFinderTransform = new Transform<String, RegexFinder>() { 683 public RegexFinder transform(String source) { 684 return new RegexFinder(source); 685 } 686 }; 687 688 /** 689 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //. To work better 690 * with XPaths. 691 */ 692 public static Transform<String, RegexFinder> RegexFinderTransformPath = new Transform<String, RegexFinder>() { 693 public RegexFinder transform(String source) { 694 final String newSource = source.replace("[@", "\\[@"); 695 return new RegexFinder(newSource.startsWith("//") 696 ? "^" + newSource 697 : newSource); 698 } 699 }; 700 701 /** 702 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //, and ' is changed to ". 703 * To work better with XPaths. 704 */ 705 public static Transform<String, RegexFinder> RegexFinderTransformPath2 = new Transform<String, RegexFinder>() { 706 public RegexFinder transform(String source) { 707 final String newSource = source.replace("[@", "\\[@").replace('\'', '"'); 708 return new RegexFinder(newSource.startsWith("//") 709 ? "^" + newSource 710 : newSource); 711 } 712 }; 713 714 /** 715 * Allows for merging items of the same type. 716 * 717 * @param <T> 718 */ 719 public interface Merger<T> { merge(T a, T into)720 T merge(T a, T into); 721 } 722 723 /** 724 * Returns the result of a regex lookup. 725 * 726 * @param source 727 * @return 728 */ get(String source)729 public final T get(String source) { 730 return get(source, null, null, null, null); 731 } 732 733 /** 734 * Returns the result of a regex lookup, with the group arguments that matched. 735 * 736 * @param source 737 * @param context 738 * TODO 739 * @return 740 */ get(String source, Object context, Output<String[]> arguments)741 public T get(String source, Object context, Output<String[]> arguments) { 742 return get(source, context, arguments, null, null); 743 } 744 745 /** 746 * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure cases for 747 * debugging. 748 * 749 * @param source 750 * @param context 751 * @return 752 */ get(String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)753 public T get(String source, Object context, Output<String[]> arguments, 754 Output<Finder> matcherFound, List<String> failures) { 755 756 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 757 // T ret = SPEntries.get(source, context, arguments, matcherFound); 758 T ret = storage.get(source, context, arguments, matcherFound); 759 if (ret != null) { 760 return ret; 761 } 762 763 if (failures != null) { 764 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 765 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 766 Finder matcher = entry.getKey(); 767 synchronized (matcher) { 768 int failPoint = matcher.getFailPoint(source); 769 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 770 + matcher.toString(); 771 failures.add(show); 772 } 773 } 774 } 775 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 776 // T ret = RTEntries.get(source, context, arguments, matcherFound); 777 T ret = storage.get(source, context, arguments, matcherFound); 778 if (ret != null) { 779 return ret; 780 } 781 782 if (failures != null) { 783 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 784 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 785 Finder matcher = entry.getKey(); 786 synchronized (matcher) { 787 int failPoint = matcher.getFailPoint(source); 788 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 789 + matcher.toString(); 790 failures.add(show); 791 } 792 } 793 } 794 } else { 795 //slow but versatile implementation 796 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 797 Finder matcher = entry.getKey(); 798 synchronized (matcher) { 799 Info firstInfo = new Info(); 800 if (matcher.find(source, context, firstInfo)) { 801 if (arguments != null) { 802 // arguments.value = matcher.getInfo(); 803 arguments.value = firstInfo.value; 804 } 805 if (matcherFound != null) { 806 matcherFound.value = matcher; 807 } 808 return entry.getValue(); 809 } else if (failures != null) { 810 int failPoint = matcher.getFailPoint(source); 811 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 812 + matcher.toString(); 813 failures.add(show); 814 } 815 } 816 } 817 } 818 819 // not really necessary, but makes debugging easier. 820 if (arguments != null) { 821 arguments.value = null; 822 } 823 if (matcherFound != null) { 824 matcherFound.value = null; 825 } 826 return null; 827 } 828 829 /** 830 * Returns all results of a regex lookup, with the group arguments that matched. Supplies failure cases for 831 * debugging. 832 * 833 * @param source 834 * @param context 835 * TODO 836 * @return 837 */ getAll(String source, Object context, List<Finder> matcherList, List<String> failures)838 public List<T> getAll(String source, Object context, List<Finder> matcherList, List<String> failures) { 839 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 840 Output<String[]> firstInfo = new Output<>(); 841 // List<T> matches = SPEntries.getAll(source, context, matcherList,firstInfo); 842 List<T> matches = storage.getAll(source, context, matcherList, firstInfo); 843 if (matches != null) { 844 return matches; 845 } 846 847 if (failures != null) { 848 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 849 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 850 Finder matcher = entry.getKey(); 851 synchronized (matcher) { 852 int failPoint = matcher.getFailPoint(source); 853 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 854 + matcher.toString(); 855 failures.add(show); 856 } 857 } 858 } 859 return null; 860 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 861 Output<String[]> info = new Output<>(); 862 // List<T> matches = RTEntries.getAll(source, context, matcherList,info); 863 List<T> matches = storage.getAll(source, context, matcherList, info); 864 if (matches != null) { 865 return matches; 866 } 867 868 if (failures != null) { 869 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 870 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 871 Finder matcher = entry.getKey(); 872 synchronized (matcher) { 873 int failPoint = matcher.getFailPoint(source); 874 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 875 + matcher.toString(); 876 failures.add(show); 877 } 878 } 879 } 880 return null; 881 } else { 882 //slow but versatile implementation 883 List<T> matches = new ArrayList<T>(); 884 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 885 Finder matcher = entry.getKey(); 886 Info firstInfo = new Info(); 887 if (matcher.find(source, context, firstInfo)) { 888 if (matcherList != null) { 889 matcherList.add(matcher); 890 } 891 matches.add(entry.getValue()); 892 } else if (failures != null) { 893 int failPoint = matcher.getFailPoint(source); 894 String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t" 895 + matcher.toString(); 896 failures.add(show); 897 } 898 } 899 return matches; 900 } 901 } 902 903 /** 904 * Find the patterns that haven't been matched. Requires the caller to collect the patterns that have, using 905 * matcherFound. 906 * 907 * @return outputUnmatched 908 */ getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched)909 public Map<String, T> getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched) { 910 outputUnmatched.clear(); 911 912 Set<Map.Entry<Finder, T>> entrySet; 913 switch (_lookupType) { 914 case STAR_PATTERN_LOOKUP: 915 // entrySet = SPEntries.entrySet(); 916 entrySet = storage.entrySet(); 917 break; 918 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 919 // entrySet = RTEntries.entrySet(); 920 entrySet = storage.entrySet(); 921 break; 922 default: 923 entrySet = MEntries.entrySet(); 924 break; 925 } 926 927 for (Map.Entry<Finder, T> entry : entrySet) { 928 String pattern = entry.getKey().toString(); 929 if (!matched.contains(pattern)) { 930 outputUnmatched.put(pattern, entry.getValue()); 931 } 932 } 933 return outputUnmatched; 934 } 935 936 /** 937 * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex pattern and the value is 938 * what gets returned. 939 * 940 * @param patternTransform 941 * Used to transform string patterns into a Pattern. Can be used to process replacements (like 942 * variables). 943 * @param valueTransform 944 * Used to transform string values into another form. 945 * @param valueMerger 946 * Used to merge values with the same key. 947 */ of(Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)948 public static <T, U> RegexLookup<T> of(Transform<String, Finder> patternTransform, 949 Transform<String, T> valueTransform, Merger<T> valueMerger) { 950 return new RegexLookup<T>().setPatternTransform(patternTransform).setValueTransform(valueTransform) 951 .setValueMerger(valueMerger); 952 } 953 of(Transform<String, T> valueTransform)954 public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) { 955 return new RegexLookup<T>().setValueTransform(valueTransform).setPatternTransform(RegexFinderTransform); 956 } 957 of()958 public static <T> RegexLookup<T> of() { 959 return new RegexLookup<T>().setPatternTransform(RegexFinderTransform); 960 } 961 962 /** 963 * @deprecated Use {@link #of(LookupType,Transform)} instead 964 */ of(LookupType lookupType)965 public static <T> RegexLookup<T> of(LookupType lookupType) { 966 return of(lookupType, RegexFinderTransform); 967 } 968 of(LookupType lookupType, Transform<String, RegexFinder> transform)969 public static <T> RegexLookup<T> of(LookupType lookupType, Transform<String, RegexFinder> transform) { 970 return new RegexLookup<T>(lookupType).setPatternTransform(transform); 971 } 972 setValueTransform(Transform<String, ? extends T> valueTransform)973 public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) { 974 this.valueTransform = valueTransform; 975 return this; 976 } 977 setPatternTransform(Transform<String, ? extends Finder> patternTransform)978 public RegexLookup<T> setPatternTransform(Transform<String, ? extends Finder> patternTransform) { 979 this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform; 980 return this; 981 } 982 setValueMerger(Merger<T> valueMerger)983 public RegexLookup<T> setValueMerger(Merger<T> valueMerger) { 984 this.valueMerger = valueMerger; 985 return this; 986 } 987 988 /** 989 * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated by "; ". Lines 990 * starting with # are comments. 991 */ loadFromFile(Class<?> baseClass, String filename)992 public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) { 993 RegexFileParser parser = new RegexFileParser(); 994 parser.setLineParser(new RegexLineParser() { 995 @Override 996 public void parse(String line) { 997 int pos = line.indexOf("; "); 998 if (pos < 0) { 999 throw new IllegalArgumentException("Illegal line, doesn't contain semicolon: " + line); 1000 } 1001 String source = line.substring(0, pos).trim(); 1002 String target = line.substring(pos + 2).trim(); 1003 try { 1004 @SuppressWarnings("unchecked") 1005 T result = valueTransform == null ? (T) target : valueTransform.transform(target); 1006 add(source, result); 1007 } catch (Exception e) { 1008 throw new IllegalArgumentException("Failed to add <" + source + "> => <" + target + ">", e); 1009 } 1010 } 1011 }); 1012 parser.setVariableProcessor(new VariableProcessor() { 1013 @Override 1014 public void add(String variable, String variableName) { 1015 addVariable(variable, variableName); 1016 } 1017 1018 @Override 1019 public String replace(String str) { 1020 return variables.replace(str); 1021 } 1022 }); 1023 parser.parse(baseClass, filename); 1024 return this; 1025 } 1026 addVariable(String variable, String variableValue)1027 public RegexLookup<T> addVariable(String variable, String variableValue) { 1028 if (!variable.startsWith("%")) { 1029 throw new IllegalArgumentException("Variables must start with %"); 1030 } 1031 variables.add(variable.trim(), variableValue.trim()); 1032 return this; 1033 } 1034 1035 /** 1036 * Add a pattern/value pair. 1037 * 1038 * @param stringPattern 1039 * @param target 1040 * @return this, for chaining 1041 */ add(String stringPattern, T target)1042 public RegexLookup<T> add(String stringPattern, T target) { 1043 if (stringPattern.contains("%")) { 1044 stringPattern = variables.replace(stringPattern); 1045 } 1046 Finder pattern0 = patternTransform.transform(stringPattern); 1047 return add(pattern0, target); 1048 } 1049 1050 /** 1051 * Add a pattern/value pair. 1052 * 1053 * @param pattern 1054 * @param target 1055 * @return this, for chaining 1056 */ add(Finder pattern, T target)1057 public RegexLookup<T> add(Finder pattern, T target) { 1058 if (!allowNull && target == null) { 1059 throw new NullPointerException("null disallowed, unless allowNull(true) is called."); 1060 } 1061 1062 T old; 1063 switch (_lookupType) { 1064 case STAR_PATTERN_LOOKUP: // fallthrough 1065 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1066 old = storage.get(pattern); 1067 // old = SPEntries.get(pattern); 1068 break; 1069 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1070 // old = storage.get(pattern); 1071 // old = RTEntries.get(pattern); 1072 // break; 1073 default: 1074 old = MEntries.get(pattern); 1075 break; 1076 } 1077 1078 if (old == null) { 1079 switch (_lookupType) { 1080 case STAR_PATTERN_LOOKUP: // fallthrough 1081 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1082 storage.put(pattern, target); 1083 // SPEntries.put(pattern, target); 1084 break; 1085 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1086 // storage.put(pattern, target); 1087 // RTEntries.put(pattern, target); 1088 // break; 1089 default: 1090 MEntries.put(pattern, target); 1091 break; 1092 } 1093 } else if (valueMerger != null) { 1094 valueMerger.merge(target, old); 1095 } else { 1096 throw new IllegalArgumentException("Duplicate matcher without Merger defined " + pattern + "; old: " + old 1097 + "; new: " + target); 1098 } 1099 return this; 1100 } 1101 1102 @Override iterator()1103 public Iterator<Map.Entry<Finder, T>> iterator() { 1104 switch (_lookupType) { 1105 case STAR_PATTERN_LOOKUP: // fall through 1106 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1107 // return Collections.unmodifiableCollection(SPEntries.entrySet()).iterator(); 1108 return Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1109 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1110 // return Collections.unmodifiableCollection(RTEntries.entrySet()).iterator(); 1111 // return Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1112 default: 1113 return Collections.unmodifiableCollection(MEntries.entrySet()).iterator(); 1114 } 1115 } 1116 replace(String lookup, String... arguments)1117 public static String replace(String lookup, String... arguments) { 1118 StringBuilder result = new StringBuilder(); 1119 int last = 0; 1120 while (true) { 1121 int pos = lookup.indexOf("$", last); 1122 if (pos < 0) { 1123 result.append(lookup.substring(last, lookup.length())); 1124 break; 1125 } 1126 result.append(lookup.substring(last, pos)); 1127 final int arg = lookup.charAt(pos + 1) - '0'; 1128 try { 1129 result.append(arguments[arg]); 1130 } catch (Exception e) { 1131 throw new IllegalArgumentException("Replacing $" + arg + " in <" + lookup 1132 + ">, but too few arguments supplied."); 1133 } 1134 last = pos + 2; 1135 } 1136 return result.toString(); 1137 } 1138 1139 /** 1140 * @return the number of entries 1141 */ size()1142 public int size() { 1143 switch (_lookupType) { 1144 case STAR_PATTERN_LOOKUP: // fall through 1145 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1146 // return SPEntries.size(); 1147 return storage.size(); 1148 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1149 // return storage.size(); 1150 // return RTEntries.size(); 1151 default: 1152 return MEntries.size(); 1153 } 1154 } 1155 1156 @Override toString()1157 public String toString() { 1158 return storage.toString(); 1159 } 1160 }