• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.Comparator;
6 import java.util.HashMap;
7 import java.util.Iterator;
8 import java.util.LinkedHashMap;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12 import java.util.Set;
13 import java.util.TreeSet;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
16 
17 import org.unicode.cldr.util.CldrUtility.VariableReplacer;
18 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
19 import org.unicode.cldr.util.RegexFileParser.VariableProcessor;
20 import org.unicode.cldr.util.RegexLookup.Finder;
21 import org.unicode.cldr.util.RegexLookup.Finder.Info;
22 
23 import com.google.common.base.Splitter;
24 import com.ibm.icu.text.Transform;
25 import com.ibm.icu.util.Output;
26 
27 /**
28  * Lookup items according to a set of regex patterns. Returns the value according to the first pattern that matches. Not
29  * thread-safe.
30  *
31  * @param <T>
32  */
33 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> {
34     protected static final String SEPARATOR = "; ";
35     private VariableReplacer variables = new VariableReplacer();
36     private StorageInterfaceBase<T> storage;
37     private Map<Finder, T> MEntries;
38     private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform;
39     private Transform<String, ? extends T> valueTransform;
40     private Merger<T> valueMerger;
41     private final boolean allowNull = false;
42     private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
43 
44     public enum LookupType {
45         STAR_PATTERN_LOOKUP, OPTIMIZED_DIRECTORY_PATTERN_LOOKUP, STANDARD
46     }
47 
48     private LookupType _lookupType;
49 
50     /*
51      * STAR_PATTERN_LOOKUP
52      *
53      * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below.
54      * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur
55      * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression,
56      * thus greatly reducing the number of actual regex matches that need to occur.
57      */
58 
59     /*
60      * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP
61      *
62      * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored.
63      * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/'
64      * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"]
65      *
66      * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored.
67      * However regex's no longer need to follow any specific format (Slower but more versatile).
68      */
69 
RegexLookup(LookupType type)70     public RegexLookup(LookupType type) {
71         _lookupType = type;
72         switch (type) {
73         case STAR_PATTERN_LOOKUP:
74             //   SPEntries = new StarPatternMap<T>();
75             storage = new StarPatternMap<>();
76             break;
77         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
78             //   RTEntries = new RegexTree<T>();
79             storage = new RegexTree<>();
80             break;
81         default:
82             MEntries = new LinkedHashMap<>();
83             break;
84         }
85     }
86 
RegexLookup()87     public RegexLookup() {
88         this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP);
89 //        _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP;
90 //        RTEntries = new RegexTree<T>();
91     }
92 
93     public abstract static class Finder {
94         public static class Info {
95             public String[] value;
96         }
97 
98         //   abstract public String[] getInfo();
99 
100         // abstract public boolean find(String item, Object context);
101 
find(String item, Object context, Info info)102         abstract public boolean find(String item, Object context, Info info);
103 
matches(String item, Object context, Info info)104         abstract public boolean matches(String item, Object context, Info info);
105 
getFailPoint(String source)106         public int getFailPoint(String source) {
107             return -1;
108         }
109         // must also define toString
110     }
111 
112     public static class RegexFinder extends Finder {
113         /**
114          * The matcher used by this RegexFinder
115          */
116         private final Matcher matcher;
117 
118         /**
119          * The Pattern used by this RegexFinder
120          */
121         protected final Pattern pattern;
122 
RegexFinder(String pattern)123         public RegexFinder(String pattern) {
124             this.pattern = Pattern.compile(pattern, Pattern.COMMENTS);
125             matcher = this.pattern.matcher("");
126         }
127 
128         /**
129          * Call Matches on the pattern, returning additional information in the Info field,
130          * if it is non null
131          */
132         @Override
matches(String item, Object context, Info info)133         public boolean matches(String item, Object context, Info info) {
134             synchronized (matcher) {
135                 try {
136                     boolean result = matcher.reset(item).matches();
137                     extractInfo(info, result);
138                     return result;
139                 } catch (StringIndexOutOfBoundsException e) {
140                     // We don't know what causes this error (cldrbug 5051) so
141                     // make the exception message more detailed.
142                     throw new IllegalArgumentException("Matching error caused by pattern: ["
143                         + matcher.toString() + "] on text: [" + item + "]", e);
144                 }
145             }
146         }
147 
148         /**
149          * Extract match related information into  the info field, if result is true, and info
150          * is not null.
151          * @param info
152          * @param result
153          */
extractInfo(Info info, boolean result)154         private void extractInfo(Info info, boolean result) {
155             if (result && info != null) {
156                 int limit = matcher.groupCount() + 1;
157                 String[] value = new String[limit];
158                 for (int i = 0; i < limit; ++i) {
159                     value[i] = matcher.group(i);
160                 }
161                 info.value = value;
162             }
163         }
164 
165         /**
166          * Call find() on the pattern, returning additional information in the info field,
167          * if it is non-null
168          */
169         @Override
find(String item, Object context, Info info)170         public boolean find(String item, Object context, Info info) {
171             synchronized (matcher) {
172                 try {
173                     boolean result = matcher.reset(item).find();
174                     extractInfo(info, result);
175                     return result;
176                 } catch (StringIndexOutOfBoundsException e) {
177                     // We don't know what causes this error (cldrbug 5051) so
178                     // make the exception message more detailed.
179                     throw new IllegalArgumentException("Matching error caused by pattern: ["
180                         + matcher.toString() + "] on text: [" + item + "]", e);
181                 }
182             }
183         }
184 
185         @Override
toString()186         public String toString() {
187             // Use pattern here, to avoid having to synchronize on matcher
188             return pattern.pattern();
189         }
190 
191         @Override
equals(Object obj)192         public boolean equals(Object obj) {
193             if (obj == null) {
194                 return false;
195             } else {
196                 return toString().equals(obj.toString());
197             }
198         }
199 
200         @Override
hashCode()201         public int hashCode() {
202             return toString().hashCode();
203         }
204 
205         @Override
getFailPoint(String source)206         public int getFailPoint(String source) {
207             synchronized (matcher) {
208                 return RegexUtilities.findMismatch(matcher, source);
209             }
210         }
211     }
212 
213     private static interface StorageInterfaceBase<T> {
entrySet()214         Set<Entry<Finder, T>> entrySet();
215 
get(Finder finder)216         T get(Finder finder);
217 
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)218         T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound);
219 
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)220         List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo);
221 
put(Finder pattern, T value)222         void put(Finder pattern, T value);
223 
size()224         int size();
225     }
226 
227 //    private static class FinderWithInfo {
228 //        Finder _finder;
229 //        Info _info;
230 //        public FinderWithInfo(Finder finder,Info info) {
231 //            _info=info;
232 //            _finder=finder;
233 //        }
234 //    }
235     private static class RegexTree<T> implements StorageInterfaceBase<T> {
236         private RTNode root;
237         private int _size;
238         private RTNodeRankComparator rankComparator = new RTNodeRankComparator();
239 
RegexTree()240         public RegexTree() {
241             root = new RTNode("", null);
242             _size = 0;
243         }
244 
245         @Override
size()246         public int size() {
247             return _size;
248         }
249 
250         @Override
put(Finder pattern, T value)251         public void put(Finder pattern, T value) {
252             root.put(new RTNode(pattern, value, _size));
253             _size++;
254         }
255 
256         @Override
get(Finder finder)257         public T get(Finder finder) {
258             return root.get(finder);
259         }
260 
261         @Override
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)262         public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) {
263             List<RTNode> list = new ArrayList<>();
264             List<T> retList = new ArrayList<>();
265 
266             root.addToList(pattern, context, list);
267             Collections.sort(list, rankComparator);
268 
269             boolean isFirst = true;
270             if (firstInfo != null && !list.isEmpty()) {
271                 RTNode firstNode = list.get(0);
272                 if (firstNode._info != null) {
273                     firstInfo.value = firstNode._info.value;
274                 }
275             }
276 
277             for (RTNode n : list) {
278                 if (isFirst) {
279                     firstInfo.value = n._info.value;
280                     isFirst = false;
281                 }
282             }
283 
284             for (RTNode n : list) {
285                 retList.add(n._val);
286                 if (matcherList != null) {
287                     matcherList.add(n._finder);
288                 }
289             }
290 
291             return retList;
292         }
293 
294         @Override
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)295         public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) {
296             List<Finder> matcherList = new ArrayList<>();
297             Output<String[]> firstInfo = new Output<>();
298             List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first
299             if (arguments != null) {
300 //               arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
301                 arguments.value = firstInfo.value;
302 //               arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
303                 arguments.value = firstInfo.value;
304             }
305             if (matcherFound != null) {
306                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
307             }
308             return (matches.size() > 0) ? matches.get(0) : null;
309         }
310 
311         @Override
entrySet()312         public Set<Entry<Finder, T>> entrySet() {
313             LinkedHashMap<Finder, T> ret = new LinkedHashMap<>();
314             TreeSet<RTNode> s = new TreeSet<>(rankComparator);
315             root.addToEntrySet(s);
316 
317             for (RTNode n : s) {
318                 ret.put(n._finder, n._val);
319             }
320 
321             return ret.entrySet();
322         }
323 
324         public class RTNode extends NodeBase<T> {
325 //            Finder _finder;
326 //            T _val;
327             List<RTNode> _children = new ArrayList<>();
328             int _rank = -1; //rank -1 means the node was not inserted, but only used for structural purposes
329 
330             //constructor for regular nodes with a Finder
RTNode(Finder finder, T val, int rank)331             public RTNode(Finder finder, T val, int rank) {
332                 super(finder, val);
333 //                _finder = finder;
334 //                _val = val;
335                 _rank = rank;
336             }
337 
338             //constructors for nodes without a Finder
RTNode(String key, T val)339             public RTNode(String key, T val) {
340                 super(new RegexFinder(key), val);
341 //                _finder = new RegexFinder(key);
342 //                _val = val;
343 //                _rank = -1;
344                 _info = new Info();
345             }
346 
put(RTNode node)347             public void put(RTNode node) {
348                 if (_children.size() == 0) {
349                     _children.add(node);
350                 } else {
351                     String maxSimilarChars = ""; //most similar characters up to the last similar directory
352                     int insertIndex = 0;
353                     for (int i = 0; i < _children.size(); i++) {
354                         RTNode child = _children.get(i);
355                         String childFinderPattern = child._finder.toString();
356                         if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') {
357                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$"
358                         } else if (child._rank == -1) {
359                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*"
360                         }
361 
362                         //check if child has the same Finder as node to insert, then replace it
363                         if (node._finder.equals(child._finder)) {
364                             child._finder = node._finder;
365                             child._val = node._val;
366                             if (child._rank == -1) {
367                                 child._rank = node._rank;
368                             } else {
369                                 _size--;
370                             }
371                             return;
372                         }
373 
374                         //check if child is the parent of node
375                         if (child._rank == -1 && node._finder.toString().startsWith(childFinderPattern)) {
376                             child.put(node);
377                             return;
378                         }
379 
380                         //if not parent then check if candidate for most similar RTNode
381                         String gcp = greatestCommonPrefix(childFinderPattern, node._finder.toString());
382                         gcp = removeExtraChars(gcp);
383                         if (gcp.length() > maxSimilarChars.length()) {
384                             maxSimilarChars = gcp;
385                             insertIndex = i;
386                         }
387                     }
388 
389                     String finderPattern = this._finder.toString();
390                     if (finderPattern.length() > 0 && finderPattern.charAt(finderPattern.length() - 1) == '$') {
391                         finderPattern = finderPattern.substring(0, finderPattern.length() - 1); //takes into account the added "$"
392                     } else if (!(finderPattern.equals("")) && this._rank == -1) {
393                         finderPattern = finderPattern.substring(0, finderPattern.length() - 2); //takes into account the added ".*"
394                     }
395 
396                     if ((maxSimilarChars).equals(finderPattern)) { //add under this if no similar children
397                         _children.add(node);
398                     } else {
399                         //create the common parent of the chosen candidate above and node, then add to the insert index
400                         RTNode newParent = new RTNode(maxSimilarChars + ".*", null);
401                         newParent._children.add(this._children.get(insertIndex));
402                         newParent._children.add(node);
403                         this._children.remove(insertIndex);
404                         this._children.add(insertIndex, newParent);
405                     }
406                 }
407             }
408 
409             //takes a string in a directory regex form and removes all characters up to the last full directory
removeExtraChars(String input)410             private String removeExtraChars(String input) {
411                 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/')));
412                 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')')) ||
413                     (ret.lastIndexOf('[') != -1 && ret.lastIndexOf('[') > ret.lastIndexOf(']')) ||
414                     (ret.lastIndexOf('{') != -1 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) {
415                     ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/')));
416                 }
417                 return ret;
418             }
419 
420             //traverse tree to get value
get(Finder finder)421             public T get(Finder finder) {
422                 T ret = null; //return value
423 
424                 if (_children.size() == 0) {
425                     return null;
426                 } else {
427                     for (RTNode child : _children) {
428 
429                         //check if child is the node
430                         if (child._rank != -1 && finder.equals(child._finder)) {
431                             return child._val;
432                         }
433 
434                         String childFinderPattern = child._finder.toString();
435 
436                         if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') {
437                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$"
438                         } else if (child._rank == -1) {
439                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*"
440                         }
441 
442                         //check if child is the parent of node
443                         if (finder.toString().startsWith(childFinderPattern)) {
444                             ret = child.get(finder);
445                             if (ret != null) {
446                                 break;
447                             }
448                         }
449                     }
450 
451                     return ret;
452                 }
453             }
454 
455             //traverse tree to get an entry set
addToEntrySet(TreeSet<RTNode> s)456             public void addToEntrySet(TreeSet<RTNode> s) {
457                 if (_children.size() == 0) {
458                     return;
459                 } else {
460                     for (RTNode child : _children) {
461                         if (child._rank != -1) {
462                             s.add(child);
463                         }
464                         child.addToEntrySet(s);
465                     }
466                 }
467             }
468 
469             //traverse tree to get list of all values who's key matcher matches pattern
addToList(String pattern, Object context, List<RTNode> list)470             public void addToList(String pattern, Object context, List<RTNode> list) {
471                 if (_children.size() == 0) {
472                     return;
473                 } else {
474                     Info firstInfo = new Info();
475                     for (RTNode child : _children) {
476                         boolean found;
477                         synchronized (child._finder) {
478                             found = child._finder.find(pattern, context, firstInfo);
479                         }
480 
481                         //check if child matches pattern
482                         if (found) {
483                             if (child._rank != -1) {
484                                 list.add(child);
485                             }
486                             // if this node's info value is unset, set it to the result of the
487                             // lookup
488 //                            if (child._info!=null && child._info.value==null) {
489                             if (child._info != null) {
490                                 // set the value to the result of the last find
491                                 child._info.value = firstInfo.value;
492                             } else {
493                                 // for some reason, child._info is null, so simply initialize it.
494                                 child._info = new Info();
495                                 // set the value to the result of the last find
496                                 child._info.value = firstInfo.value;
497                             }
498                             //check if child is the parent of node then enter that node
499                             child.addToList(pattern, context, list);
500                         }
501                     }
502                 }
503             }
504 
505             @Override
toString()506             public String toString() {
507                 return toString("", new StringBuilder()).toString();
508             }
509 
toString(String prefix, StringBuilder result)510             private StringBuilder toString(String prefix, StringBuilder result) {
511                 result.append(prefix).append(this._finder.toString()).append("\n");
512                 for (RegexTree<T>.RTNode child : _children) {
513                     child.toString(prefix + "\t", result);
514                 }
515                 return result;
516             }
517 
518             //greatest common prefix between two strings
greatestCommonPrefix(String a, String b)519             public String greatestCommonPrefix(String a, String b) {
520                 int minLength = Math.min(a.length(), b.length());
521                 for (int i = 0; i < minLength; i++) {
522                     if (a.charAt(i) != b.charAt(i)) {
523                         return a.substring(0, i);
524                     }
525                 }
526                 return a.substring(0, minLength);
527             }
528         }
529 
530         class RTNodeRankComparator implements Comparator<RTNode> {
531             @Override
compare(RTNode a, RTNode b)532             public int compare(RTNode a, RTNode b) {
533                 if (a == b) {
534                     return 0;
535                 } else if (a == null) {
536                     return -1;
537                 } else if (b == null) {
538                     return 1;
539                 } else if (a._rank == b._rank) {
540                     return 0;
541                 } else if (a._rank > b._rank) {
542                     return 1;
543                 } else {
544                     return -1;
545                 }
546             }
547         }
548 
549         @Override
toString()550         public String toString() {
551             return root.toString();
552         }
553     }
554 
555     private static class StarPatternMap<T> implements StorageInterfaceBase<T> {
556         private Map<String, List<SPNode>> _spmap;
557         private int _size = 0;
558 
StarPatternMap()559         public StarPatternMap() {
560             _spmap = new HashMap<>();
561 //            _size = 0;
562         }
563 
564         @Override
size()565         public int size() {
566             return _size;
567         }
568 
569         @Override
put(Finder pattern, T value)570         public void put(Finder pattern, T value) {
571             //System.out.println("pattern.toString() is => "+pattern.toString());
572             String starPattern = pathStarrer.transform2(pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*"));
573             //System.out.println("Putting => "+starPattern);
574             List<SPNode> candidates = _spmap.get(starPattern);
575             if (candidates == null) {
576                 candidates = new ArrayList<>();
577             }
578             SPNode newNode = new SPNode(pattern, value);
579             candidates.add(newNode);
580             _spmap.put(starPattern, candidates);
581             _size++;
582         }
583 
584         @Override
get(Finder finder)585         public T get(Finder finder) {
586             String starPattern = pathStarrer.transform2(finder.toString());
587             List<SPNode> candidates = _spmap.get(starPattern);
588             if (candidates == null) {
589                 return null;
590             }
591             for (SPNode cand : candidates) {
592                 if (cand._finder.equals(finder)) {
593                     return cand._val;
594                 }
595             }
596             return null;
597         }
598 
599         @Override
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)600         public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) {
601             List<SPNode> list = new ArrayList<>();
602             List<T> retList = new ArrayList<>();
603 
604             String starPattern = pathStarrer.transform2(pattern);
605             List<SPNode> candidates = _spmap.get(starPattern);
606             if (candidates == null) {
607                 return retList;
608             }
609             for (SPNode cand : candidates) {
610                 Info info = new Info();
611                 if (cand._finder.find(pattern, context, info)) {
612                     list.add(cand);
613                     if (firstInfo != null) {
614                         firstInfo.value = info.value;
615                     }
616                 }
617             }
618 
619             for (SPNode n : list) {
620                 retList.add(n._val);
621                 if (matcherList != null) {
622                     matcherList.add(n._finder);
623                 }
624             }
625 
626             return retList;
627         }
628 
629         @Override
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)630         public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) {
631             List<Finder> matcherList = new ArrayList<>();
632             Output<String[]> firstInfo = new Output<>();
633             List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first
634             if (arguments != null && firstInfo.value != null) {
635 //                arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
636                 arguments.value = matcherList.isEmpty() ? null : firstInfo.value;
637             }
638             if (matcherFound != null) {
639                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
640             }
641             return (matches.size() > 0) ? matches.get(0) : null;
642         }
643 
644         @Override
entrySet()645         public Set<Entry<Finder, T>> entrySet() {
646             LinkedHashMap<Finder, T> ret = new LinkedHashMap<>();
647 
648             for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) {
649                 List<SPNode> candidates = entry.getValue();
650                 for (SPNode node : candidates) {
651                     ret.put(node._finder, node._val);
652                 }
653             }
654             return ret.entrySet();
655         }
656 
657         /**
658          * A Node of a StarPatternMap
659          * @author ribnitz
660          *
661          */
662         public class SPNode extends NodeBase<T> {
663 //            Finder _finder;
664 //            T _val;
665 
SPNode(Finder finder, T val)666             public SPNode(Finder finder, T val) {
667 //                _finder = finder;
668 //                _val = val;
669                 super(finder, val);
670             }
671 
672             @Override
toString()673             public String toString() {
674                 return this._finder.toString();
675             }
676         }
677     }
678 
679     /**
680      * The basic class of an information node, featuring a Finder, a value and an Info
681      *
682      * @author ribnitz
683      *
684      * @param <T>
685      */
686     private static class NodeBase<T> {
687         Finder _finder;
688         T _val;
689         Info _info = new Info();
690 
NodeBase(Finder finder, T value)691         public NodeBase(Finder finder, T value) {
692             this._finder = finder;
693             this._val = value;
694         }
695     }
696 
697     public static Transform<String, RegexFinder> RegexFinderTransform = new Transform<String, RegexFinder>() {
698         @Override
699         public RegexFinder transform(String source) {
700             return new RegexFinder(source);
701         }
702     };
703 
704     /**
705      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //. To work better
706      * with XPaths.
707      */
708     public static Transform<String, RegexFinder> RegexFinderTransformPath = new Transform<String, RegexFinder>() {
709         @Override
710         public RegexFinder transform(String source) {
711             final String newSource = source.replace("[@", "\\[@");
712             return new RegexFinder(newSource.startsWith("//")
713                 ? "^" + newSource
714                 : newSource);
715         }
716     };
717 
718     /**
719      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //, and ' is changed to ".
720      * To work better with XPaths.
721      */
722     public static Transform<String, RegexFinder> RegexFinderTransformPath2 = new Transform<String, RegexFinder>() {
723         @Override
724         public RegexFinder transform(String source) {
725             final String newSource = source.replace("[@", "\\[@").replace('\'', '"');
726             return new RegexFinder(newSource.startsWith("//")
727                 ? "^" + newSource
728                 : newSource);
729         }
730     };
731 
732     /**
733      * Allows for merging items of the same type.
734      *
735      * @param <T>
736      */
737     public interface Merger<T> {
merge(T a, T into)738         T merge(T a, T into);
739     }
740 
741     /**
742      * Returns the result of a regex lookup.
743      *
744      * @param source
745      * @return
746      */
get(String source)747     public final T get(String source) {
748         return get(source, null, null, null, null);
749     }
750 
751     /**
752      * Returns the result of a regex lookup, with the group arguments that matched.
753      *
754      * @param source
755      * @param context
756      *            TODO
757      * @return
758      */
get(String source, Object context, Output<String[]> arguments)759     public T get(String source, Object context, Output<String[]> arguments) {
760         return get(source, context, arguments, null, null);
761     }
762 
763     /**
764      * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure cases for
765      * debugging.
766      *
767      * @param source
768      * @param context
769      * @return
770      */
get(String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)771     public T get(String source, Object context, Output<String[]> arguments,
772         Output<Finder> matcherFound, List<String> failures) {
773 
774         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
775             //   T ret = SPEntries.get(source, context, arguments, matcherFound);
776             T ret = storage.get(source, context, arguments, matcherFound);
777             if (ret != null) {
778                 return ret;
779             }
780 
781             if (failures != null) {
782                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
783 //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
784                     Finder matcher = entry.getKey();
785                     synchronized (matcher) {
786                         int failPoint = matcher.getFailPoint(source);
787                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
788                             + matcher.toString();
789                         failures.add(show);
790                     }
791                 }
792             }
793         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
794             //      T ret = RTEntries.get(source, context, arguments, matcherFound);
795             T ret = storage.get(source, context, arguments, matcherFound);
796             if (ret != null) {
797                 return ret;
798             }
799 
800             if (failures != null) {
801                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
802 //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
803                     Finder matcher = entry.getKey();
804                     synchronized (matcher) {
805                         int failPoint = matcher.getFailPoint(source);
806                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
807                             + matcher.toString();
808                         failures.add(show);
809                     }
810                 }
811             }
812         } else {
813             //slow but versatile implementation
814             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
815                 Finder matcher = entry.getKey();
816                 synchronized (matcher) {
817                     Info firstInfo = new Info();
818                     if (matcher.find(source, context, firstInfo)) {
819                         if (arguments != null) {
820 //                            arguments.value = matcher.getInfo();
821                             arguments.value = firstInfo.value;
822                         }
823                         if (matcherFound != null) {
824                             matcherFound.value = matcher;
825                         }
826                         return entry.getValue();
827                     } else if (failures != null) {
828                         int failPoint = matcher.getFailPoint(source);
829                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
830                             + matcher.toString();
831                         failures.add(show);
832                     }
833                 }
834             }
835         }
836 
837         // not really necessary, but makes debugging easier.
838         if (arguments != null) {
839             arguments.value = null;
840         }
841         if (matcherFound != null) {
842             matcherFound.value = null;
843         }
844         return null;
845     }
846 
847     /**
848      * Returns all results of a regex lookup, with the group arguments that matched. Supplies failure cases for
849      * debugging.
850      *
851      * @param source
852      * @param context
853      *            TODO
854      * @return
855      */
getAll(String source, Object context, List<Finder> matcherList, List<String> failures)856     public List<T> getAll(String source, Object context, List<Finder> matcherList, List<String> failures) {
857         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
858             Output<String[]> firstInfo = new Output<>();
859 //            List<T> matches = SPEntries.getAll(source, context, matcherList,firstInfo);
860             List<T> matches = storage.getAll(source, context, matcherList, firstInfo);
861             if (matches != null) {
862                 return matches;
863             }
864 
865             if (failures != null) {
866                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
867 //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
868                     Finder matcher = entry.getKey();
869                     synchronized (matcher) {
870                         int failPoint = matcher.getFailPoint(source);
871                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
872                             + matcher.toString();
873                         failures.add(show);
874                     }
875                 }
876             }
877             return null;
878         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
879             Output<String[]> info = new Output<>();
880 //            List<T> matches = RTEntries.getAll(source, context, matcherList,info);
881             List<T> matches = storage.getAll(source, context, matcherList, info);
882             if (matches != null) {
883                 return matches;
884             }
885 
886             if (failures != null) {
887                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
888 //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
889                     Finder matcher = entry.getKey();
890                     synchronized (matcher) {
891                         int failPoint = matcher.getFailPoint(source);
892                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
893                             + matcher.toString();
894                         failures.add(show);
895                     }
896                 }
897             }
898             return null;
899         } else {
900             //slow but versatile implementation
901             List<T> matches = new ArrayList<>();
902             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
903                 Finder matcher = entry.getKey();
904                 Info firstInfo = new Info();
905                 if (matcher.find(source, context, firstInfo)) {
906                     if (matcherList != null) {
907                         matcherList.add(matcher);
908                     }
909                     matches.add(entry.getValue());
910                 } else if (failures != null) {
911                     int failPoint = matcher.getFailPoint(source);
912                     String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
913                         + matcher.toString();
914                     failures.add(show);
915                 }
916             }
917             return matches;
918         }
919     }
920 
921     /**
922      * Find the patterns that haven't been matched. Requires the caller to collect the patterns that have, using
923      * matcherFound.
924      *
925      * @return outputUnmatched
926      */
getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched)927     public Map<String, T> getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched) {
928         outputUnmatched.clear();
929 
930         Set<Map.Entry<Finder, T>> entrySet;
931         switch (_lookupType) {
932         case STAR_PATTERN_LOOKUP:
933 //            entrySet = SPEntries.entrySet();
934             entrySet = storage.entrySet();
935             break;
936         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
937 //            entrySet = RTEntries.entrySet();
938             entrySet = storage.entrySet();
939             break;
940         default:
941             entrySet = MEntries.entrySet();
942             break;
943         }
944 
945         for (Map.Entry<Finder, T> entry : entrySet) {
946             String pattern = entry.getKey().toString();
947             if (!matched.contains(pattern)) {
948                 outputUnmatched.put(pattern, entry.getValue());
949             }
950         }
951         return outputUnmatched;
952     }
953 
954     /**
955      * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex pattern and the value is
956      * what gets returned.
957      *
958      * @param patternTransform
959      *            Used to transform string patterns into a Pattern. Can be used to process replacements (like
960      *            variables).
961      * @param valueTransform
962      *            Used to transform string values into another form.
963      * @param valueMerger
964      *            Used to merge values with the same key.
965      */
of(Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)966     public static <T, U> RegexLookup<T> of(Transform<String, Finder> patternTransform,
967         Transform<String, T> valueTransform, Merger<T> valueMerger) {
968         return new RegexLookup<T>().setPatternTransform(patternTransform).setValueTransform(valueTransform)
969             .setValueMerger(valueMerger);
970     }
971 
of(Transform<String, T> valueTransform)972     public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) {
973         return new RegexLookup<T>().setValueTransform(valueTransform).setPatternTransform(RegexFinderTransform);
974     }
975 
of()976     public static <T> RegexLookup<T> of() {
977         return new RegexLookup<T>().setPatternTransform(RegexFinderTransform);
978     }
979 
980     /**
981      * @deprecated Use {@link #of(LookupType,Transform)} instead
982      */
983     @Deprecated
of(LookupType lookupType)984     public static <T> RegexLookup<T> of(LookupType lookupType) {
985         return of(lookupType, RegexFinderTransform);
986     }
987 
of(LookupType lookupType, Transform<String, RegexFinder> transform)988     public static <T> RegexLookup<T> of(LookupType lookupType, Transform<String, RegexFinder> transform) {
989         return new RegexLookup<T>(lookupType).setPatternTransform(transform);
990     }
991 
setValueTransform(Transform<String, ? extends T> valueTransform)992     public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) {
993         this.valueTransform = valueTransform;
994         return this;
995     }
996 
setPatternTransform(Transform<String, ? extends Finder> patternTransform)997     public RegexLookup<T> setPatternTransform(Transform<String, ? extends Finder> patternTransform) {
998         this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform;
999         return this;
1000     }
1001 
setValueMerger(Merger<T> valueMerger)1002     public RegexLookup<T> setValueMerger(Merger<T> valueMerger) {
1003         this.valueMerger = valueMerger;
1004         return this;
1005     }
1006 
1007     /**
1008      * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated by "; ". Lines
1009      * starting with # are comments.
1010      */
loadFromFile(Class<?> baseClass, String filename)1011     public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) {
1012         RegexFileParser parser = setupRegexFileParser();
1013         parser.parse(baseClass, filename);
1014         return this;
1015     }
1016 
1017     /**
1018      * Load a RegexLookup from a list of strings, as if they came from a file. See loadFromFile
1019      */
loadFromString(String source)1020     public RegexLookup<T> loadFromString(String source) {
1021         RegexFileParser parser = setupRegexFileParser();
1022         parser.parseStrings("string", Splitter.on('\n').split(source));
1023         return this;
1024     }
1025 
setupRegexFileParser()1026     private RegexFileParser setupRegexFileParser() {
1027         RegexFileParser parser = new RegexFileParser();
1028         parser.setLineParser(new RegexLineParser() {
1029             @Override
1030             public void parse(String line) {
1031                 int pos = line.indexOf(RegexLookup.SEPARATOR);
1032                 if (pos < 0) {
1033                     throw new IllegalArgumentException("Illegal line, doesn't contain semicolon: " + line);
1034                 }
1035                 String source = line.substring(0, pos).trim();
1036                 String target = line.substring(pos + 2).trim();
1037                 try {
1038                     @SuppressWarnings("unchecked")
1039                     T result = valueTransform == null ? (T) target : valueTransform.transform(target);
1040                     add(source, result);
1041                 } catch (Exception e) {
1042                     throw new IllegalArgumentException("Failed to add <" + source + "> => <" + target + ">", e);
1043                 }
1044             }
1045         });
1046         parser.setVariableProcessor(new VariableProcessor() {
1047             @Override
1048             public void add(String variable, String variableName) {
1049                 addVariable(variable, variableName);
1050             }
1051 
1052             @Override
1053             public String replace(String str) {
1054                 return variables.replace(str);
1055             }
1056         });
1057         return parser;
1058     }
1059 
addVariable(String variable, String variableValue)1060     public RegexLookup<T> addVariable(String variable, String variableValue) {
1061         if (!variable.startsWith("%")) {
1062             throw new IllegalArgumentException("Variables must start with %");
1063         }
1064         variables.add(variable.trim(), variableValue.trim());
1065         return this;
1066     }
1067 
1068     /**
1069      * Add a pattern/value pair.
1070      *
1071      * @param stringPattern
1072      * @param target
1073      * @return this, for chaining
1074      */
add(String stringPattern, T target)1075     public RegexLookup<T> add(String stringPattern, T target) {
1076         if (stringPattern.contains("%")) {
1077             stringPattern = variables.replace(stringPattern);
1078         }
1079         Finder pattern0 = patternTransform.transform(stringPattern);
1080         return add(pattern0, target);
1081     }
1082 
1083     /**
1084      * Add a pattern/value pair.
1085      *
1086      * @param pattern
1087      * @param target
1088      * @return this, for chaining
1089      */
add(Finder pattern, T target)1090     public RegexLookup<T> add(Finder pattern, T target) {
1091         if (!allowNull && target == null) {
1092             throw new NullPointerException("null disallowed, unless allowNull(true) is called.");
1093         }
1094 
1095         T old;
1096         switch (_lookupType) {
1097         case STAR_PATTERN_LOOKUP: // fallthrough
1098         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1099             old = storage.get(pattern);
1100 //            old = SPEntries.get(pattern);
1101             break;
1102 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1103 //            old = storage.get(pattern);
1104 //            old = RTEntries.get(pattern);
1105 //            break;
1106         default:
1107             old = MEntries.get(pattern);
1108             break;
1109         }
1110 
1111         if (old == null) {
1112             switch (_lookupType) {
1113             case STAR_PATTERN_LOOKUP: // fallthrough
1114             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1115                 storage.put(pattern, target);
1116 //                SPEntries.put(pattern, target);
1117                 break;
1118 //            case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1119 //                storage.put(pattern, target);
1120 //                RTEntries.put(pattern, target);
1121 //                break;
1122             default:
1123                 MEntries.put(pattern, target);
1124                 break;
1125             }
1126         } else if (valueMerger != null) {
1127             valueMerger.merge(target, old);
1128         } else {
1129             throw new IllegalArgumentException("Duplicate matcher without Merger defined " + pattern + "; old: " + old
1130                 + "; new: " + target);
1131         }
1132         return this;
1133     }
1134 
1135     @Override
iterator()1136     public Iterator<Map.Entry<Finder, T>> iterator() {
1137         switch (_lookupType) {
1138         case STAR_PATTERN_LOOKUP: // fall through
1139         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1140 //            return Collections.unmodifiableCollection(SPEntries.entrySet()).iterator();
1141             return Collections.unmodifiableCollection(storage.entrySet()).iterator();
1142 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1143 //            return Collections.unmodifiableCollection(RTEntries.entrySet()).iterator();
1144 //            return Collections.unmodifiableCollection(storage.entrySet()).iterator();
1145         default:
1146             return Collections.unmodifiableCollection(MEntries.entrySet()).iterator();
1147         }
1148     }
1149 
replace(String lookup, String... arguments)1150     public static String replace(String lookup, String... arguments) {
1151         StringBuilder result = new StringBuilder();
1152         int last = 0;
1153         while (true) {
1154             int pos = lookup.indexOf("$", last);
1155             if (pos < 0) {
1156                 result.append(lookup.substring(last, lookup.length()));
1157                 break;
1158             }
1159             result.append(lookup.substring(last, pos));
1160             final int arg = lookup.charAt(pos + 1) - '0';
1161             try {
1162                 result.append(arguments[arg]);
1163             } catch (Exception e) {
1164                 throw new IllegalArgumentException("Replacing $" + arg + " in <" + lookup
1165                     + ">, but too few arguments supplied.");
1166             }
1167             last = pos + 2;
1168         }
1169         return result.toString();
1170     }
1171 
1172     /**
1173      * @return the number of entries
1174      */
size()1175     public int size() {
1176         switch (_lookupType) {
1177         case STAR_PATTERN_LOOKUP: // fall through
1178         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1179 //            return SPEntries.size();
1180             return storage.size();
1181 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1182 //            return storage.size();
1183 //            return RTEntries.size();
1184         default:
1185             return MEntries.size();
1186         }
1187     }
1188 
1189     @Override
toString()1190     public String toString() {
1191         return storage.toString();
1192     }
1193 }