• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.util;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.HashSet;
8 import java.util.List;
9 import java.util.NavigableSet;
10 import java.util.Objects;
11 import java.util.Set;
12 import java.util.TreeSet;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15 
16 import com.google.common.base.Joiner;
17 import com.google.common.collect.Iterables;
18 import com.google.common.collect.Multimap;
19 import com.google.common.collect.Multiset;
20 import com.google.common.collect.Sets;
21 import com.google.common.collect.TreeMultimap;
22 import com.google.common.collect.TreeMultiset;
23 
24 /**
25  * Helper class that allows logging the use of regular expressions. A class that will summarize them will get a
26  * NavigableSet of PatternCountInterface instances.
27  *
28  * @author ribnitz
29  *
30  */
31 public class RegexLogger {
32     /**
33      * Should debugging be done? - if not, a null implementation will be used
34      */
35     private static final boolean DEBUG = false;
36     /**
37      * Instance
38      */
39     private static RegexLoggerInterface instance = null;
40 
getInstance()41     public static RegexLoggerInterface getInstance() {
42         if (instance == null) {
43             if (DEBUG) {
44                 instance = new RegexLoggerImpl();
45             } else {
46                 instance = new NullRegexLogger();
47             }
48         }
49         return instance;
50     }
51 
52     private static class PatternStringWithBoolean implements Comparable<PatternStringWithBoolean> {
53         private final String pattern;
54         private final boolean calledFromRegexFinder;
55         private final int hashCode;
56 
PatternStringWithBoolean(String patternStr, boolean calledFromRegexFinder)57         public PatternStringWithBoolean(String patternStr, boolean calledFromRegexFinder) {
58             this.pattern = patternStr.trim();
59             this.calledFromRegexFinder = calledFromRegexFinder;
60             hashCode = Objects.hash(this.pattern, this.calledFromRegexFinder);
61         }
62 
63         @Override
hashCode()64         public int hashCode() {
65             return hashCode;
66         }
67 
getPattern()68         public String getPattern() {
69             return pattern;
70         }
71 
isCalledFromRegexFinder()72         public boolean isCalledFromRegexFinder() {
73             return calledFromRegexFinder;
74         }
75 
76         @Override
equals(Object obj)77         public boolean equals(Object obj) {
78             if (this == obj) {
79                 return true;
80             }
81             if (obj == null) {
82                 return false;
83             }
84             if (getClass() != obj.getClass()) {
85                 return false;
86             }
87             PatternStringWithBoolean other = (PatternStringWithBoolean) obj;
88             if (calledFromRegexFinder != other.calledFromRegexFinder) {
89                 return false;
90             }
91             if (hashCode != other.hashCode) {
92                 return false;
93             }
94             if (other.pattern != null) {
95                 return false;
96             }
97             if (!pattern.equals(other.pattern)) {
98                 return false;
99             }
100             return true;
101         }
102 
103         @Override
compareTo(PatternStringWithBoolean o)104         public int compareTo(PatternStringWithBoolean o) {
105             if (o == null) {
106                 return 1;
107             }
108             if (this == o) {
109                 return 0;
110             }
111             return pattern.compareTo(o.pattern);
112         }
113     }
114 
115     /**
116      * Interface used for logging Regular expressions
117      * @author ribnitz
118      *
119      */
120     public static interface RegexLoggerInterface {
121         /**
122          * Log that the given pattern was applied on the given matchStr, whether it matched, and
123          * what the type of the log was. Cls conains the calling class.
124          * @param pattern
125          * @param matchStr
126          * @param matched
127          * @param type
128          * @param cls
129          */
log(String pattern, String matchStr, boolean matched, LogType type, Class<?> cls)130         void log(String pattern, String matchStr, boolean matched, LogType type, Class<?> cls);
131 
log(Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls)132         void log(Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls);
133 
log(Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls)134         void log(Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls);
135 
log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls)136         void log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls);
137 
138         /**
139          * Get all the entries that matched
140          * @return
141          */
getEntries()142         NavigableSet<PatternCountInterface> getEntries();
143 
144         /**
145          * Get the entries that occurred at least minCount times. If there are no matches, an empty set is returned
146          * @param minCount
147          * @return
148          */
getEntries(final int minCount)149         NavigableSet<PatternCountInterface> getEntries(final int minCount);
150 
isEnabled()151         boolean isEnabled();
152     }
153 
154     /**
155      * Three of the methods can be delegations, which reduces the actual implementation to two methods
156      * @author ribnitz
157      *
158      */
159     private static abstract class AbstractRegexLogger implements RegexLoggerInterface {
160 
161         @Override
log(Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls)162         public void log(Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls) {
163             log(matcher.pattern(), matchStr, matched, type, cls);
164 
165         }
166 
167         @Override
log(Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls)168         public void log(Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls) {
169             log(pattern.pattern(), matchStr, matched, type, cls);
170         }
171 
172         @Override
log(String pattern, String matchStr, boolean matched, LogType type, Class<?> cls)173         public void log(String pattern, String matchStr, boolean matched, LogType type, Class<?> cls) {
174             log(pattern, matchStr, matched, 0, type, cls);
175         }
176 
177         /**
178          * Get all entries
179          */
180         @Override
getEntries()181         public NavigableSet<PatternCountInterface> getEntries() {
182             return getEntries(1);
183         }
184 
185         @Override
isEnabled()186         public boolean isEnabled() {
187             return DEBUG;
188         }
189 
190     }
191 
192     /**
193      * Null implementation
194      * @author ribnitz
195      *
196      */
197     private static class NullRegexLogger extends AbstractRegexLogger {
198 
199         @Override
log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls)200         public void log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls) {
201             // do nothing
202         }
203 
204         @Override
getEntries(int minCount)205         public NavigableSet<PatternCountInterface> getEntries(int minCount) {
206             NavigableSet<PatternCountInterface> returned = Sets.newTreeSet(Collections.EMPTY_SET);
207             return returned;
208         }
209     }
210 
211     /**
212      * Inetface used for the entries returnred by the RegexLogger
213      * @author ribnitz
214      *
215      */
216     public static interface PatternCountInterface {
217         /**
218          * Get the pattern used
219          * @return
220          */
getPattern()221         String getPattern();
222 
223         /**
224          * Get the number of successful matches obtained through FIND
225          * @return
226          */
getNumberOfFindMatches()227         int getNumberOfFindMatches();
228 
229         /**
230          * Get the number of unsuccessful matches obtained through FIND
231          * @return
232          */
getNumberOfFindFailures()233         int getNumberOfFindFailures();
234 
235         /**
236          * Get the number of successful matches obtained through MATCH
237          * @return
238          */
getNumberOfMatchMatches()239         int getNumberOfMatchMatches();
240 
241         /**
242          * Get the number of unsuccessful matches obtained through FIND
243          * @return
244          */
getNumberOfMatchFailures()245         int getNumberOfMatchFailures();
246 
247         /**
248          * Return true if this call was made from RegexFinder
249          * @return
250          */
isCalledFromRegexFinder()251         boolean isCalledFromRegexFinder();
252 
253         /**
254          * Get a set of all call locations
255          * @return
256          */
getCallLocations()257         Set<String> getCallLocations();
258 
259     }
260 
261     /**
262      * GetAll uses this class to add all the entries of a multiSet to the result set, constructing
263      * the object to return for each pattern. Objects will only be added once.
264      *
265      * This is the implementatioon that adds all items.
266      * @author ribnitz
267      *
268      */
269     private static class AddAllEntryProcessor {
270         protected final int minCount;
271         protected final CountSets c;
272         protected final Set<PatternStringWithBoolean> seen = new HashSet<>();
273         protected final NavigableSet<PatternCountInterface> result = new TreeSet<>();
274 
AddAllEntryProcessor(int minCount, CountSets c)275         public AddAllEntryProcessor(int minCount, CountSets c) {
276             this.minCount = minCount;
277             this.c = c;
278         }
279 
getResult()280         public NavigableSet<PatternCountInterface> getResult() {
281             return result;
282         }
283 
process(PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet)284         public void process(PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet) {
285             if (!seen.contains(item)) {
286                 result.add(new RegexKeyWithCount(item, c));
287                 seen.add(item);
288             }
289         }
290     }
291 
292     /**
293      * Sometimes getEntries is called with a minCount; this Class filters and only adds the
294      * items that occur at least minCount times.
295      * @author ribnitz
296      *
297      */
298     private static class EntryProcessor extends AddAllEntryProcessor {
EntryProcessor(int minCount, CountSets c)299         public EntryProcessor(int minCount, CountSets c) {
300             super(minCount, c);
301         }
302 
303         @Override
process(PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet)304         public void process(PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet) {
305             if (countSet.count(item) >= minCount) {
306                 super.process(item, countSet);
307             }
308         }
309     }
310 
311     /**
312      * Since all the inner classes are static, this object is used to pass around the refernces to the
313      * different sets/the state
314      *
315      * @author ribnitz
316      *
317      */
318     private static class CountSets {
319         final Multiset<PatternStringWithBoolean> matchedFindSet;
320         final Multiset<PatternStringWithBoolean> failedFindSet;
321         final Multiset<PatternStringWithBoolean> matchedMatchSet;
322         final Multiset<PatternStringWithBoolean> failedMatchSet;
323         final Multimap<PatternStringWithBoolean, String> stacktraces;
324 
CountSets(Multiset<PatternStringWithBoolean> matchedFindSet, Multiset<PatternStringWithBoolean> failedFindSet, Multiset<PatternStringWithBoolean> matchedMatchSet, Multiset<PatternStringWithBoolean> failedMatchSet, Multimap<PatternStringWithBoolean, String> occurrences)325         public CountSets(Multiset<PatternStringWithBoolean> matchedFindSet, Multiset<PatternStringWithBoolean> failedFindSet,
326             Multiset<PatternStringWithBoolean> matchedMatchSet, Multiset<PatternStringWithBoolean> failedMatchSet,
327             Multimap<PatternStringWithBoolean, String> occurrences) {
328             this.failedFindSet = failedFindSet;
329             this.failedMatchSet = failedMatchSet;
330             this.matchedMatchSet = matchedMatchSet;
331             this.stacktraces = occurrences;
332             this.matchedFindSet = matchedFindSet;
333         }
334     }
335 
336     private static class RegexKeyWithCount implements PatternCountInterface, Comparable<PatternCountInterface> {
337         private final String pattern;
338         private final int findMatchCount;
339         private final int findFailCount;
340         private final int matchMatchCount;
341         private final int matchFailCount;
342         private final boolean calledFromRegexFinder;
343         private final Set<String> callLocations = new HashSet<>();
344         private final int hashCode;
345 
RegexKeyWithCount(PatternStringWithBoolean key, CountSets bean)346         public RegexKeyWithCount(PatternStringWithBoolean key, CountSets bean) {
347             this.pattern = key.getPattern();
348             this.calledFromRegexFinder = key.isCalledFromRegexFinder();
349             this.findMatchCount = bean.matchedFindSet.count(key);
350             this.findFailCount = bean.failedFindSet.count(key);
351             this.matchMatchCount = bean.matchedMatchSet.count(key);
352             this.matchFailCount = bean.failedMatchSet.count(key);
353             Collection<String> tmp = bean.stacktraces.get(key);
354             for (String cur : tmp) {
355                 if (!callLocations.contains(cur)) {
356                     callLocations.add(cur);
357                 }
358             }
359             this.hashCode = Objects.hash(this.pattern,
360                 this.findMatchCount,
361                 this.findFailCount,
362                 this.matchFailCount,
363                 this.matchMatchCount,
364                 this.calledFromRegexFinder,
365                 this.callLocations);
366         }
367 
368         @Override
getPattern()369         public String getPattern() {
370             return pattern;
371         }
372 
373         @Override
hashCode()374         public int hashCode() {
375             return hashCode;
376         }
377 
378         @Override
getNumberOfFindMatches()379         public int getNumberOfFindMatches() {
380             return findMatchCount;
381         }
382 
383         @Override
getNumberOfFindFailures()384         public int getNumberOfFindFailures() {
385             return findFailCount;
386         }
387 
388         @Override
getNumberOfMatchMatches()389         public int getNumberOfMatchMatches() {
390             return matchMatchCount;
391         }
392 
393         @Override
getNumberOfMatchFailures()394         public int getNumberOfMatchFailures() {
395             return matchFailCount;
396         }
397 
398         @Override
equals(Object obj)399         public boolean equals(Object obj) {
400             if (this == obj) {
401                 return true;
402             }
403             if (obj == null) {
404                 return false;
405             }
406             if (hashCode != obj.hashCode()) {
407                 return false;
408             }
409             if (getClass() != obj.getClass()) {
410                 return false;
411             }
412             RegexKeyWithCount other = (RegexKeyWithCount) obj;
413             if (matchFailCount != other.matchFailCount) {
414                 return false;
415             }
416             if (matchMatchCount != other.matchMatchCount) {
417                 return false;
418             }
419             if (findFailCount != other.findFailCount) {
420                 return false;
421             }
422             if (findMatchCount != other.findMatchCount) {
423                 return false;
424             }
425             if (!pattern.equals(other.pattern)) {
426                 return false;
427             }
428             if (calledFromRegexFinder != other.calledFromRegexFinder) {
429                 return false;
430             }
431             if (callLocations != other.callLocations) {
432                 return false;
433             }
434             return true;
435         }
436 
437         @Override
compareTo(PatternCountInterface o)438         public int compareTo(PatternCountInterface o) {
439             if (o == null) {
440                 return 1;
441             }
442             return new Integer(matchFailCount + matchMatchCount + findFailCount + findMatchCount).compareTo(
443                 o.getNumberOfFindFailures() + o.getNumberOfFindMatches() + o.getNumberOfMatchFailures() + o.getNumberOfMatchMatches());
444         }
445 
446         @Override
isCalledFromRegexFinder()447         public boolean isCalledFromRegexFinder() {
448             return calledFromRegexFinder;
449         }
450 
451         @Override
getCallLocations()452         public Set<String> getCallLocations() {
453             return callLocations;
454         }
455 
456     }
457 
458     public enum LogType {
459         FIND, MATCH
460     }
461 
462     private static interface IterableTransformer<E, F> {
transform(Iterable<E> input)463         Iterable<F> transform(Iterable<E> input);
464     }
465 
466     private static class StringIterableTransformer implements IterableTransformer<String, String> {
467 
468         @Override
transform(Iterable<String> input)469         public Iterable<String> transform(Iterable<String> input) {
470             List<String> returned = new ArrayList<>(Iterables.size(input));
471             String lastClass = null;
472             for (String current : input) {
473                 String transformed = current;
474                 if (lastClass != null) {
475                     if (lastClass.startsWith("RegexLookup") && !current.startsWith("org.unicode.cldr.util.RegexLookup")) {
476                         returned.add(lastClass);
477                     }
478                     break;
479                 }
480                 if (current.startsWith("org.unicode.cldr.test.CheckCLDR") &&
481                     /*
482                      * TODO: fix this function to avoid referencing lastClass when it is null.
483                      * The condition lastClass == null here prevents compiler warning/error or possible NullPointerException,
484                      * since lastClass is ALWAYS null here; but this is obviously not the best solution.
485                      */
486                     (lastClass == null || !lastClass.startsWith("org.unicode.cldr.test.CheckCLDR"))) {
487                     lastClass = current;
488                     // leave out
489                     continue;
490                 }
491                 // remove org.unicode.cldr
492                 if (current.startsWith("org.unicode.cldr.util.")) {
493                     transformed = current.substring("org.unicode.cldr.util.".length());
494                 }
495                 // only the last RegexLookup will be added
496                 if (!transformed.startsWith("RegexLookup")) {
497                     returned.add(transformed);
498                 }
499                 lastClass = transformed;
500             }
501             return returned;
502         }
503     }
504 
505     private static class ClassnameOnlyStringTransformer implements IterableTransformer<String, String> {
506 
507         @Override
transform(Iterable<String> input)508         public Iterable<String> transform(Iterable<String> input) {
509             List<String> returned = new ArrayList<>(Iterables.size(input));
510             String lastClass = null;
511             for (String current : input) {
512                 if (current.lastIndexOf(".") > 0) {
513                     current = current.substring(current.lastIndexOf("."));
514                 }
515                 if (lastClass != null) {
516                     if (lastClass.startsWith("RegexLookup") && !current.startsWith("RegexLookup")) {
517                         returned.add(lastClass);
518                     }
519                     if (lastClass.startsWith("VettingViewer")) {
520                         break;
521                     }
522                     if (current.startsWith("CheckCLDR") && !lastClass.startsWith("CheckCLDR")) {
523                         lastClass = current;
524                         // leave out
525                         continue;
526                     }
527                 }
528                 // only the last RegexLookup will be added
529                 if (!current.startsWith("RegexLookup")) {
530                     returned.add(current);
531                 }
532                 lastClass = current;
533             }
534             return returned;
535         }
536     }
537 
538     /**
539      * This is the class doing the bulk of the work.
540      * @author ribnitz
541      */
542     private static class RegexLoggerImpl extends AbstractRegexLogger {
543 
544         /*
545          * Each has more than 1m hits, together they account for about 14m (of the 26m total)
546          */
547         private static final Set<String> exactMatchSet = new HashSet<>(Arrays.asList(new String[] {
548             "^//ldml.*",
549             "^//ldml/dates.*",
550             "^//ldml/units.*",
551             "^//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]",
552             "^//ldml/characters.*",
553             "^//ldml/listPatterns/listPattern.*",
554             "^//ldml/units/unitLength[@type=\"(long|short|narrow)\"].*",
555         }));
556         private static final Set<String> patternSet = new HashSet<>(Arrays.asList(new String[] {
557             "^//ldml/dates/fields",
558             "^//ldml/dates/calendars/calendar",
559             "/(availableFormats",
560         }));
561         private final Multiset<PatternStringWithBoolean> matchedFindSet = TreeMultiset.create();
562         private final Multiset<PatternStringWithBoolean> failedFindSet = TreeMultiset.create();
563         private final Multiset<PatternStringWithBoolean> matchedMatchSet = TreeMultiset.create();
564         private final Multiset<PatternStringWithBoolean> failedMatchSet = TreeMultiset.create();
565 
566         private final Multimap<PatternStringWithBoolean, String> occurrences = TreeMultimap.create();
567         private final IterableTransformer<String, String> transformer = new StringIterableTransformer();
568 
569         @Override
log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls)570         public void log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls) {
571             boolean isRegexFinder = findClassName("org.unicode.cldr.util.RegexLookup", 10);
572             PatternStringWithBoolean key = new PatternStringWithBoolean(pattern, isRegexFinder);
573             Collection<PatternStringWithBoolean> collectionToAdd = determineCollectionToUse(matched, type);
574             if (collectionToAdd != null) {
575                 collectionToAdd.add(key);
576             }
577             if (shouldLogPattern(pattern, isRegexFinder)) {
578                 addElementToList(key);
579             }
580         }
581 
determineCollectionToUse(boolean matched, LogType type)582         private Collection<PatternStringWithBoolean> determineCollectionToUse(boolean matched, LogType type) {
583             Collection<PatternStringWithBoolean> collectionToAdd = null;
584             switch (type) {
585             case FIND:
586                 if (matched) {
587                     collectionToAdd = matchedFindSet;
588                 } else {
589                     collectionToAdd = failedFindSet;
590                 }
591                 break;
592             case MATCH:
593                 if (matched) {
594                     collectionToAdd = matchedMatchSet;
595                 } else {
596                     collectionToAdd = failedMatchSet;
597                 }
598                 break;
599             }
600             return collectionToAdd;
601         }
602 
shouldLogPattern(String pattern, boolean isRegexFinder)603         private boolean shouldLogPattern(String pattern, boolean isRegexFinder) {
604             if (!isRegexFinder) {
605                 return true;
606             } else {
607                 if (exactMatchSet.contains(pattern)) {
608                     return true;
609                 } else {
610                     for (String cur : patternSet) {
611                         if (pattern.startsWith(cur)) {
612                             return true;
613                         }
614                     }
615                 }
616             }
617             return false;
618         }
619 
findClassName(String className, int depth)620         private boolean findClassName(String className, int depth) {
621             StackTraceElement[] st = Thread.currentThread().getStackTrace();
622             int startPos = (st.length > 2) ? 2 : 0;
623             int endPos = (startPos + depth > st.length) ? st.length : startPos + depth;
624             for (int i = startPos; i < endPos; i++) {
625                 StackTraceElement cur = st[i];
626                 String curClass = cur.getClassName();
627                 if (curClass.startsWith(className)) {
628                     return true;
629                 }
630             }
631             return false;
632         }
633 
634         private final static Joiner JOINER = Joiner.on(";");
635 
addElementToList(PatternStringWithBoolean key)636         private void addElementToList(PatternStringWithBoolean key) {
637             List<String> stList = processStackTrace("org.unicode.cldr.util.RegexLookup", 0);
638 
639             if (!stList.isEmpty()) {
640                 occurrences.put(key, JOINER.join(transformer.transform(stList)));
641             }
642         }
643 
processStackTrace(String classNameToStartAt, int depth)644         private List<String> processStackTrace(String classNameToStartAt, int depth) {
645             StackTraceElement[] st = Thread.currentThread().getStackTrace();
646             if (depth == 0) {
647                 depth = st.length;
648             }
649             int startPos;
650             if (depth < 0) {
651                 startPos = depth + st.length;
652                 depth = Math.abs(depth);
653             } else {
654                 startPos = (st.length > 2) ? 2 : 0;
655             }
656             int pos;
657             boolean found = false;
658             for (pos = startPos; pos < st.length; pos++) {
659                 if (st[pos].getClassName().startsWith(classNameToStartAt)) {
660                     found = true;
661                     break;
662                 }
663             }
664             if (!found) {
665                 return Collections.emptyList();
666             }
667             int endPos = (pos + depth > st.length) ? st.length : startPos + depth;
668             List<String> ret = new ArrayList<>(depth + 2);
669             for (int i = pos; i < endPos; i++) {
670                 StackTraceElement cur = st[i];
671                 String curClass = cur.getClassName();
672                 ret.add(curClass + ":" + cur.getLineNumber());
673             }
674             return ret;
675         }
676 
677         @Override
getEntries(final int minCount)678         public NavigableSet<PatternCountInterface> getEntries(final int minCount) {
679             CountSets c = new CountSets(matchedFindSet, failedFindSet, matchedMatchSet, failedMatchSet, occurrences);
680             final AddAllEntryProcessor processor = (minCount == 1) ? new AddAllEntryProcessor(minCount, c) : new EntryProcessor(minCount, c);
681             for (PatternStringWithBoolean item : matchedFindSet) {
682                 processor.process(item, matchedFindSet);
683             }
684             for (PatternStringWithBoolean item : failedFindSet) {
685                 processor.process(item, failedFindSet);
686             }
687             for (PatternStringWithBoolean item : matchedMatchSet) {
688                 processor.process(item, matchedMatchSet);
689             }
690             for (PatternStringWithBoolean item : failedMatchSet) {
691                 processor.process(item, failedMatchSet);
692             }
693             return Sets.unmodifiableNavigableSet(processor.getResult());
694         }
695     }
696 }
697