package org.unicode.cldr.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.google.common.collect.TreeMultiset;

/**
 * Helper class that allows logging the use of regular expressions. A class that wants to summarize the
 * logged expressions obtains a NavigableSet of {@link PatternCountInterface} instances from the logger.
 *
 * @author ribnitz
 *
 */
public class RegexLogger {
    /**
     * Should debugging be done? If not, a null implementation will be used.
     */
    private static final boolean DEBUG = false;
    /**
     * The shared logger instance, created on first use by {@link #getInstance()}.
     */
    private static RegexLoggerInterface instance = null;

    /**
     * Returns the shared logger, creating it on the first call: a recording implementation when
     * {@code DEBUG} is true, otherwise an implementation that ignores every call.
     *
     * NOTE(review): the lazy initialization is not synchronized.
     *
     * @return the shared logger, never null
     */
    public static RegexLoggerInterface getInstance() {
        if (instance == null) {
            if (DEBUG) {
                instance = new RegexLoggerImpl();
            } else {
                instance = new NullRegexLogger();
            }
        }
        return instance;
    }

    /**
     * Key used for counting: the (trimmed) pattern string together with a flag telling whether the
     * pattern was applied from within RegexLookup. Instances are immutable; the hash code is cached.
     */
    private static class PatternStringWithBoolean implements Comparable<PatternStringWithBoolean> {
        private final String pattern;
        private final boolean calledFromRegexFinder;
        private final int hashCode;

        /**
         * @param patternStr the pattern string; stored trimmed, must not be null
         * @param calledFromRegexFinder whether the pattern was used from RegexLookup
         */
        public PatternStringWithBoolean(String patternStr, boolean calledFromRegexFinder) {
            this.pattern = patternStr.trim();
            this.calledFromRegexFinder = calledFromRegexFinder;
            hashCode = Objects.hash(this.pattern, this.calledFromRegexFinder);
        }

        @Override
        public int hashCode() {
            return hashCode;
        }

        public String getPattern() {
            return pattern;
        }

        public boolean isCalledFromRegexFinder() {
            return calledFromRegexFinder;
        }

        /**
         * Two keys are equal when both the pattern and the flag are equal.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            PatternStringWithBoolean other = (PatternStringWithBoolean) obj;
            // The cached hash is a cheap first filter before the string comparison.
            return hashCode == other.hashCode
                && calledFromRegexFinder == other.calledFromRegexFinder
                && pattern.equals(other.pattern);
        }

        /**
         * Orders by pattern, then by the flag, so that the ordering is consistent with
         * {@link #equals(Object)}; the sorted multisets and multimap keyed by this class would
         * otherwise merge keys that differ only in the flag.
         */
        @Override
        public int compareTo(PatternStringWithBoolean o) {
            if (o == null) {
                return 1;
            }
            if (this == o) {
                return 0;
            }
            int byPattern = pattern.compareTo(o.pattern);
            if (byPattern != 0) {
                return byPattern;
            }
            return Boolean.compare(calledFromRegexFinder, o.calledFromRegexFinder);
        }
    }

    /**
     * Interface used for logging regular expressions.
     * @author ribnitz
     *
     */
    public static interface RegexLoggerInterface {
        /**
         * Log that the given pattern was applied on the given matchStr, whether it matched, and
         * what the type of the log was. Cls contains the calling class.
         * @param pattern the pattern string that was applied
         * @param matchStr the string the pattern was applied to
         * @param matched whether the pattern matched
         * @param type whether the operation was a FIND or a MATCH
         * @param cls the calling class
         */
        void log(String pattern, String matchStr, boolean matched, LogType type, Class<?> cls);

        /**
         * Same as {@link #log(String, String, boolean, LogType, Class)}, taking the pattern from the matcher.
         */
        void log(Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls);

        /**
         * Same as {@link #log(String, String, boolean, LogType, Class)}, taking the pattern string from the
         * compiled pattern.
         */
        void log(Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls);

        /**
         * Same as {@link #log(String, String, boolean, LogType, Class)}, with an additional time value.
         * @param time a time value supplied by the caller
         */
        void log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls);

        /**
         * Get all the entries that were logged.
         * @return the logged entries
         */
        NavigableSet<PatternCountInterface> getEntries();

        /**
         * Get the entries that occurred at least minCount times. If there are no matches, an empty set is returned.
         * @param minCount the minimum number of occurrences
         * @return the entries that reach minCount
         */
        NavigableSet<PatternCountInterface> getEntries(final int minCount);

        /**
         * @return true if logging is enabled
         */
        boolean isEnabled();
    }

    /**
     * Most of the interface methods can be delegations, which reduces the actual implementation to two
     * methods: the log method taking a time value, and getEntries(int).
     * @author ribnitz
     *
     */
    private static abstract class AbstractRegexLogger implements RegexLoggerInterface {

        @Override
        public void log(Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls) {
            log(matcher.pattern(), matchStr, matched, type, cls);
        }

        @Override
        public void log(Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls) {
            log(pattern.pattern(), matchStr, matched, type, cls);
        }

        @Override
        public void log(String pattern, String matchStr, boolean matched, LogType type, Class<?> cls) {
            log(pattern, matchStr, matched, 0, type, cls);
        }

        /**
         * Get all entries; delegates to getEntries(1).
         */
        @Override
        public NavigableSet<PatternCountInterface> getEntries() {
            return getEntries(1);
        }

        @Override
        public boolean isEnabled() {
            return DEBUG;
        }

    }

    /**
     * Null implementation: ignores all log calls and never has entries.
     * @author ribnitz
     *
     */
    private static class NullRegexLogger extends AbstractRegexLogger {

        @Override
        public void log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls) {
            // do nothing
        }

        /**
         * @return a new, empty set
         */
        @Override
        public NavigableSet<PatternCountInterface> getEntries(int minCount) {
            // Typed construction instead of the raw Collections.EMPTY_SET (unchecked conversion).
            NavigableSet<PatternCountInterface> returned = new TreeSet<>();
            return returned;
        }
    }

    /**
     * Interface used for the entries returned by the RegexLogger.
     * @author ribnitz
     *
     */
    public static interface PatternCountInterface {
        /**
         * Get the pattern used
         * @return the pattern string
         */
        String getPattern();

        /**
         * Get the number of successful matches obtained through FIND
         * @return the count
         */
        int getNumberOfFindMatches();

        /**
         * Get the number of unsuccessful matches obtained through FIND
         * @return the count
         */
        int getNumberOfFindFailures();

        /**
         * Get the number of successful matches obtained through MATCH
         * @return the count
         */
        int getNumberOfMatchMatches();

        /**
         * Get the number of unsuccessful matches obtained through MATCH
         * @return the count
         */
        int getNumberOfMatchFailures();

        /**
         * Return true if this call was made from RegexFinder
         * @return true if called from RegexFinder
         */
        boolean isCalledFromRegexFinder();

        /**
         * Get a set of all call locations
         * @return the call locations
         */
        Set<String> getCallLocations();

    }

    /**
     * getEntries uses this class to add all the entries of a multiset to the result set, constructing
     * the object to return for each pattern. Objects will only be added once.
     *
     * This is the implementation that adds all items.
     * @author ribnitz
     *
     */
    private static class AddAllEntryProcessor {
        protected final int minCount;
        protected final CountSets c;
        protected final Set<PatternStringWithBoolean> seen = new HashSet<>();
        protected final NavigableSet<PatternCountInterface> result = new TreeSet<>();

        public AddAllEntryProcessor(int minCount, CountSets c) {
            this.minCount = minCount;
            this.c = c;
        }

        public NavigableSet<PatternCountInterface> getResult() {
            return result;
        }

        /**
         * Adds an entry for item to the result, unless the item was processed before.
         * @param item the key to add
         * @param countSet the multiset the item was taken from (not used by this implementation)
         */
        public void process(PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet) {
            // Set.add reports whether the item is new, so no separate contains() is needed.
            if (seen.add(item)) {
                result.add(new RegexKeyWithCount(item, c));
            }
        }
    }

    /**
     * Sometimes getEntries is called with a minCount; this class filters and only adds the
     * items that occur at least minCount times in the multiset being processed.
     * @author ribnitz
     *
     */
    private static class EntryProcessor extends AddAllEntryProcessor {
        public EntryProcessor(int minCount, CountSets c) {
            super(minCount, c);
        }

        @Override
        public void process(PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet) {
            if (countSet.count(item) >= minCount) {
                super.process(item, countSet);
            }
        }
    }

    /**
     * Since all the inner classes are static, this object is used to pass around the references to the
     * different sets/the state.
     *
     * @author ribnitz
     *
     */
    private static class CountSets {
        final Multiset<PatternStringWithBoolean> matchedFindSet;
        final Multiset<PatternStringWithBoolean> failedFindSet;
        final Multiset<PatternStringWithBoolean> matchedMatchSet;
        final Multiset<PatternStringWithBoolean> failedMatchSet;
        final Multimap<PatternStringWithBoolean, String> stacktraces;

        public CountSets(Multiset<PatternStringWithBoolean> matchedFindSet, Multiset<PatternStringWithBoolean> failedFindSet,
            Multiset<PatternStringWithBoolean> matchedMatchSet, Multiset<PatternStringWithBoolean> failedMatchSet,
            Multimap<PatternStringWithBoolean, String> occurrences) {
            this.failedFindSet = failedFindSet;
            this.failedMatchSet = failedMatchSet;
            this.matchedMatchSet = matchedMatchSet;
            this.stacktraces = occurrences;
            this.matchedFindSet = matchedFindSet;
        }
    }

    /**
     * Snapshot of the counts and call locations recorded for one key at the time of construction.
     */
    private static class RegexKeyWithCount implements PatternCountInterface, Comparable<PatternCountInterface> {
        private final String pattern;
        private final int findMatchCount;
        private final int findFailCount;
        private final int matchMatchCount;
        private final int matchFailCount;
        private final boolean calledFromRegexFinder;
        private final Set<String> callLocations = new HashSet<>();
        private final int hashCode;

        /**
         * @param key the key to look up
         * @param bean the count sets and call locations to read the values for key from
         */
        public RegexKeyWithCount(PatternStringWithBoolean key, CountSets bean) {
            this.pattern = key.getPattern();
            this.calledFromRegexFinder = key.isCalledFromRegexFinder();
            this.findMatchCount = bean.matchedFindSet.count(key);
            this.findFailCount = bean.failedFindSet.count(key);
            this.matchMatchCount = bean.matchedMatchSet.count(key);
            this.matchFailCount = bean.failedMatchSet.count(key);
            callLocations.addAll(bean.stacktraces.get(key));
            // Computed last, since it depends on the filled callLocations.
            this.hashCode = Objects.hash(this.pattern,
                this.findMatchCount,
                this.findFailCount,
                this.matchFailCount,
                this.matchMatchCount,
                this.calledFromRegexFinder,
                this.callLocations);
        }

        @Override
        public String getPattern() {
            return pattern;
        }

        @Override
        public int hashCode() {
            return hashCode;
        }

        @Override
        public int getNumberOfFindMatches() {
            return findMatchCount;
        }

        @Override
        public int getNumberOfFindFailures() {
            return findFailCount;
        }

        @Override
        public int getNumberOfMatchMatches() {
            return matchMatchCount;
        }

        @Override
        public int getNumberOfMatchFailures() {
            return matchFailCount;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            RegexKeyWithCount other = (RegexKeyWithCount) obj;
            // The cached hash is a cheap first filter before the field-by-field comparison.
            if (hashCode != other.hashCode) {
                return false;
            }
            return matchFailCount == other.matchFailCount
                && matchMatchCount == other.matchMatchCount
                && findFailCount == other.findFailCount
                && findMatchCount == other.findMatchCount
                && calledFromRegexFinder == other.calledFromRegexFinder
                && pattern.equals(other.pattern)
                // Compare the set contents; a reference comparison would reject equal sets.
                && callLocations.equals(other.callLocations);
        }

        /**
         * Orders by the total number of hits (all four counters added up). Ties are broken by pattern
         * and then by the RegexFinder flag, so that different patterns with the same total remain
         * distinct elements when stored in a sorted set.
         */
        @Override
        public int compareTo(PatternCountInterface o) {
            if (o == null) {
                return 1;
            }
            int ownTotal = matchFailCount + matchMatchCount + findFailCount + findMatchCount;
            int otherTotal = o.getNumberOfFindFailures() + o.getNumberOfFindMatches()
                + o.getNumberOfMatchFailures() + o.getNumberOfMatchMatches();
            int byTotal = Integer.compare(ownTotal, otherTotal);
            if (byTotal != 0) {
                return byTotal;
            }
            int byPattern = pattern.compareTo(o.getPattern());
            if (byPattern != 0) {
                return byPattern;
            }
            return Boolean.compare(calledFromRegexFinder, o.isCalledFromRegexFinder());
        }

        @Override
        public boolean isCalledFromRegexFinder() {
            return calledFromRegexFinder;
        }

        @Override
        public Set<String> getCallLocations() {
            return callLocations;
        }

    }

    /**
     * The kind of regular expression operation being logged.
     */
    public enum LogType {
        FIND, MATCH
    }

    /**
     * Converts one Iterable into another.
     */
    private static interface IterableTransformer<E, F> {
        Iterable<F> transform(Iterable<E> input);
    }

    /**
     * Shortens the stack trace entries recorded for a pattern: strips the org.unicode.cldr.util. prefix and
     * leaves out RegexLookup and leading CheckCLDR entries.
     */
    private static class StringIterableTransformer implements IterableTransformer<String, String> {

        @Override
        public Iterable<String> transform(Iterable<String> input) {
            List<String> returned = new ArrayList<>(Iterables.size(input));
            String lastClass = null;
            for (String current : input) {
                String transformed = current;
                if (lastClass != null) {
                    if (lastClass.startsWith("RegexLookup") && !current.startsWith("org.unicode.cldr.util.RegexLookup")) {
                        returned.add(lastClass);
                    }
                    // NOTE(review): this break is unconditional, so the loop stops at the first entry
                    // following the one that set lastClass - confirm that this is intended.
                    break;
                }
                if (current.startsWith("org.unicode.cldr.test.CheckCLDR") &&
                /*
                 * TODO: fix this function to avoid referencing lastClass when it is null.
                 * The condition lastClass == null here prevents compiler warning/error or possible NullPointerException,
                 * since lastClass is ALWAYS null here; but this is obviously not the best solution.
                 */
                    (lastClass == null || !lastClass.startsWith("org.unicode.cldr.test.CheckCLDR"))) {
                    lastClass = current;
                    // leave out
                    continue;
                }
                // remove org.unicode.cldr
                if (current.startsWith("org.unicode.cldr.util.")) {
                    transformed = current.substring("org.unicode.cldr.util.".length());
                }
                // only the last RegexLookup will be added
                if (!transformed.startsWith("RegexLookup")) {
                    returned.add(transformed);
                }
                lastClass = transformed;
            }
            return returned;
        }
    }

    /**
     * Reduces each entry to the part after the last dot (the simple class name and whatever follows it)
     * and filters RegexLookup and CheckCLDR entries; stops after an entry starting with VettingViewer.
     */
    private static class ClassnameOnlyStringTransformer implements IterableTransformer<String, String> {

        @Override
        public Iterable<String> transform(Iterable<String> input) {
            List<String> returned = new ArrayList<>(Iterables.size(input));
            String lastClass = null;
            for (String entry : input) {
                String current = entry;
                int lastDot = current.lastIndexOf('.');
                if (lastDot > 0) {
                    // Skip the dot itself; with the dot kept, none of the startsWith checks below
                    // could ever match a qualified name.
                    current = current.substring(lastDot + 1);
                }
                if (lastClass != null) {
                    if (lastClass.startsWith("RegexLookup") && !current.startsWith("RegexLookup")) {
                        returned.add(lastClass);
                    }
                    if (lastClass.startsWith("VettingViewer")) {
                        break;
                    }
                    if (current.startsWith("CheckCLDR") && !lastClass.startsWith("CheckCLDR")) {
                        lastClass = current;
                        // leave out
                        continue;
                    }
                }
                // only the last RegexLookup will be added
                if (!current.startsWith("RegexLookup")) {
                    returned.add(current);
                }
                lastClass = current;
            }
            return returned;
        }
    }

    /**
     * This is the class doing the bulk of the work.
     * @author ribnitz
     */
    private static class RegexLoggerImpl extends AbstractRegexLogger {

        /*
         * Each has more than 1m hits, together they account for about 14m (of the 26m total)
         */
        private static final Set<String> exactMatchSet = new HashSet<>(Arrays.asList(new String[] {
            "^//ldml.*",
            "^//ldml/dates.*",
            "^//ldml/units.*",
            "^//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]",
            "^//ldml/characters.*",
            "^//ldml/listPatterns/listPattern.*",
            "^//ldml/units/unitLength[@type=\"(long|short|narrow)\"].*",
        }));
        private static final Set<String> patternSet = new HashSet<>(Arrays.asList(new String[] {
            "^//ldml/dates/fields",
            "^//ldml/dates/calendars/calendar",
            "/(availableFormats",
        }));
        private static final Joiner JOINER = Joiner.on(";");

        private final Multiset<PatternStringWithBoolean> matchedFindSet = TreeMultiset.create();
        private final Multiset<PatternStringWithBoolean> failedFindSet = TreeMultiset.create();
        private final Multiset<PatternStringWithBoolean> matchedMatchSet = TreeMultiset.create();
        private final Multiset<PatternStringWithBoolean> failedMatchSet = TreeMultiset.create();

        private final Multimap<PatternStringWithBoolean, String> occurrences = TreeMultimap.create();
        private final IterableTransformer<String, String> transformer = new StringIterableTransformer();

        /**
         * Counts the pattern in the multiset selected by matched and type and, for patterns selected by
         * shouldLogPattern, records the current call location. The matchStr, time and cls arguments are
         * not used by this implementation.
         */
        @Override
        public void log(String pattern, String matchStr, boolean matched, double time, LogType type, Class<?> cls) {
            boolean isRegexFinder = findClassName("org.unicode.cldr.util.RegexLookup", 10);
            PatternStringWithBoolean key = new PatternStringWithBoolean(pattern, isRegexFinder);
            Collection<PatternStringWithBoolean> collectionToAdd = determineCollectionToUse(matched, type);
            if (collectionToAdd != null) {
                collectionToAdd.add(key);
            }
            if (shouldLogPattern(pattern, isRegexFinder)) {
                addElementToList(key);
            }
        }

        /**
         * Selects the multiset that counts the given outcome.
         * @return the multiset for the combination of matched and type, or null for an unhandled type
         */
        private Collection<PatternStringWithBoolean> determineCollectionToUse(boolean matched, LogType type) {
            Collection<PatternStringWithBoolean> collectionToAdd = null;
            switch (type) {
            case FIND:
                collectionToAdd = matched ? matchedFindSet : failedFindSet;
                break;
            case MATCH:
                collectionToAdd = matched ? matchedMatchSet : failedMatchSet;
                break;
            }
            return collectionToAdd;
        }

        /**
         * Decides whether the call location should be recorded: always for patterns not used from
         * RegexLookup; otherwise only if the pattern is one of exactMatchSet or starts with an element
         * of patternSet.
         */
        private boolean shouldLogPattern(String pattern, boolean isRegexFinder) {
            if (!isRegexFinder || exactMatchSet.contains(pattern)) {
                return true;
            }
            for (String cur : patternSet) {
                if (pattern.startsWith(cur)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Checks whether one of the first depth stack frames examined belongs to a class whose name starts
         * with className. The first two frames of the trace are skipped when the trace has more than two.
         */
        private boolean findClassName(String className, int depth) {
            StackTraceElement[] st = Thread.currentThread().getStackTrace();
            int startPos = (st.length > 2) ? 2 : 0;
            int endPos = Math.min(st.length, startPos + depth);
            for (int i = startPos; i < endPos; i++) {
                if (st[i].getClassName().startsWith(className)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Records the (transformed) current stack trace, joined with ";", as a call location of key.
         * Nothing is recorded if the stack trace contains no RegexLookup frame.
         */
        private void addElementToList(PatternStringWithBoolean key) {
            List<String> stList = processStackTrace("org.unicode.cldr.util.RegexLookup", 0);

            if (!stList.isEmpty()) {
                occurrences.put(key, JOINER.join(transformer.transform(stList)));
            }
        }

        /**
         * Collects "className:lineNumber" strings from the current stack trace, beginning at the first
         * frame whose class name starts with classNameToStartAt.
         *
         * @param classNameToStartAt prefix of the class name of the first frame to return
         * @param depth maximum number of frames to return; 0 means no limit; for a negative value the
         *            search starts that many frames before the end of the trace
         * @return the collected frames; an empty list if no frame matches classNameToStartAt
         */
        private List<String> processStackTrace(String classNameToStartAt, int depth) {
            StackTraceElement[] st = Thread.currentThread().getStackTrace();
            int limit = (depth == 0) ? st.length : Math.abs(depth);
            int startPos;
            if (depth < 0) {
                // Clamp, so a depth exceeding the trace length cannot yield a negative index.
                startPos = Math.max(0, depth + st.length);
            } else {
                startPos = (st.length > 2) ? 2 : 0;
            }
            int pos;
            boolean found = false;
            for (pos = startPos; pos < st.length; pos++) {
                if (st[pos].getClassName().startsWith(classNameToStartAt)) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                return Collections.emptyList();
            }
            // The limit is counted from the matching frame (pos), not from startPos; comparing against
            // the remaining length avoids an int overflow for very large limits.
            int endPos = (limit >= st.length - pos) ? st.length : pos + limit;
            List<String> ret = new ArrayList<>(Math.max(0, endPos - pos));
            for (int i = pos; i < endPos; i++) {
                StackTraceElement cur = st[i];
                ret.add(cur.getClassName() + ":" + cur.getLineNumber());
            }
            return ret;
        }

        /**
         * Builds one entry per logged key. For a minCount other than 1, a key is only included if it
         * occurs at least minCount times in at least one of the four multisets.
         *
         * @return an unmodifiable view of the entries
         */
        @Override
        public NavigableSet<PatternCountInterface> getEntries(final int minCount) {
            CountSets c = new CountSets(matchedFindSet, failedFindSet, matchedMatchSet, failedMatchSet, occurrences);
            final AddAllEntryProcessor processor = (minCount == 1) ? new AddAllEntryProcessor(minCount, c) : new EntryProcessor(minCount, c);
            // elementSet() visits each distinct key once; iterating the multiset itself would visit
            // every single occurrence.
            for (PatternStringWithBoolean item : matchedFindSet.elementSet()) {
                processor.process(item, matchedFindSet);
            }
            for (PatternStringWithBoolean item : failedFindSet.elementSet()) {
                processor.process(item, failedFindSet);
            }
            for (PatternStringWithBoolean item : matchedMatchSet.elementSet()) {
                processor.process(item, matchedMatchSet);
            }
            for (PatternStringWithBoolean item : failedMatchSet.elementSet()) {
                processor.process(item, failedMatchSet);
            }
            return Sets.unmodifiableNavigableSet(processor.getResult());
        }
    }
}