1 /* 2 ******************************************************************************* 3 * Copyright (C) 2002-2016, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 package org.unicode.cldr.util.props; 8 9 import java.io.PrintWriter; 10 import java.io.StringWriter; 11 import java.text.MessageFormat; 12 import java.util.Collection; 13 import java.util.HashMap; 14 import java.util.HashSet; 15 import java.util.Locale; 16 import java.util.Map; 17 18 import org.unicode.cldr.draft.FileUtilities; 19 import org.unicode.cldr.util.Tabber; 20 import org.unicode.cldr.util.Visitor; 21 22 import com.ibm.icu.impl.Utility; 23 import com.ibm.icu.text.NumberFormat; 24 import com.ibm.icu.text.Transliterator; 25 import com.ibm.icu.text.UTF16; 26 import com.ibm.icu.text.UnicodeSet; 27 28 public class BagFormatter { 29 static final boolean DEBUG = false; 30 public static final boolean SHOW_FILES; 31 static { 32 boolean showFiles = false; 33 try { 34 showFiles = System.getProperty("SHOW_FILES") != null; 35 } 36 catch (SecurityException e) { 37 } 38 SHOW_FILES = showFiles; 39 } 40 41 public static final PrintWriter CONSOLE = new PrintWriter(System.out,true); 42 43 private static PrintWriter log = CONSOLE; 44 45 private boolean abbreviated = false; 46 private String separator = ","; 47 private String prefix = "["; 48 private String suffix = "]"; 49 private UnicodeProperty.Factory source; 50 private UnicodeLabel nameSource; 51 private UnicodeLabel labelSource; 52 private UnicodeLabel rangeBreakSource; 53 private UnicodeLabel valueSource; 54 private String propName = ""; 55 private boolean showCount = true; 56 //private boolean suppressReserved = true; 57 private boolean hexValue = false; 58 private static final String NULL_VALUE = "_NULL_VALUE_"; 59 private int fullTotal = -1; 60 private boolean showTotal = true; 61 private String lineSeparator = System.lineSeparator(); 62 private Tabber tabber = new Tabber.MonoTabber(); 63 64 /** 65 * Compare two UnicodeSets, and show the differences 66 * @param name1 name of first set to be compared 67 * @param set1 first set 68 * @param name2 name of second set to be compared 69 * @param set2 second set 70 * @return formatted string 71 */ showSetDifferences( String name1, UnicodeSet set1, String name2, UnicodeSet set2)72 public String showSetDifferences( 73 String name1, 74 UnicodeSet set1, 75 String name2, 76 UnicodeSet set2) { 77 78 StringWriter result = new StringWriter(); 79 showSetDifferences(new PrintWriter(result),name1,set1,name2,set2); 80 result.flush(); 81 return result.getBuffer().toString(); 82 } 83 showSetDifferences( String name1, Collection set1, String name2, Collection set2)84 public String showSetDifferences( 85 String name1, 86 Collection set1, 87 String name2, 88 Collection set2) { 89 90 StringWriter result = new StringWriter(); 91 showSetDifferences(new PrintWriter(result), name1, set1, name2, set2); 92 result.flush(); 93 return result.getBuffer().toString(); 94 } 95 showSetDifferences( PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2)96 public void showSetDifferences( 97 PrintWriter pw, 98 String name1, 99 UnicodeSet set1, 100 String name2, 101 UnicodeSet set2) { 102 showSetDifferences(pw, name1, set1, name2, set2, -1); 103 } 104 /** 105 * Compare two UnicodeSets, and show the differences 106 * @param name1 name of first set to be compared 107 * @param set1 first set 108 * @param name2 name of second set to be compared 109 * @param set2 second set 110 */ showSetDifferences( PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2, int flags)111 public void showSetDifferences( 112 PrintWriter pw, 113 String name1, 114 UnicodeSet set1, 115 String name2, 116 UnicodeSet set2, 117 int flags) 118 { 119 if (pw == null) pw = FileUtilities.CONSOLE; 120 String[] names = { name1, name2 }; 121 122 UnicodeSet temp; 123 124 if ((flags&1) != 0) { 125 temp = new UnicodeSet(set1).removeAll(set2); 126 pw.print(lineSeparator); 127 pw.print(inOut.format(names)); 128 pw.print(lineSeparator); 129 showSetNames(pw, temp); 130 } 131 132 if ((flags&2) != 0) { 133 temp = new UnicodeSet(set2).removeAll(set1); 134 pw.print(lineSeparator); 135 pw.print(outIn.format(names)); 136 pw.print(lineSeparator); 137 showSetNames(pw, temp); 138 } 139 140 if ((flags&4) != 0) { 141 temp = new UnicodeSet(set2).retainAll(set1); 142 pw.print(lineSeparator); 143 pw.print(inIn.format(names)); 144 pw.print(lineSeparator); 145 showSetNames(pw, temp); 146 } 147 pw.flush(); 148 } 149 showSetDifferences( PrintWriter pw, String name1, Collection set1, String name2, Collection set2)150 public void showSetDifferences( 151 PrintWriter pw, 152 String name1, 153 Collection set1, 154 String name2, 155 Collection set2) { 156 157 if (pw == null) pw = FileUtilities.CONSOLE; 158 String[] names = { name1, name2 }; 159 // damn'd collection doesn't have a clone, so 160 // we go with Set, even though that 161 // may not preserve order and duplicates 162 Collection temp = new HashSet(set1); 163 temp.removeAll(set2); 164 pw.println(); 165 pw.println(inOut.format(names)); 166 showSetNames(pw, temp); 167 168 temp.clear(); 169 temp.addAll(set2); 170 temp.removeAll(set1); 171 pw.println(); 172 pw.println(outIn.format(names)); 173 showSetNames(pw, temp); 174 175 temp.clear(); 176 temp.addAll(set1); 177 temp.retainAll(set2); 178 pw.println(); 179 pw.println(inIn.format(names)); 180 showSetNames(pw, temp); 181 } 182 183 /** 184 * Returns a list of items in the collection, with each separated by the separator. 185 * Each item must not be null; its toString() is called for a printable representation 186 * @param c source collection 187 * @return a String representation of the list 188 */ showSetNames(Object c)189 public String showSetNames(Object c) { 190 StringWriter buffer = new StringWriter(); 191 PrintWriter output = new PrintWriter(buffer); 192 showSetNames(output,c); 193 return buffer.toString(); 194 } 195 196 /** 197 * Returns a list of items in the collection, with each separated by the separator. 198 * Each item must not be null; its toString() is called for a printable representation 199 * @param output destination to which to write names 200 * @param c source collection 201 */ showSetNames(PrintWriter output, Object c)202 public void showSetNames(PrintWriter output, Object c) { 203 mainVisitor.doAt(c, output); 204 output.flush(); 205 } 206 getAbbreviatedName( String src, String pattern, String substitute)207 public String getAbbreviatedName( 208 String src, 209 String pattern, 210 String substitute) { 211 212 int matchEnd = NameIterator.findMatchingEnd(src, pattern); 213 int sdiv = src.length() - matchEnd; 214 int pdiv = pattern.length() - matchEnd; 215 StringBuffer result = new StringBuffer(); 216 addMatching( 217 src.substring(0, sdiv), 218 pattern.substring(0, pdiv), 219 substitute, 220 result); 221 addMatching( 222 src.substring(sdiv), 223 pattern.substring(pdiv), 224 substitute, 225 result); 226 return result.toString(); 227 } 228 229 abstract public static class Relation { getRelation(String a, String b)230 abstract public String getRelation(String a, String b); 231 } 232 233 static class NullRelation extends Relation { 234 @Override getRelation(String a, String b)235 public String getRelation(String a, String b) { return ""; } 236 } 237 238 private Relation r = new NullRelation(); 239 setRelation(Relation r)240 public BagFormatter setRelation(Relation r) { 241 this.r = r; 242 return this; // for chaining 243 } 244 getRelation()245 public Relation getRelation() { 246 return r; 247 } 248 249 /* 250 r.getRelati on(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s) 251 */ 252 /* 253 static final UnicodeSet NO_NAME = 254 new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]"); 255 static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement(); 256 static final UnicodeSet NAME_CHARACTERS = 257 new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]"); 258 259 public UnicodeSet getSetForName(String namePattern) { 260 UnicodeSet result = new UnicodeSet(); 261 Matcher m = Pattern.compile(namePattern).matcher(""); 262 // check for no-name items, and add in bulk 263 m.reset("<no name>"); 264 if (m.matches()) { 265 result.addAll(NO_NAME); 266 } 267 // check all others 268 UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME); 269 while (usi.next()) { 270 String name = getName(usi.codepoint); 271 if (name == null) 272 continue; 273 m.reset(name); 274 if (m.matches()) { 275 result.add(usi.codepoint); 276 } 277 } 278 // Note: if Regex had some API so that if we could tell that 279 // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-" 280 // then we could optimize by skipping whole swathes of characters 281 return result; 282 } 283 */ 284 setMergeRanges(boolean in)285 public BagFormatter setMergeRanges(boolean in) { 286 mergeRanges = in; 287 return this; 288 } setShowSetAlso(boolean b)289 public BagFormatter setShowSetAlso(boolean b) { 290 showSetAlso = b; 291 return this; 292 } 293 getName(int codePoint)294 public String getName(int codePoint) { 295 return getName("", codePoint, codePoint); 296 } 297 getName(String sep, int start, int end)298 public String getName(String sep, int start, int end) { 299 if (getNameSource() == null || getNameSource() == UnicodeLabel.NULL) return ""; 300 String result = getName(start, false); 301 if (start == end) return sep + result; 302 String endString = getName(end, false); 303 if (result.length() == 0 && endString.length() == 0) return sep; 304 if (abbreviated) endString = getAbbreviatedName(endString,result,"~"); 305 return sep + result + ".." + endString; 306 } 307 getName(String s)308 public String getName(String s) { 309 return getName(s, false); 310 } 311 312 public static class NameLabel extends UnicodeLabel { 313 UnicodeProperty nameProp; 314 UnicodeSet control; 315 UnicodeSet private_use; 316 UnicodeSet noncharacter; 317 UnicodeSet surrogate; 318 NameLabel(UnicodeProperty.Factory source)319 public NameLabel(UnicodeProperty.Factory source) { 320 nameProp = source.getProperty("Name"); 321 control = source.getSet("gc=Cc"); 322 private_use = source.getSet("gc=Co"); 323 surrogate = source.getSet("gc=Cs"); 324 noncharacter = source.getSet("noncharactercodepoint=yes"); 325 } 326 327 @Override getValue(int codePoint, boolean isShort)328 public String getValue(int codePoint, boolean isShort) { 329 String hcp = !isShort 330 ? "U+" + Utility.hex(codePoint, 4) + " " 331 : ""; 332 String result = nameProp.getValue(codePoint); 333 if (result != null) 334 return hcp + result; 335 if (control.contains(codePoint)) { 336 return "<control-" + Utility.hex(codePoint, 4) + ">"; 337 } 338 if (private_use.contains(codePoint)) { 339 return "<private-use-" + Utility.hex(codePoint, 4) + ">"; 340 } 341 if (surrogate.contains(codePoint)) { 342 return "<surrogate-" + Utility.hex(codePoint, 4) + ">"; 343 } 344 if (noncharacter.contains(codePoint)) { 345 return "<noncharacter-" + Utility.hex(codePoint, 4) + ">"; 346 } 347 //if (suppressReserved) return ""; 348 return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">"; 349 } 350 351 } 352 353 // refactored getName(int codePoint, boolean withCodePoint)354 public String getName(int codePoint, boolean withCodePoint) { 355 String result = getNameSource().getValue(codePoint, !withCodePoint); 356 return fixName == null ? result : fixName.transliterate(result); 357 } 358 getName(String s, boolean withCodePoint)359 public String getName(String s, boolean withCodePoint) { 360 String result = getNameSource().getValue(s, separator, !withCodePoint); 361 return fixName == null ? result : fixName.transliterate(result); 362 } 363 hex(String s)364 public String hex(String s) { 365 return hex(s,separator); 366 } 367 hex(String s, String sep)368 public String hex(String s, String sep) { 369 return UnicodeLabel.HEX.getValue(s, sep, true); 370 } 371 hex(int start, int end)372 public String hex(int start, int end) { 373 String s = Utility.hex(start,4); 374 if (start == end) return s; 375 return s + ".." + Utility.hex(end,4); 376 } 377 setUnicodePropertyFactory(UnicodeProperty.Factory source)378 public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) { 379 this.source = source; 380 return this; 381 } 382 getUnicodePropertyFactory()383 private UnicodeProperty.Factory getUnicodePropertyFactory() { 384 if (source == null) source = ICUPropertyFactory.make(); 385 return source; 386 } 387 BagFormatter()388 public BagFormatter () { 389 } 390 BagFormatter(UnicodeProperty.Factory source)391 public BagFormatter (UnicodeProperty.Factory source) { 392 setUnicodePropertyFactory(source); 393 } 394 join(Object o)395 public String join(Object o) { 396 return labelVisitor.join(o); 397 } 398 399 // ===== PRIVATES ===== 400 401 private Join labelVisitor = new Join(); 402 403 private boolean mergeRanges = true; 404 private Transliterator showLiteral = null; 405 private Transliterator fixName = null; 406 private boolean showSetAlso = false; 407 408 private RangeFinder rf = new RangeFinder(); 409 410 private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:"); 411 private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:"); 412 private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:"); 413 414 private MyVisitor mainVisitor = new MyVisitor(); 415 416 /* 417 private String getLabels(int start, int end) { 418 Set names = new TreeSet(); 419 for (int cp = start; cp <= end; ++cp) { 420 names.add(getLabel(cp)); 421 } 422 return labelVisitor.join(names); 423 } 424 */ 425 addMatching( String src, String pattern, String substitute, StringBuffer result)426 private void addMatching( 427 String src, 428 String pattern, 429 String substitute, 430 StringBuffer result) { 431 NameIterator n1 = new NameIterator(src); 432 NameIterator n2 = new NameIterator(pattern); 433 boolean first = true; 434 while (true) { 435 String s1 = n1.next(); 436 if (s1 == null) 437 break; 438 String s2 = n2.next(); 439 if (!first) 440 result.append(" "); 441 first = false; 442 if (s1.equals(s2)) 443 result.append(substitute); 444 else 445 result.append(s1); 446 } 447 } 448 449 private static NumberFormat nf = 450 NumberFormat.getIntegerInstance(Locale.ENGLISH); 451 static { 452 nf.setGroupingUsed(false); 453 } 454 455 private int maxWidthOverride = -1; 456 private int maxLabelWidthOverride = -1; 457 setValueWidthOverride(int maxWidthOverride)458 public BagFormatter setValueWidthOverride(int maxWidthOverride) { 459 this.maxWidthOverride = maxWidthOverride; 460 return this; 461 } 462 getValueWidthOverride()463 public int getValueWidthOverride() { 464 return maxWidthOverride; 465 } 466 setLabelWidthOverride(int maxWidthOverride)467 public BagFormatter setLabelWidthOverride(int maxWidthOverride) { 468 this.maxLabelWidthOverride = maxWidthOverride; 469 return this; 470 } 471 getLabelWidthOverride()472 public int getLabelWidthOverride() { 473 return maxLabelWidthOverride; 474 } 475 476 477 private class MyVisitor extends Visitor { 478 private PrintWriter output; 479 String commentSeparator; 480 int counter; 481 int valueSize; 482 int labelSize; 483 boolean isHtml; 484 boolean inTable = false; 485 toOutput(String s)486 public void toOutput(String s) { 487 if (isHtml) { 488 if (inTable) { 489 output.print("</table>"); 490 inTable = false; 491 } 492 output.print("<p>"); 493 } 494 output.print(s); 495 if (isHtml) 496 output.println("</p>"); 497 else 498 output.print(lineSeparator); 499 } 500 toTable(String s)501 public void toTable(String s) { 502 if (isHtml && !inTable) { 503 output.print("<table>"); 504 inTable = true; 505 } 506 output.print(tabber.process(s) + lineSeparator); 507 } 508 doAt(Object c, PrintWriter out)509 public void doAt(Object c, PrintWriter out) { 510 output = out; 511 isHtml = tabber instanceof Tabber.HTMLTabber; 512 counter = 0; 513 514 tabber.clear(); 515 // old: 516 // 0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D> 517 // new 518 // 0009..000D ; White_Space #Cc [5] <control>..<control> 519 tabber.add(mergeRanges ? 14 : 6,Tabber.LEFT); 520 521 if (propName.length() > 0) { 522 tabber.add(propName.length() + 2,Tabber.LEFT); 523 } 524 525 valueSize = maxWidthOverride > 0 ? maxWidthOverride : getValueSource().getMaxWidth(shortValue); 526 527 if (DEBUG) System.out.println("ValueSize: " + valueSize); 528 if (valueSize > 0) { 529 tabber.add(valueSize + 2,Tabber.LEFT); // value 530 } 531 532 tabber.add(3,Tabber.LEFT); // comment character 533 534 labelSize = maxLabelWidthOverride > 0 ? maxLabelWidthOverride : getLabelSource(true).getMaxWidth(shortLabel); 535 if (labelSize > 0) { 536 tabber.add(labelSize + 1,Tabber.LEFT); // value 537 } 538 539 if (mergeRanges && showCount) { 540 tabber.add(5,Tabber.RIGHT); 541 } 542 543 if (showLiteral != null) { 544 tabber.add(4,Tabber.LEFT); 545 } 546 //myTabber.add(7,Tabber.LEFT); 547 548 commentSeparator = (showCount || showLiteral != null 549 || getLabelSource(true) != UnicodeLabel.NULL 550 || getNameSource() != UnicodeLabel.NULL) 551 ? "\t #" : ""; 552 553 if (DEBUG) System.out.println("Tabber: " + tabber.toString()); 554 if (DEBUG) System.out.println("Tabber: " + tabber.process( 555 "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER")); 556 doAt(c); 557 } 558 559 @SuppressWarnings("unused") format(Object o)560 public String format(Object o) { 561 StringWriter sw = new StringWriter(); 562 PrintWriter pw = new PrintWriter(sw); 563 doAt(o); 564 pw.flush(); 565 String result = sw.getBuffer().toString(); 566 pw.close(); 567 return result; 568 } 569 570 @Override doBefore(Object container, Object o)571 protected void doBefore(Object container, Object o) { 572 if (showSetAlso && container instanceof UnicodeSet) { 573 toOutput("#" + container); 574 } 575 } 576 577 @Override doBetween(Object container, Object lastItem, Object nextItem)578 protected void doBetween(Object container, Object lastItem, Object nextItem) { 579 } 580 581 @Override doAfter(Object container, Object o)582 protected void doAfter(Object container, Object o) { 583 if (fullTotal != -1 && fullTotal != counter) { 584 if (showTotal) { 585 toOutput(""); 586 toOutput("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here."); 587 toOutput("# Total code points: " + nf.format(fullTotal)); 588 } 589 fullTotal = -1; 590 } else if (showTotal) { 591 toOutput(""); 592 toOutput("# Total code points: " + nf.format(counter)); 593 } 594 } 595 596 @Override doSimpleAt(Object o)597 protected void doSimpleAt(Object o) { 598 if (o instanceof Map.Entry) { 599 Map.Entry oo = (Map.Entry)o; 600 Object key = oo.getKey(); 601 Object value = oo.getValue(); 602 doBefore(o, key); 603 doAt(key); 604 output.println("\u2192"); 605 doAt(value); 606 doAfter(o, value); 607 counter++; 608 } else if (o instanceof Visitor.CodePointRange) { 609 doAt((Visitor.CodePointRange) o); 610 } else { 611 String thing = o.toString(); 612 String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true); 613 if (getValueSource() != UnicodeLabel.NULL) value = "\t; " + value; 614 String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true); 615 if (label.length() != 0) label = " " + label; 616 toTable( 617 hex(thing) 618 + value 619 + commentSeparator 620 + label 621 + insertLiteral(thing) 622 + "\t" 623 + getName(thing)); 624 counter++; 625 } 626 } 627 doAt(Visitor.CodePointRange usi)628 protected void doAt(Visitor.CodePointRange usi) { 629 if (!mergeRanges) { 630 for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) { 631 showLine(cp, cp); 632 } 633 } else { 634 rf.reset(usi.codepoint, usi.codepointEnd + 1); 635 while (rf.next()) { 636 showLine(rf.start, rf.limit - 1); 637 } 638 } 639 } 640 showLine(int start, int end)641 private void showLine(int start, int end) { 642 String label = getLabelSource(true).getValue(start, shortLabel); 643 String value = getValue(start, shortValue); 644 if (value == NULL_VALUE) return; 645 646 counter += end - start + 1; 647 String pn = propName; 648 if (pn.length() != 0) { 649 pn = "\t; " + pn; 650 } 651 if (valueSize > 0) { 652 value = "\t; " + value; 653 } else if (value.length() > 0) { 654 throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue)); 655 } 656 if (labelSize > 0) { 657 label = "\t" + label; 658 } else if (label.length() > 0) { 659 throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel)); 660 } 661 662 String count = ""; 663 if (mergeRanges && showCount) { 664 if (end == start) count = "\t"; 665 else count = "\t ["+ nf.format(end - start + 1)+ "]"; 666 } 667 668 toTable( 669 hex(start, end) 670 + pn 671 + value 672 + commentSeparator 673 + label 674 + count 675 + insertLiteral(start, end) 676 + getName("\t ", start, end)); 677 } 678 insertLiteral(String thing)679 private String insertLiteral(String thing) { 680 return (showLiteral == null ? "" 681 : " \t(" + showLiteral.transliterate(thing) + ") "); 682 } 683 insertLiteral(int start, int end)684 private String insertLiteral(int start, int end) { 685 return (showLiteral == null ? "" : 686 " \t(" + showLiteral.transliterate(UTF16.valueOf(start)) 687 + ((start != end) 688 ? (".." + showLiteral.transliterate(UTF16.valueOf(end))) 689 : "") 690 + ") "); 691 } 692 /* 693 private String insertLiteral(int cp) { 694 return (showLiteral == null ? "" 695 : " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") "); 696 } 697 */ 698 } 699 700 /** 701 * Iterate through a string, breaking at words. 702 * @author Davis 703 */ 704 private static class NameIterator { 705 String source; 706 int position; 707 int limit; 708 NameIterator(String source)709 NameIterator(String source) { 710 this.source = source; 711 this.limit = source.length(); 712 } 713 /** 714 * Find next word, including trailing spaces 715 * @return the next word 716 */ next()717 String next() { 718 if (position >= limit) 719 return null; 720 int pos = source.indexOf(' ', position); 721 if (pos < 0 || pos >= limit) 722 pos = limit; 723 String result = source.substring(position, pos); 724 position = pos + 1; 725 return result; 726 } 727 findMatchingEnd(String s1, String s2)728 static int findMatchingEnd(String s1, String s2) { 729 int i = s1.length(); 730 int j = s2.length(); 731 try { 732 while (true) { 733 --i; // decrement both before calling function! 734 --j; 735 if (s1.charAt(i) != s2.charAt(j)) 736 break; 737 } 738 } catch (Exception e) {} // run off start 739 740 ++i; // counteract increment 741 i = s1.indexOf(' ', i); // move forward to space 742 if (i < 0) 743 return 0; 744 return s1.length() - i; 745 } 746 } 747 748 private class RangeFinder { 749 int start, limit; 750 private int veryLimit; 751 //String label, value; reset(int rangeStart, int rangeLimit)752 void reset(int rangeStart, int rangeLimit) { 753 limit = rangeStart; 754 veryLimit = rangeLimit; 755 } next()756 boolean next() { 757 if (limit >= veryLimit) 758 return false; 759 start = limit; // set to end of last 760 String label = getLabelSource(false).getValue(limit, true); 761 String value = getValue(limit, true); 762 String breaker = getRangeBreakSource().getValue(limit,true); 763 if (DEBUG && 0x3FFD < limit && limit < 0x9FD6) { 764 System.out.println(Utility.hex(limit) + ", Label: " + label + ", Value: " + value + ", Break: " + breaker); 765 } 766 limit++; 767 for (; limit < veryLimit; limit++) { 768 String s = getLabelSource(false).getValue(limit, true); 769 String v = getValue(limit, true); 770 String b = getRangeBreakSource().getValue(limit, true); 771 if (DEBUG && limit > 0x9FD4) { 772 System.out.println(Utility.hex(limit) + ", *Label: " + s + ", Value: " + v + ", Break: " + b); 773 } 774 if (!equalTo(s, label) 775 || !equalTo(v, value) 776 || !equalTo(b, breaker)) { 777 break; 778 } 779 } 780 // at this point, limit is the first item that has a different label than source 781 // OR, we got to the end, and limit == veryLimit 782 return true; 783 } 784 } 785 equalTo(Object a, Object b)786 boolean equalTo(Object a, Object b) { 787 if (a == b) return true; 788 if (a == null) return false; 789 return a.equals(b); 790 } 791 792 boolean shortLabel = true; 793 boolean shortValue = true; 794 getPrefix()795 public String getPrefix() { 796 return prefix; 797 } 798 getSuffix()799 public String getSuffix() { 800 return suffix; 801 } 802 setPrefix(String string)803 public BagFormatter setPrefix(String string) { 804 prefix = string; 805 return this; 806 } 807 setSuffix(String string)808 public BagFormatter setSuffix(String string) { 809 suffix = string; 810 return this; 811 } 812 isAbbreviated()813 public boolean isAbbreviated() { 814 return abbreviated; 815 } 816 setAbbreviated(boolean b)817 public BagFormatter setAbbreviated(boolean b) { 818 abbreviated = b; 819 return this; 820 } 821 getLabelSource(boolean visible)822 public UnicodeLabel getLabelSource(boolean visible) { 823 if (labelSource == null) { 824 Map labelMap = new HashMap(); 825 //labelMap.put("Lo","L&"); 826 labelMap.put("Lu","L&"); 827 labelMap.put("Lt","L&"); 828 labelMap.put("Ll","L&"); 829 labelSource = new UnicodeProperty.FilteredProperty( 830 getUnicodePropertyFactory().getProperty("General_Category"), 831 new UnicodeProperty.MapFilter(labelMap) 832 ).setAllowValueAliasCollisions(true); 833 } 834 return labelSource; 835 } 836 837 /** 838 * @deprecated 839 */ 840 @Deprecated addAll(UnicodeSet source, Collection target)841 public static void addAll(UnicodeSet source, Collection target) { 842 source.addAllTo(target); 843 } 844 845 // UTILITIES 846 847 public static final Transliterator hex = Transliterator.getInstance( 848 "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex"); 849 getSeparator()850 public String getSeparator() { 851 return separator; 852 } setSeparator(String string)853 public BagFormatter setSeparator(String string) { 854 separator = string; 855 return this; 856 } getShowLiteral()857 public Transliterator getShowLiteral() { 858 return showLiteral; 859 } setShowLiteral(Transliterator transliterator)860 public BagFormatter setShowLiteral(Transliterator transliterator) { 861 showLiteral = transliterator; 862 return this; 863 } 864 865 // ===== CONVENIENCES ===== 866 private class Join extends Visitor { 867 StringBuffer output = new StringBuffer(); 868 @SuppressWarnings("unused") 869 int depth = 0; join(Object o)870 String join (Object o) { 871 output.setLength(0); 872 doAt(o); 873 return output.toString(); 874 } 875 @Override doBefore(Object container, Object item)876 protected void doBefore(Object container, Object item) { 877 ++depth; 878 output.append(prefix); 879 } 880 @Override doAfter(Object container, Object item)881 protected void doAfter(Object container, Object item) { 882 output.append(suffix); 883 --depth; 884 } 885 @Override doBetween(Object container, Object lastItem, Object nextItem)886 protected void doBetween(Object container, Object lastItem, Object nextItem) { 887 output.append(separator); 888 } 889 @Override doSimpleAt(Object o)890 protected void doSimpleAt(Object o) { 891 if (o != null) output.append(o.toString()); 892 } 893 } 894 895 /** 896 * @param label 897 */ setLabelSource(UnicodeLabel label)898 public BagFormatter setLabelSource(UnicodeLabel label) { 899 if (label == null) label = UnicodeLabel.NULL; 900 labelSource = label; 901 return this; 902 } 903 904 /** 905 * @return the NameLable representing the source 906 */ getNameSource()907 public UnicodeLabel getNameSource() { 908 if (nameSource == null) { 909 nameSource = new NameLabel(getUnicodePropertyFactory()); 910 } 911 return nameSource; 912 } 913 914 /** 915 * @param label 916 */ setNameSource(UnicodeLabel label)917 public BagFormatter setNameSource(UnicodeLabel label) { 918 if (label == null) label = UnicodeLabel.NULL; 919 nameSource = label; 920 return this; 921 } 922 923 /** 924 * @return the UnicodeLabel representing the value 925 */ getValueSource()926 public UnicodeLabel getValueSource() { 927 if (valueSource == null) valueSource = UnicodeLabel.NULL; 928 return valueSource; 929 } 930 getValue(int cp, boolean shortVal)931 private String getValue(int cp, boolean shortVal) { 932 String result = getValueSource().getValue(cp, shortVal); 933 if (result == null) return NULL_VALUE; 934 if (hexValue) result = hex(result, " "); 935 return result; 936 } 937 938 /** 939 * @param label 940 */ setValueSource(UnicodeLabel label)941 public BagFormatter setValueSource(UnicodeLabel label) { 942 if (label == null) label = UnicodeLabel.NULL; 943 valueSource = label; 944 return this; 945 } 946 setValueSource(String label)947 public BagFormatter setValueSource(String label) { 948 return setValueSource(new UnicodeLabel.Constant(label)); 949 } 950 951 /** 952 * @return true if showCount is true 953 */ isShowCount()954 public boolean isShowCount() { 955 return showCount; 956 } 957 958 /** 959 * @param b true to show the count 960 * @return this (for chaining) 961 */ setShowCount(boolean b)962 public BagFormatter setShowCount(boolean b) { 963 showCount = b; 964 return this; 965 } 966 967 /** 968 * @return the property name 969 */ getPropName()970 public String getPropName() { 971 return propName; 972 } 973 974 /** 975 * @param string 976 * @return this (for chaining) 977 */ setPropName(String string)978 public BagFormatter setPropName(String string) { 979 if (string == null) string = ""; 980 propName = string; 981 return this; 982 } 983 984 /** 985 * @return true if this is a hexValue 986 */ isHexValue()987 public boolean isHexValue() { 988 return hexValue; 989 } 990 991 /** 992 * @param b 993 * @return this (for chaining) 994 */ setHexValue(boolean b)995 public BagFormatter setHexValue(boolean b) { 996 hexValue = b; 997 return this; 998 } 999 1000 /** 1001 * @return the full total 1002 */ getFullTotal()1003 public int getFullTotal() { 1004 return fullTotal; 1005 } 1006 1007 /** 1008 * @param i set the full total 1009 * @return this (for chaining) 1010 */ setFullTotal(int i)1011 public BagFormatter setFullTotal(int i) { 1012 fullTotal = i; 1013 return this; 1014 } 1015 1016 /** 1017 * @return the line separator 1018 */ getLineSeparator()1019 public String getLineSeparator() { 1020 return lineSeparator; 1021 } 1022 1023 /** 1024 * @param string 1025 * @return this (for chaining) 1026 */ setLineSeparator(String string)1027 public BagFormatter setLineSeparator(String string) { 1028 lineSeparator = string; 1029 return this; 1030 } 1031 1032 /** 1033 * @return the UnicodeLabel representing the range break source 1034 */ getRangeBreakSource()1035 public UnicodeLabel getRangeBreakSource() { 1036 if (rangeBreakSource == null) { 1037 Map labelMap = new HashMap(); 1038 // reflects the code point types on p 25 1039 labelMap.put("Lo", "G&"); 1040 labelMap.put("Lm", "G&"); 1041 labelMap.put("Lu", "G&"); 1042 labelMap.put("Lt", "G&"); 1043 labelMap.put("Ll", "G&"); 1044 labelMap.put("Mn", "G&"); 1045 labelMap.put("Me", "G&"); 1046 labelMap.put("Mc", "G&"); 1047 labelMap.put("Nd", "G&"); 1048 labelMap.put("Nl", "G&"); 1049 labelMap.put("No", "G&"); 1050 labelMap.put("Zs", "G&"); 1051 labelMap.put("Pd", "G&"); 1052 labelMap.put("Ps", "G&"); 1053 labelMap.put("Pe", "G&"); 1054 labelMap.put("Pc", "G&"); 1055 labelMap.put("Po", "G&"); 1056 labelMap.put("Pi", "G&"); 1057 labelMap.put("Pf", "G&"); 1058 labelMap.put("Sm", "G&"); 1059 labelMap.put("Sc", "G&"); 1060 labelMap.put("Sk", "G&"); 1061 labelMap.put("So", "G&"); 1062 1063 labelMap.put("Zl", "Cf"); 1064 labelMap.put("Zp", "Cf"); 1065 1066 rangeBreakSource = 1067 new UnicodeProperty 1068 .FilteredProperty( 1069 getUnicodePropertyFactory().getProperty( 1070 "General_Category"), 1071 new UnicodeProperty.MapFilter(labelMap)) 1072 .setAllowValueAliasCollisions(true); 1073 1074 /* 1075 "Cn", // = Other, Not Assigned 0 1076 "Cc", // = Other, Control 15 1077 "Cf", // = Other, Format 16 1078 UnicodeProperty.UNUSED, // missing 1079 "Co", // = Other, Private Use 18 1080 "Cs", // = Other, Surrogate 19 1081 */ 1082 } 1083 return rangeBreakSource; 1084 } 1085 1086 /** 1087 * @param label 1088 */ setRangeBreakSource(UnicodeLabel label)1089 public BagFormatter setRangeBreakSource(UnicodeLabel label) { 1090 if (label == null) label = UnicodeLabel.NULL; 1091 rangeBreakSource = label; 1092 return this; 1093 } 1094 1095 /** 1096 * @return Returns the fixName. 1097 */ getFixName()1098 public Transliterator getFixName() { 1099 return fixName; 1100 } 1101 /** 1102 * @param fixName The fixName to set. 1103 */ setFixName(Transliterator fixName)1104 public BagFormatter setFixName(Transliterator fixName) { 1105 this.fixName = fixName; 1106 return this; 1107 } 1108 getTabber()1109 public Tabber getTabber() { 1110 return tabber; 1111 } 1112 setTabber(Tabber tabber)1113 public void setTabber(Tabber tabber) { 1114 this.tabber = tabber; 1115 } 1116 isShowTotal()1117 public boolean isShowTotal() { 1118 return showTotal; 1119 } 1120 setShowTotal(boolean showTotal)1121 public void setShowTotal(boolean showTotal) { 1122 this.showTotal = showTotal; 1123 } 1124 } 1125