/*
 *******************************************************************************
 * Copyright (C) 2002-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package org.unicode.cldr.util.props;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.Tabber;
import org.unicode.cldr.util.Visitor;

/**
 * Formats collections of code points, strings and other objects as tabular text (or HTML when an
 * HTML tabber is installed), one line per item or per merged code point range, optionally
 * annotated with a property value, a label, a count, a literal and a name.
 *
 * <p>Most setters return {@code this} so that configuration calls can be chained.
 */
public class BagFormatter {
    static final boolean DEBUG = false;

    /** True when the system property {@code SHOW_FILES} is set (to any value). */
    public static final boolean SHOW_FILES;

    static {
        boolean showFiles = false;
        try {
            showFiles = System.getProperty("SHOW_FILES") != null;
        } catch (SecurityException ignored) {
            // Best effort: if the property cannot be read, leave SHOW_FILES false.
        }
        SHOW_FILES = showFiles;
    }

    /** Auto-flushing writer wrapping {@code System.out}. */
    public static final PrintWriter CONSOLE = new PrintWriter(System.out, true);

    private static PrintWriter log = CONSOLE;

    private boolean abbreviated = false;
    private String separator = ",";
    private String prefix = "[";
    private String suffix = "]";
    private UnicodeProperty.Factory source;
    private UnicodeLabel nameSource;
    private UnicodeLabel labelSource;
    private UnicodeLabel rangeBreakSource;
    private UnicodeLabel valueSource;
    private String propName = "";
    private boolean showCount = true;
    // private boolean suppressReserved = true;
    private boolean hexValue = false;
    // Sentinel returned by getValue(int, boolean) when the value source yields null.
    private static final String NULL_VALUE = "_NULL_VALUE_";
    private int fullTotal = -1;
    private boolean showTotal = true;
    private String lineSeparator = System.lineSeparator();
    private Tabber tabber = new Tabber.MonoTabber();

    /**
     * Compare two UnicodeSets, and show the differences
     *
     * @param name1 name of first set to be compared
     * @param set1 first set
     * @param name2 name of second set to be compared
     * @param set2 second set
     * @return formatted string
     */
    public String showSetDifferences(String name1, UnicodeSet set1, String name2, UnicodeSet set2) {
        StringWriter result = new StringWriter();
        showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
        result.flush();
        return result.getBuffer().toString();
    }

    /**
     * Compare two collections, and show the differences
     *
     * @param name1 name of first collection to be compared
     * @param set1 first collection
     * @param name2 name of second collection to be compared
     * @param set2 second collection
     * @return formatted string
     */
    @SuppressWarnings("rawtypes") // raw Collection kept for source compatibility with callers
    public String showSetDifferences(String name1, Collection set1, String name2, Collection set2) {
        StringWriter result = new StringWriter();
        showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
        result.flush();
        return result.getBuffer().toString();
    }

    /**
     * Compare two UnicodeSets, and write all three difference sections (all flag bits set).
     *
     * @param pw destination; if null, {@code FileUtilities.CONSOLE} is used
     * @param name1 name of first set to be compared
     * @param set1 first set
     * @param name2 name of second set to be compared
     * @param set2 second set
     */
    public void showSetDifferences(
            PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2) {
        showSetDifferences(pw, name1, set1, name2, set2, -1);
    }

    /**
     * Compare two UnicodeSets, and show the differences
     *
     * @param pw destination; if null, {@code FileUtilities.CONSOLE} is used
     * @param name1 name of first set to be compared
     * @param set1 first set
     * @param name2 name of second set to be compared
     * @param set2 second set
     * @param flags bit mask selecting the sections to write: bit 1 = items in set1 but not in
     *     set2, bit 2 = items in set2 but not in set1, bit 4 = items in both
     */
    public void showSetDifferences(
            PrintWriter pw,
            String name1,
            UnicodeSet set1,
            String name2,
            UnicodeSet set2,
            int flags) {
        if (pw == null) {
            pw = FileUtilities.CONSOLE;
        }
        String[] names = {name1, name2};

        if ((flags & 1) != 0) {
            showDifferenceSection(pw, inOut, names, new UnicodeSet(set1).removeAll(set2));
        }
        if ((flags & 2) != 0) {
            showDifferenceSection(pw, outIn, names, new UnicodeSet(set2).removeAll(set1));
        }
        if ((flags & 4) != 0) {
            showDifferenceSection(pw, inIn, names, new UnicodeSet(set2).retainAll(set1));
        }
        pw.flush();
    }

    /** Writes one headed section of a UnicodeSet comparison: blank line, header, then items. */
    private void showDifferenceSection(
            PrintWriter pw, MessageFormat header, String[] names, UnicodeSet items) {
        pw.print(lineSeparator);
        pw.print(header.format(names));
        pw.print(lineSeparator);
        showSetNames(pw, items);
    }

    /**
     * Compare two collections, and write three sections: items only in the first, items only in
     * the second, and items in both.
     *
     * @param pw destination; if null, {@code FileUtilities.CONSOLE} is used
     * @param name1 name of first collection to be compared
     * @param set1 first collection
     * @param name2 name of second collection to be compared
     * @param set2 second collection
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw Collection kept for source compatibility
    public void showSetDifferences(
            PrintWriter pw, String name1, Collection set1, String name2, Collection set2) {

        if (pw == null) {
            pw = FileUtilities.CONSOLE;
        }
        String[] names = {name1, name2};
        // Collection doesn't have a clone, so
        // we go with Set, even though that
        // may not preserve order and duplicates
        Collection temp = new HashSet(set1);
        temp.removeAll(set2);
        pw.println();
        pw.println(inOut.format(names));
        showSetNames(pw, temp);

        temp.clear();
        temp.addAll(set2);
        temp.removeAll(set1);
        pw.println();
        pw.println(outIn.format(names));
        showSetNames(pw, temp);

        temp.clear();
        temp.addAll(set1);
        temp.retainAll(set2);
        pw.println();
        pw.println(inIn.format(names));
        showSetNames(pw, temp);
    }

    /**
     * Returns a list of items in the collection, with each separated by the separator. Each item
     * must not be null; its toString() is called for a printable representation
     *
     * @param c source collection
     * @return a String representation of the list
     */
    public String showSetNames(Object c) {
        StringWriter buffer = new StringWriter();
        PrintWriter output = new PrintWriter(buffer);
        showSetNames(output, c);
        return buffer.toString();
    }

    /**
     * Returns a list of items in the collection, with each separated by the separator. Each item
     * must not be null; its toString() is called for a printable representation
     *
     * @param output destination to which to write names
     * @param c source collection
     */
    public void showSetNames(PrintWriter output, Object c) {
        mainVisitor.doAt(c, output);
        output.flush();
    }

    /**
     * Abbreviates {@code src} relative to {@code pattern}: space-separated words of {@code src}
     * that equal the word at the same position in {@code pattern} are replaced by {@code
     * substitute}. The comparison is done separately for the part before and the part inside the
     * common trailing portion found by {@code NameIterator.findMatchingEnd}.
     *
     * @param src the name to abbreviate
     * @param pattern the name to compare against
     * @param substitute replacement for each matching word
     * @return the abbreviated name
     */
    public String getAbbreviatedName(String src, String pattern, String substitute) {
        int matchEnd = NameIterator.findMatchingEnd(src, pattern);
        int sdiv = src.length() - matchEnd;
        int pdiv = pattern.length() - matchEnd;
        StringBuilder result = new StringBuilder();
        addMatching(src.substring(0, sdiv), pattern.substring(0, pdiv), substitute, result);
        addMatching(src.substring(sdiv), pattern.substring(pdiv), substitute, result);
        return result.toString();
    }

    /** Describes the relation between two strings. */
    public abstract static class Relation {
        public abstract String getRelation(String a, String b);
    }

    /** Relation that always yields the empty string. */
    static class NullRelation extends Relation {
        @Override
        public String getRelation(String a, String b) {
            return "";
        }
    }

    private Relation r = new NullRelation();

    /**
     * @param r the relation to use
     * @return this (for chaining)
     */
    public BagFormatter setRelation(Relation r) {
        this.r = r;
        return this; // for chaining
    }

    /**
     * @return the current relation
     */
    public Relation getRelation() {
        return r;
    }

    /*
    r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
    */
    /*
    static final UnicodeSet NO_NAME =
        new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
    static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
    static final UnicodeSet NAME_CHARACTERS =
        new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");

    public UnicodeSet getSetForName(String namePattern) {
        UnicodeSet result = new UnicodeSet();
        Matcher m = Pattern.compile(namePattern).matcher("");
        // check for no-name items, and add in bulk
        m.reset("<no name>");
        if (m.matches()) {
            result.addAll(NO_NAME);
        }
        // check all others
        UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
        while (usi.next()) {
            String name = getName(usi.codepoint);
            if (name == null)
                continue;
            m.reset(name);
            if (m.matches()) {
                result.add(usi.codepoint);
            }
        }
        // Note: if Regex had some API so that if we could tell that
        // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
        // then we could optimize by skipping whole swathes of characters
        return result;
    }
    */

    /**
     * @param in true to merge adjacent code points with equal label, value and range-break value
     *     into a single output line; false to write one line per code point
     * @return this (for chaining)
     */
    public BagFormatter setMergeRanges(boolean in) {
        mergeRanges = in;
        return this;
    }

    /**
     * @param b true to write a {@code #}-prefixed line with the container itself before the items
     *     of a UnicodeSet
     * @return this (for chaining)
     */
    public BagFormatter setShowSetAlso(boolean b) {
        showSetAlso = b;
        return this;
    }

    /**
     * @param codePoint the code point
     * @return the name of the code point, without a separator prefix
     */
    public String getName(int codePoint) {
        return getName("", codePoint, codePoint);
    }

    /**
     * Returns the name of a code point range, prefixed by {@code sep}.
     *
     * @param sep prefix for a non-empty result
     * @param start first code point of the range
     * @param end last code point of the range (inclusive)
     * @return the empty string if the name source is {@code UnicodeLabel.NULL}; otherwise {@code
     *     sep} followed by the name of {@code start} and, for a real range, {@code ".."} and the
     *     (possibly abbreviated) name of {@code end}
     */
    public String getName(String sep, int start, int end) {
        if (getNameSource() == null || getNameSource() == UnicodeLabel.NULL) {
            return "";
        }
        String result = getName(start, false);
        if (start == end) {
            return sep + result;
        }
        String endString = getName(end, false);
        if (result.length() == 0 && endString.length() == 0) {
            return sep;
        }
        if (abbreviated) {
            endString = getAbbreviatedName(endString, result, "~");
        }
        return sep + result + ".." + endString;
    }

    /**
     * @param s the string to name
     * @return the name of the string as produced by the name source, without code points
     */
    public String getName(String s) {
        return getName(s, false);
    }

    /**
     * Label producing character names, with synthesized names of the form {@code <type-XXXX>} for
     * code points that have no Name property value.
     */
    public static class NameLabel extends UnicodeLabel {
        UnicodeProperty nameProp;
        UnicodeSet control;
        UnicodeSet private_use;
        UnicodeSet noncharacter;
        UnicodeSet surrogate;

        public NameLabel(UnicodeProperty.Factory source) {
            nameProp = source.getProperty("Name");
            control = source.getSet("gc=Cc");
            private_use = source.getSet("gc=Co");
            surrogate = source.getSet("gc=Cs");
            noncharacter = source.getSet("noncharactercodepoint=yes");
        }

        /**
         * @param codePoint the code point to name
         * @param isShort if false, real names and {@code <reserved-...>} names are prefixed with
         *     {@code "U+XXXX "}; the control, private-use, surrogate and noncharacter forms are
         *     never prefixed
         * @return the name, or a synthesized {@code <type-XXXX>} placeholder
         */
        @Override
        public String getValue(int codePoint, boolean isShort) {
            String hcp = !isShort ? "U+" + Utility.hex(codePoint, 4) + " " : "";
            String result = nameProp.getValue(codePoint);
            if (result != null) {
                return hcp + result;
            }
            if (control.contains(codePoint)) {
                return "<control-" + Utility.hex(codePoint, 4) + ">";
            }
            if (private_use.contains(codePoint)) {
                return "<private-use-" + Utility.hex(codePoint, 4) + ">";
            }
            if (surrogate.contains(codePoint)) {
                return "<surrogate-" + Utility.hex(codePoint, 4) + ">";
            }
            if (noncharacter.contains(codePoint)) {
                return "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
            }
            // if (suppressReserved) return "";
            return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">";
        }
    }

    // refactored
    /**
     * @param codePoint the code point to name
     * @param withCodePoint passed (negated) as the "short" flag to the name source
     * @return the name from the name source, run through the fixName transliterator if one is set
     */
    public String getName(int codePoint, boolean withCodePoint) {
        String result = getNameSource().getValue(codePoint, !withCodePoint);
        return fixName == null ? result : fixName.transliterate(result);
    }

    /**
     * @param s the string to name
     * @param withCodePoint passed (negated) as the "short" flag to the name source
     * @return the name from the name source (using the current separator), run through the
     *     fixName transliterator if one is set
     */
    public String getName(String s, boolean withCodePoint) {
        String result = getNameSource().getValue(s, separator, !withCodePoint);
        return fixName == null ? result : fixName.transliterate(result);
    }

    /**
     * @param s the string to convert
     * @return the hex form of the string, using the current separator
     */
    public String hex(String s) {
        return hex(s, separator);
    }

    /**
     * @param s the string to convert
     * @param sep separator passed to {@code UnicodeLabel.HEX}
     * @return the hex form of the string
     */
    public String hex(String s, String sep) {
        return UnicodeLabel.HEX.getValue(s, sep, true);
    }

    /**
     * @param start first code point
     * @param end last code point (inclusive)
     * @return {@code XXXX} if start equals end, otherwise {@code XXXX..YYYY}, with at least four
     *     hex digits each
     */
    public String hex(int start, int end) {
        String s = Utility.hex(start, 4);
        if (start == end) {
            return s;
        }
        return s + ".." + Utility.hex(end, 4);
    }

    /**
     * @param source the property factory to use
     * @return this (for chaining)
     */
    public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
        this.source = source;
        return this;
    }

    /** Returns the property factory, creating the ICU-based default on first use. */
    private UnicodeProperty.Factory getUnicodePropertyFactory() {
        if (source == null) {
            source = ICUPropertyFactory.make();
        }
        return source;
    }

    public BagFormatter() {}

    public BagFormatter(UnicodeProperty.Factory source) {
        setUnicodePropertyFactory(source);
    }

    /**
     * @param o the object (or container) to join
     * @return the items of {@code o} joined using the current prefix, separator and suffix
     */
    public String join(Object o) {
        return labelVisitor.join(o);
    }

    // ===== PRIVATES =====

    private Join labelVisitor = new Join();

    private boolean mergeRanges = true;
    private Transliterator showLiteral = null;
    private Transliterator fixName = null;
    private boolean showSetAlso = false;

    private RangeFinder rf = new RangeFinder();

    private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
    private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
    private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");

    private MyVisitor mainVisitor = new MyVisitor();

    /*
    private String getLabels(int start, int end) {
        Set names = new TreeSet();
        for (int cp = start; cp <= end; ++cp) {
            names.add(getLabel(cp));
        }
        return labelVisitor.join(names);
    }
    */

    /**
     * Appends the words of {@code src} to {@code result}, separated by single spaces, replacing
     * each word that equals the word at the same position in {@code pattern} with {@code
     * substitute}.
     */
    private void addMatching(
            String src, String pattern, String substitute, StringBuilder result) {
        NameIterator n1 = new NameIterator(src);
        NameIterator n2 = new NameIterator(pattern);
        boolean first = true;
        while (true) {
            String s1 = n1.next();
            if (s1 == null) {
                break;
            }
            // s2 is null once the pattern runs out of words; equals(null) is then false.
            String s2 = n2.next();
            if (!first) {
                result.append(" ");
            }
            first = false;
            if (s1.equals(s2)) {
                result.append(substitute);
            } else {
                result.append(s1);
            }
        }
    }

    private static NumberFormat nf = NumberFormat.getIntegerInstance(Locale.ENGLISH);

    static {
        nf.setGroupingUsed(false);
    }

    private int maxWidthOverride = -1;
    private int maxLabelWidthOverride = -1;

    /**
     * @param maxWidthOverride if greater than zero, used as the value column width instead of the
     *     maximum width reported by the value source
     * @return this (for chaining)
     */
    public BagFormatter setValueWidthOverride(int maxWidthOverride) {
        this.maxWidthOverride = maxWidthOverride;
        return this;
    }

    /**
     * @return the value column width override
     */
    public int getValueWidthOverride() {
        return maxWidthOverride;
    }

    /**
     * @param maxWidthOverride if greater than zero, used as the label column width instead of the
     *     maximum width reported by the label source
     * @return this (for chaining)
     */
    public BagFormatter setLabelWidthOverride(int maxWidthOverride) {
        this.maxLabelWidthOverride = maxWidthOverride;
        return this;
    }

    /**
     * @return the label column width override
     */
    public int getLabelWidthOverride() {
        return maxLabelWidthOverride;
    }

    /** Visitor that writes one tabbed line per item or code point range to a PrintWriter. */
    private class MyVisitor extends Visitor {
        private PrintWriter output;
        String commentSeparator;
        int counter;
        int valueSize;
        int labelSize;
        boolean isHtml;
        boolean inTable = false;

        /** Writes a free-standing line, closing any open HTML table first. */
        public void toOutput(String s) {
            if (isHtml) {
                if (inTable) {
                    output.print("</table>");
                    inTable = false;
                }
                output.print("<p>");
            }
            output.print(s);
            if (isHtml) {
                output.println("</p>");
            } else {
                output.print(lineSeparator);
            }
        }

        /** Writes a tab-delimited row through the tabber, opening an HTML table if needed. */
        public void toTable(String s) {
            if (isHtml && !inTable) {
                output.print("<table>");
                inTable = true;
            }
            output.print(tabber.process(s) + lineSeparator);
        }

        /**
         * Configures the tabber columns from the current settings, then visits {@code c}, writing
         * to {@code out}.
         */
        public void doAt(Object c, PrintWriter out) {
            output = out;
            isHtml = tabber instanceof Tabber.HTMLTabber;
            counter = 0;

            tabber.clear();
            // old:
            // 0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D>
            // new
            // 0009..000D ; White_Space #Cc [5] <control>..<control>
            tabber.add(mergeRanges ? 14 : 6, Tabber.LEFT);

            if (propName.length() > 0) {
                tabber.add(propName.length() + 2, Tabber.LEFT);
            }

            valueSize =
                    maxWidthOverride > 0
                            ? maxWidthOverride
                            : getValueSource().getMaxWidth(shortValue);

            if (DEBUG) System.out.println("ValueSize: " + valueSize);
            if (valueSize > 0) {
                tabber.add(valueSize + 2, Tabber.LEFT); // value
            }

            tabber.add(3, Tabber.LEFT); // comment character

            labelSize =
                    maxLabelWidthOverride > 0
                            ? maxLabelWidthOverride
                            : getLabelSource(true).getMaxWidth(shortLabel);
            if (labelSize > 0) {
                tabber.add(labelSize + 1, Tabber.LEFT); // label
            }

            if (mergeRanges && showCount) {
                tabber.add(5, Tabber.RIGHT);
            }

            if (showLiteral != null) {
                tabber.add(4, Tabber.LEFT);
            }
            // myTabber.add(7,Tabber.LEFT);

            commentSeparator =
                    (showCount
                                    || showLiteral != null
                                    || getLabelSource(true) != UnicodeLabel.NULL
                                    || getNameSource() != UnicodeLabel.NULL)
                            ? "\t #"
                            : "";

            if (DEBUG) System.out.println("Tabber: " + tabber.toString());
            if (DEBUG)
                System.out.println(
                        "Tabber: "
                                + tabber.process(
                                        "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER"));
            doAt(c);
        }

        /**
         * Formats {@code o} into a string.
         *
         * @param o the object to format
         * @return the formatted text
         */
        @SuppressWarnings("unused")
        public String format(Object o) {
            StringWriter sw = new StringWriter();
            PrintWriter pw = new PrintWriter(sw);
            // Route the visit through pw; plain doAt(o) would leave 'output' unset or stale,
            // so nothing would reach the StringWriter.
            doAt(o, pw);
            pw.flush();
            String result = sw.getBuffer().toString();
            pw.close();
            return result;
        }

        @Override
        protected void doBefore(Object container, Object o) {
            if (showSetAlso && container instanceof UnicodeSet) {
                toOutput("#" + container);
            }
        }

        @Override
        protected void doBetween(Object container, Object lastItem, Object nextItem) {}

        @Override
        protected void doAfter(Object container, Object o) {
            if (fullTotal != -1 && fullTotal != counter) {
                if (showTotal) {
                    toOutput("");
                    toOutput(
                            "# The above property value applies to "
                                    + nf.format(fullTotal - counter)
                                    + " code points not listed here.");
                    toOutput("# Total code points: " + nf.format(fullTotal));
                }
                // The full total applies to one container only; reset it once consumed.
                fullTotal = -1;
            } else if (showTotal) {
                toOutput("");
                toOutput("# Total code points: " + nf.format(counter));
            }
        }

        @Override
        protected void doSimpleAt(Object o) {
            if (o instanceof Map.Entry) {
                Map.Entry oo = (Map.Entry) o;
                Object key = oo.getKey();
                Object value = oo.getValue();
                doBefore(o, key);
                doAt(key);
                output.println("\u2192");
                doAt(value);
                doAfter(o, value);
                counter++;
            } else if (o instanceof Visitor.CodePointRange) {
                doAt((Visitor.CodePointRange) o);
            } else {
                String thing = o.toString();
                String value =
                        getValueSource() == UnicodeLabel.NULL
                                ? ""
                                : getValueSource().getValue(thing, ",", true);
                if (getValueSource() != UnicodeLabel.NULL) {
                    value = "\t; " + value;
                }
                String label =
                        getLabelSource(true) == UnicodeLabel.NULL
                                ? ""
                                : getLabelSource(true).getValue(thing, ",", true);
                if (label.length() != 0) {
                    label = " " + label;
                }
                toTable(
                        hex(thing)
                                + value
                                + commentSeparator
                                + label
                                + insertLiteral(thing)
                                + "\t"
                                + getName(thing));
                counter++;
            }
        }

        /** Writes the lines for one code point range, merged or per code point. */
        protected void doAt(Visitor.CodePointRange usi) {
            if (!mergeRanges) {
                for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
                    showLine(cp, cp);
                }
            } else {
                rf.reset(usi.codepoint, usi.codepointEnd + 1);
                while (rf.next()) {
                    showLine(rf.start, rf.limit - 1);
                }
            }
        }

        /**
         * Writes one table row for the inclusive range {@code start..end}; writes nothing when
         * the value source has no value for {@code start}.
         */
        private void showLine(int start, int end) {
            String label = getLabelSource(true).getValue(start, shortLabel);
            String value = getValue(start, shortValue);
            // Identity comparison is intentional: NULL_VALUE is a sentinel object.
            if (value == NULL_VALUE) {
                return;
            }

            counter += end - start + 1;
            String pn = propName;
            if (pn.length() != 0) {
                pn = "\t; " + pn;
            }
            if (valueSize > 0) {
                value = "\t; " + value;
            } else if (value.length() > 0) {
                throw new IllegalArgumentException(
                        "maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
            }
            if (labelSize > 0) {
                label = "\t" + label;
            } else if (label.length() > 0) {
                throw new IllegalArgumentException(
                        "maxwidth bogus "
                                + label
                                + ", "
                                + getLabelSource(true).getMaxWidth(shortLabel));
            }

            String count = "";
            if (mergeRanges && showCount) {
                if (end == start) {
                    count = "\t";
                } else {
                    count = "\t [" + nf.format(end - start + 1) + "]";
                }
            }

            toTable(
                    hex(start, end)
                            + pn
                            + value
                            + commentSeparator
                            + label
                            + count
                            + insertLiteral(start, end)
                            + getName("\t ", start, end));
        }

        private String insertLiteral(String thing) {
            return (showLiteral == null ? "" : " \t(" + showLiteral.transliterate(thing) + ") ");
        }

        private String insertLiteral(int start, int end) {
            return (showLiteral == null
                    ? ""
                    : " \t("
                            + showLiteral.transliterate(UTF16.valueOf(start))
                            + ((start != end)
                                    ? (".." + showLiteral.transliterate(UTF16.valueOf(end)))
                                    : "")
                            + ") ");
        }
        /*
        private String insertLiteral(int cp) {
            return (showLiteral == null ? ""
                :  " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
        }
        */
    }

    /**
     * Iterate through a string, breaking at words.
     *
     * @author Davis
     */
    private static class NameIterator {
        String source;
        int position;
        int limit;

        NameIterator(String source) {
            this.source = source;
            this.limit = source.length();
        }

        /**
         * Find the next space-delimited word; the delimiting space is skipped and not included.
         *
         * @return the next word, or null when the string is exhausted
         */
        String next() {
            if (position >= limit) {
                return null;
            }
            int pos = source.indexOf(' ', position);
            if (pos < 0 || pos >= limit) {
                pos = limit;
            }
            String result = source.substring(position, pos);
            position = pos + 1;
            return result;
        }

        /**
         * Finds the length of the common trailing part of {@code s1} and {@code s2}, trimmed so
         * that it starts at a space in {@code s1}.
         *
         * @return the number of trailing chars of {@code s1} from that space on, or 0 if there is
         *     no space at or after the point where the common suffix begins
         */
        static int findMatchingEnd(String s1, String s2) {
            int i = s1.length() - 1;
            int j = s2.length() - 1;
            // Walk backwards while both strings still have chars and those chars agree.
            while (i >= 0 && j >= 0 && s1.charAt(i) == s2.charAt(j)) {
                --i;
                --j;
            }

            ++i; // step back onto the first char of the common suffix
            i = s1.indexOf(' ', i); // move forward to space
            if (i < 0) {
                return 0;
            }
            return s1.length() - i;
        }
    }

    /**
     * Splits a code point range into maximal sub-ranges whose label, value and range-break value
     * are all equal.
     */
    private class RangeFinder {
        int start, limit;
        private int veryLimit;

        // String label, value;
        void reset(int rangeStart, int rangeLimit) {
            limit = rangeStart;
            veryLimit = rangeLimit;
        }

        /**
         * Advances to the next sub-range, leaving it in {@code start} (inclusive) and {@code
         * limit} (exclusive).
         *
         * @return false when the whole range has been consumed
         */
        boolean next() {
            if (limit >= veryLimit) {
                return false;
            }
            start = limit; // set to end of last
            String label = getLabelSource(false).getValue(limit, true);
            String value = getValue(limit, true);
            String breaker = getRangeBreakSource().getValue(limit, true);
            if (DEBUG && 0x3FFD < limit && limit < 0x9FD6) {
                System.out.println(
                        Utility.hex(limit)
                                + ", Label: "
                                + label
                                + ", Value: "
                                + value
                                + ", Break: "
                                + breaker);
            }
            limit++;
            for (; limit < veryLimit; limit++) {
                String s = getLabelSource(false).getValue(limit, true);
                String v = getValue(limit, true);
                String b = getRangeBreakSource().getValue(limit, true);
                if (DEBUG && limit > 0x9FD4) {
                    System.out.println(
                            Utility.hex(limit)
                                    + ", *Label: "
                                    + s
                                    + ", Value: "
                                    + v
                                    + ", Break: "
                                    + b);
                }
                if (!equalTo(s, label) || !equalTo(v, value) || !equalTo(b, breaker)) {
                    break;
                }
            }
            // at this point, limit is the first item that has a different label than source
            // OR, we got to the end, and limit == veryLimit
            return true;
        }
    }

    /** Null-safe equality: true if both are the same reference (including null) or equal. */
    boolean equalTo(Object a, Object b) {
        return Objects.equals(a, b);
    }

    boolean shortLabel = true;
    boolean shortValue = true;

    /**
     * @return the prefix used by {@link #join(Object)}
     */
    public String getPrefix() {
        return prefix;
    }

    /**
     * @return the suffix used by {@link #join(Object)}
     */
    public String getSuffix() {
        return suffix;
    }

    /**
     * @param string the prefix used by {@link #join(Object)}
     * @return this (for chaining)
     */
    public BagFormatter setPrefix(String string) {
        prefix = string;
        return this;
    }

    /**
     * @param string the suffix used by {@link #join(Object)}
     * @return this (for chaining)
     */
    public BagFormatter setSuffix(String string) {
        suffix = string;
        return this;
    }

    /**
     * @return true if the end name of a range is abbreviated relative to the start name
     */
    public boolean isAbbreviated() {
        return abbreviated;
    }

    /**
     * @param b true to abbreviate the end name of a range relative to the start name
     * @return this (for chaining)
     */
    public BagFormatter setAbbreviated(boolean b) {
        abbreviated = b;
        return this;
    }

    /**
     * Returns the label source, creating the default on first use: General_Category filtered so
     * that Lu, Lt and Ll are all reported as {@code L&}.
     *
     * @param visible not used by this implementation
     * @return the label source
     */
    public UnicodeLabel getLabelSource(boolean visible) {
        if (labelSource == null) {
            Map<String, String> labelMap = new HashMap<>();
            // labelMap.put("Lo","L&");
            labelMap.put("Lu", "L&");
            labelMap.put("Lt", "L&");
            labelMap.put("Ll", "L&");
            labelSource =
                    new UnicodeProperty.FilteredProperty(
                                    getUnicodePropertyFactory().getProperty("General_Category"),
                                    new UnicodeProperty.MapFilter(labelMap))
                            .setAllowValueAliasCollisions(true);
        }
        return labelSource;
    }

    /**
     * @deprecated call {@code source.addAllTo(target)} directly
     */
    @Deprecated
    @SuppressWarnings("rawtypes") // raw Collection kept for source compatibility with callers
    public static void addAll(UnicodeSet source, Collection target) {
        source.addAllTo(target);
    }

    // UTILITIES

    public static final Transliterator hex =
            Transliterator.getInstance("[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");

    /**
     * @return the separator placed between items
     */
    public String getSeparator() {
        return separator;
    }

    /**
     * @param string the separator placed between items
     * @return this (for chaining)
     */
    public BagFormatter setSeparator(String string) {
        separator = string;
        return this;
    }

    /**
     * @return the transliterator used to show literals, or null if literals are not shown
     */
    public Transliterator getShowLiteral() {
        return showLiteral;
    }

    /**
     * @param transliterator the transliterator used to show literals; null to not show literals
     * @return this (for chaining)
     */
    public BagFormatter setShowLiteral(Transliterator transliterator) {
        showLiteral = transliterator;
        return this;
    }

    // ===== CONVENIENCES =====
    /** Visitor that joins items into a string using the current prefix, separator and suffix. */
    private class Join extends Visitor {
        StringBuilder output = new StringBuilder();

        @SuppressWarnings("unused")
        int depth = 0;

        String join(Object o) {
            output.setLength(0);
            doAt(o);
            return output.toString();
        }

        @Override
        protected void doBefore(Object container, Object item) {
            ++depth;
            output.append(prefix);
        }

        @Override
        protected void doAfter(Object container, Object item) {
            output.append(suffix);
            --depth;
        }

        @Override
        protected void doBetween(Object container, Object lastItem, Object nextItem) {
            output.append(separator);
        }

        @Override
        protected void doSimpleAt(Object o) {
            if (o != null) {
                output.append(o.toString());
            }
        }
    }

    /**
     * @param label the label source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setLabelSource(UnicodeLabel label) {
        if (label == null) {
            label = UnicodeLabel.NULL;
        }
        labelSource = label;
        return this;
    }

    /**
     * @return the name source; a {@link NameLabel} over the property factory is created on first
     *     use if none was set
     */
    public UnicodeLabel getNameSource() {
        if (nameSource == null) {
            nameSource = new NameLabel(getUnicodePropertyFactory());
        }
        return nameSource;
    }

    /**
     * @param label the name source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setNameSource(UnicodeLabel label) {
        if (label == null) {
            label = UnicodeLabel.NULL;
        }
        nameSource = label;
        return this;
    }

    /**
     * @return the UnicodeLabel representing the value; {@code UnicodeLabel.NULL} if none was set
     */
    public UnicodeLabel getValueSource() {
        if (valueSource == null) {
            valueSource = UnicodeLabel.NULL;
        }
        return valueSource;
    }

    /**
     * Returns the value for a code point, or the {@code NULL_VALUE} sentinel when the value source
     * returns null. The value is converted to hex when {@code hexValue} is set.
     */
    private String getValue(int cp, boolean shortVal) {
        String result = getValueSource().getValue(cp, shortVal);
        if (result == null) {
            return NULL_VALUE;
        }
        if (hexValue) {
            result = hex(result, " ");
        }
        return result;
    }

    /**
     * @param label the value source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setValueSource(UnicodeLabel label) {
        if (label == null) {
            label = UnicodeLabel.NULL;
        }
        valueSource = label;
        return this;
    }

    /**
     * @param label constant value, wrapped in a {@code UnicodeLabel.Constant}
     * @return this (for chaining)
     */
    public BagFormatter setValueSource(String label) {
        return setValueSource(new UnicodeLabel.Constant(label));
    }

    /**
     * @return true if showCount is true
     */
    public boolean isShowCount() {
        return showCount;
    }

    /**
     * @param b true to show the count
     * @return this (for chaining)
     */
    public BagFormatter setShowCount(boolean b) {
        showCount = b;
        return this;
    }

    /**
     * @return the property name
     */
    public String getPropName() {
        return propName;
    }

    /**
     * @param string the property name; null is replaced by the empty string
     * @return this (for chaining)
     */
    public BagFormatter setPropName(String string) {
        if (string == null) {
            string = "";
        }
        propName = string;
        return this;
    }

    /**
     * @return true if this is a hexValue
     */
    public boolean isHexValue() {
        return hexValue;
    }

    /**
     * @param b true to show values in hex
     * @return this (for chaining)
     */
    public BagFormatter setHexValue(boolean b) {
        hexValue = b;
        return this;
    }

    /**
     * @return the full total
     */
    public int getFullTotal() {
        return fullTotal;
    }

    /**
     * @param i set the full total
     * @return this (for chaining)
     */
    public BagFormatter setFullTotal(int i) {
        fullTotal = i;
        return this;
    }

    /**
     * @return the line separator
     */
    public String getLineSeparator() {
        return lineSeparator;
    }

    /**
     * @param string the line separator
     * @return this (for chaining)
     */
    public BagFormatter setLineSeparator(String string) {
        lineSeparator = string;
        return this;
    }

    /**
     * @return the UnicodeLabel representing the range break source; a General_Category-based
     *     default is created on first use if none was set
     */
    public UnicodeLabel getRangeBreakSource() {
        if (rangeBreakSource == null) {
            Map<String, String> labelMap = new HashMap<>();
            // reflects the code point types on p 25
            labelMap.put("Lo", "G&");
            labelMap.put("Lm", "G&");
            labelMap.put("Lu", "G&");
            labelMap.put("Lt", "G&");
            labelMap.put("Ll", "G&");
            labelMap.put("Mn", "G&");
            labelMap.put("Me", "G&");
            labelMap.put("Mc", "G&");
            labelMap.put("Nd", "G&");
            labelMap.put("Nl", "G&");
            labelMap.put("No", "G&");
            labelMap.put("Zs", "G&");
            labelMap.put("Pd", "G&");
            labelMap.put("Ps", "G&");
            labelMap.put("Pe", "G&");
            labelMap.put("Pc", "G&");
            labelMap.put("Po", "G&");
            labelMap.put("Pi", "G&");
            labelMap.put("Pf", "G&");
            labelMap.put("Sm", "G&");
            labelMap.put("Sc", "G&");
            labelMap.put("Sk", "G&");
            labelMap.put("So", "G&");

            labelMap.put("Zl", "Cf");
            labelMap.put("Zp", "Cf");

            rangeBreakSource =
                    new UnicodeProperty.FilteredProperty(
                                    getUnicodePropertyFactory().getProperty("General_Category"),
                                    new UnicodeProperty.MapFilter(labelMap))
                            .setAllowValueAliasCollisions(true);

            /*
            "Cn", // = Other, Not Assigned 0
            "Cc", // = Other, Control 15
            "Cf", // = Other, Format 16
            UnicodeProperty.UNUSED, // missing
            "Co", // = Other, Private Use 18
            "Cs", // = Other, Surrogate 19
            */
        }
        return rangeBreakSource;
    }

    /**
     * @param label the range break source; null is replaced by {@code UnicodeLabel.NULL}
     * @return this (for chaining)
     */
    public BagFormatter setRangeBreakSource(UnicodeLabel label) {
        if (label == null) {
            label = UnicodeLabel.NULL;
        }
        rangeBreakSource = label;
        return this;
    }

    /**
     * @return Returns the fixName.
     */
    public Transliterator getFixName() {
        return fixName;
    }

    /**
     * @param fixName The fixName to set.
     * @return this (for chaining)
     */
    public BagFormatter setFixName(Transliterator fixName) {
        this.fixName = fixName;
        return this;
    }

    /**
     * @return the tabber used to lay out table rows
     */
    public Tabber getTabber() {
        return tabber;
    }

    /**
     * @param tabber the tabber used to lay out table rows
     */
    public void setTabber(Tabber tabber) {
        this.tabber = tabber;
    }

    /**
     * @return true if total lines are written after each container
     */
    public boolean isShowTotal() {
        return showTotal;
    }

    /**
     * @param showTotal true to write total lines after each container
     */
    public void setShowTotal(boolean showTotal) {
        this.showTotal = showTotal;
    }
}