• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2002-2016, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 package org.unicode.cldr.util.props;
8 
9 import com.ibm.icu.impl.Utility;
10 import com.ibm.icu.text.NumberFormat;
11 import com.ibm.icu.text.Transliterator;
12 import com.ibm.icu.text.UTF16;
13 import com.ibm.icu.text.UnicodeSet;
14 import java.io.PrintWriter;
15 import java.io.StringWriter;
16 import java.text.MessageFormat;
17 import java.util.Collection;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Locale;
21 import java.util.Map;
22 import org.unicode.cldr.draft.FileUtilities;
23 import org.unicode.cldr.util.Tabber;
24 import org.unicode.cldr.util.Visitor;
25 
26 public class BagFormatter {
27     static final boolean DEBUG = false;
28     public static final boolean SHOW_FILES;
29 
30     static {
31         boolean showFiles = false;
32         try {
33             showFiles = System.getProperty("SHOW_FILES") != null;
34         } catch (SecurityException e) {
35         }
36         SHOW_FILES = showFiles;
37     }
38 
39     public static final PrintWriter CONSOLE = new PrintWriter(System.out, true);
40 
41     private static PrintWriter log = CONSOLE;
42 
43     private boolean abbreviated = false;
44     private String separator = ",";
45     private String prefix = "[";
46     private String suffix = "]";
47     private UnicodeProperty.Factory source;
48     private UnicodeLabel nameSource;
49     private UnicodeLabel labelSource;
50     private UnicodeLabel rangeBreakSource;
51     private UnicodeLabel valueSource;
52     private String propName = "";
53     private boolean showCount = true;
54     // private boolean suppressReserved = true;
55     private boolean hexValue = false;
56     private static final String NULL_VALUE = "_NULL_VALUE_";
57     private int fullTotal = -1;
58     private boolean showTotal = true;
59     private String lineSeparator = System.lineSeparator();
60     private Tabber tabber = new Tabber.MonoTabber();
61 
62     /**
63      * Compare two UnicodeSets, and show the differences
64      *
65      * @param name1 name of first set to be compared
66      * @param set1 first set
67      * @param name2 name of second set to be compared
68      * @param set2 second set
69      * @return formatted string
70      */
showSetDifferences(String name1, UnicodeSet set1, String name2, UnicodeSet set2)71     public String showSetDifferences(String name1, UnicodeSet set1, String name2, UnicodeSet set2) {
72 
73         StringWriter result = new StringWriter();
74         showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
75         result.flush();
76         return result.getBuffer().toString();
77     }
78 
showSetDifferences(String name1, Collection set1, String name2, Collection set2)79     public String showSetDifferences(String name1, Collection set1, String name2, Collection set2) {
80 
81         StringWriter result = new StringWriter();
82         showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
83         result.flush();
84         return result.getBuffer().toString();
85     }
86 
showSetDifferences( PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2)87     public void showSetDifferences(
88             PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2) {
89         showSetDifferences(pw, name1, set1, name2, set2, -1);
90     }
91     /**
92      * Compare two UnicodeSets, and show the differences
93      *
94      * @param name1 name of first set to be compared
95      * @param set1 first set
96      * @param name2 name of second set to be compared
97      * @param set2 second set
98      */
showSetDifferences( PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2, int flags)99     public void showSetDifferences(
100             PrintWriter pw,
101             String name1,
102             UnicodeSet set1,
103             String name2,
104             UnicodeSet set2,
105             int flags) {
106         if (pw == null) pw = FileUtilities.CONSOLE;
107         String[] names = {name1, name2};
108 
109         UnicodeSet temp;
110 
111         if ((flags & 1) != 0) {
112             temp = new UnicodeSet(set1).removeAll(set2);
113             pw.print(lineSeparator);
114             pw.print(inOut.format(names));
115             pw.print(lineSeparator);
116             showSetNames(pw, temp);
117         }
118 
119         if ((flags & 2) != 0) {
120             temp = new UnicodeSet(set2).removeAll(set1);
121             pw.print(lineSeparator);
122             pw.print(outIn.format(names));
123             pw.print(lineSeparator);
124             showSetNames(pw, temp);
125         }
126 
127         if ((flags & 4) != 0) {
128             temp = new UnicodeSet(set2).retainAll(set1);
129             pw.print(lineSeparator);
130             pw.print(inIn.format(names));
131             pw.print(lineSeparator);
132             showSetNames(pw, temp);
133         }
134         pw.flush();
135     }
136 
showSetDifferences( PrintWriter pw, String name1, Collection set1, String name2, Collection set2)137     public void showSetDifferences(
138             PrintWriter pw, String name1, Collection set1, String name2, Collection set2) {
139 
140         if (pw == null) pw = FileUtilities.CONSOLE;
141         String[] names = {name1, name2};
142         // damn'd collection doesn't have a clone, so
143         // we go with Set, even though that
144         // may not preserve order and duplicates
145         Collection temp = new HashSet(set1);
146         temp.removeAll(set2);
147         pw.println();
148         pw.println(inOut.format(names));
149         showSetNames(pw, temp);
150 
151         temp.clear();
152         temp.addAll(set2);
153         temp.removeAll(set1);
154         pw.println();
155         pw.println(outIn.format(names));
156         showSetNames(pw, temp);
157 
158         temp.clear();
159         temp.addAll(set1);
160         temp.retainAll(set2);
161         pw.println();
162         pw.println(inIn.format(names));
163         showSetNames(pw, temp);
164     }
165 
166     /**
167      * Returns a list of items in the collection, with each separated by the separator. Each item
168      * must not be null; its toString() is called for a printable representation
169      *
170      * @param c source collection
171      * @return a String representation of the list
172      */
showSetNames(Object c)173     public String showSetNames(Object c) {
174         StringWriter buffer = new StringWriter();
175         PrintWriter output = new PrintWriter(buffer);
176         showSetNames(output, c);
177         return buffer.toString();
178     }
179 
180     /**
181      * Returns a list of items in the collection, with each separated by the separator. Each item
182      * must not be null; its toString() is called for a printable representation
183      *
184      * @param output destination to which to write names
185      * @param c source collection
186      */
showSetNames(PrintWriter output, Object c)187     public void showSetNames(PrintWriter output, Object c) {
188         mainVisitor.doAt(c, output);
189         output.flush();
190     }
191 
getAbbreviatedName(String src, String pattern, String substitute)192     public String getAbbreviatedName(String src, String pattern, String substitute) {
193 
194         int matchEnd = NameIterator.findMatchingEnd(src, pattern);
195         int sdiv = src.length() - matchEnd;
196         int pdiv = pattern.length() - matchEnd;
197         StringBuffer result = new StringBuffer();
198         addMatching(src.substring(0, sdiv), pattern.substring(0, pdiv), substitute, result);
199         addMatching(src.substring(sdiv), pattern.substring(pdiv), substitute, result);
200         return result.toString();
201     }
202 
203     public abstract static class Relation {
getRelation(String a, String b)204         public abstract String getRelation(String a, String b);
205     }
206 
207     static class NullRelation extends Relation {
208         @Override
getRelation(String a, String b)209         public String getRelation(String a, String b) {
210             return "";
211         }
212     }
213 
214     private Relation r = new NullRelation();
215 
setRelation(Relation r)216     public BagFormatter setRelation(Relation r) {
217         this.r = r;
218         return this; // for chaining
219     }
220 
getRelation()221     public Relation getRelation() {
222         return r;
223     }
224 
225     /*
226     r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
227     */
228     /*
229     static final UnicodeSet NO_NAME =
230         new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
231     static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
232     static final UnicodeSet NAME_CHARACTERS =
233         new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");
234 
235     public UnicodeSet getSetForName(String namePattern) {
236         UnicodeSet result = new UnicodeSet();
237         Matcher m = Pattern.compile(namePattern).matcher("");
238         // check for no-name items, and add in bulk
239         m.reset("<no name>");
240         if (m.matches()) {
241             result.addAll(NO_NAME);
242         }
243         // check all others
244         UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
245         while (usi.next()) {
246             String name = getName(usi.codepoint);
247             if (name == null)
248                 continue;
249             m.reset(name);
250             if (m.matches()) {
251                 result.add(usi.codepoint);
252             }
253         }
254         // Note: if Regex had some API so that if we could tell that
255         // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
256         // then we could optimize by skipping whole swathes of characters
257         return result;
258     }
259      */
260 
setMergeRanges(boolean in)261     public BagFormatter setMergeRanges(boolean in) {
262         mergeRanges = in;
263         return this;
264     }
265 
setShowSetAlso(boolean b)266     public BagFormatter setShowSetAlso(boolean b) {
267         showSetAlso = b;
268         return this;
269     }
270 
getName(int codePoint)271     public String getName(int codePoint) {
272         return getName("", codePoint, codePoint);
273     }
274 
getName(String sep, int start, int end)275     public String getName(String sep, int start, int end) {
276         if (getNameSource() == null || getNameSource() == UnicodeLabel.NULL) return "";
277         String result = getName(start, false);
278         if (start == end) return sep + result;
279         String endString = getName(end, false);
280         if (result.length() == 0 && endString.length() == 0) return sep;
281         if (abbreviated) endString = getAbbreviatedName(endString, result, "~");
282         return sep + result + ".." + endString;
283     }
284 
getName(String s)285     public String getName(String s) {
286         return getName(s, false);
287     }
288 
289     public static class NameLabel extends UnicodeLabel {
290         UnicodeProperty nameProp;
291         UnicodeSet control;
292         UnicodeSet private_use;
293         UnicodeSet noncharacter;
294         UnicodeSet surrogate;
295 
NameLabel(UnicodeProperty.Factory source)296         public NameLabel(UnicodeProperty.Factory source) {
297             nameProp = source.getProperty("Name");
298             control = source.getSet("gc=Cc");
299             private_use = source.getSet("gc=Co");
300             surrogate = source.getSet("gc=Cs");
301             noncharacter = source.getSet("noncharactercodepoint=yes");
302         }
303 
304         @Override
getValue(int codePoint, boolean isShort)305         public String getValue(int codePoint, boolean isShort) {
306             String hcp = !isShort ? "U+" + Utility.hex(codePoint, 4) + " " : "";
307             String result = nameProp.getValue(codePoint);
308             if (result != null) return hcp + result;
309             if (control.contains(codePoint)) {
310                 return "<control-" + Utility.hex(codePoint, 4) + ">";
311             }
312             if (private_use.contains(codePoint)) {
313                 return "<private-use-" + Utility.hex(codePoint, 4) + ">";
314             }
315             if (surrogate.contains(codePoint)) {
316                 return "<surrogate-" + Utility.hex(codePoint, 4) + ">";
317             }
318             if (noncharacter.contains(codePoint)) {
319                 return "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
320             }
321             // if (suppressReserved) return "";
322             return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">";
323         }
324     }
325 
326     // refactored
getName(int codePoint, boolean withCodePoint)327     public String getName(int codePoint, boolean withCodePoint) {
328         String result = getNameSource().getValue(codePoint, !withCodePoint);
329         return fixName == null ? result : fixName.transliterate(result);
330     }
331 
getName(String s, boolean withCodePoint)332     public String getName(String s, boolean withCodePoint) {
333         String result = getNameSource().getValue(s, separator, !withCodePoint);
334         return fixName == null ? result : fixName.transliterate(result);
335     }
336 
hex(String s)337     public String hex(String s) {
338         return hex(s, separator);
339     }
340 
hex(String s, String sep)341     public String hex(String s, String sep) {
342         return UnicodeLabel.HEX.getValue(s, sep, true);
343     }
344 
hex(int start, int end)345     public String hex(int start, int end) {
346         String s = Utility.hex(start, 4);
347         if (start == end) return s;
348         return s + ".." + Utility.hex(end, 4);
349     }
350 
setUnicodePropertyFactory(UnicodeProperty.Factory source)351     public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
352         this.source = source;
353         return this;
354     }
355 
getUnicodePropertyFactory()356     private UnicodeProperty.Factory getUnicodePropertyFactory() {
357         if (source == null) source = ICUPropertyFactory.make();
358         return source;
359     }
360 
BagFormatter()361     public BagFormatter() {}
362 
BagFormatter(UnicodeProperty.Factory source)363     public BagFormatter(UnicodeProperty.Factory source) {
364         setUnicodePropertyFactory(source);
365     }
366 
join(Object o)367     public String join(Object o) {
368         return labelVisitor.join(o);
369     }
370 
371     // ===== PRIVATES =====
372 
373     private Join labelVisitor = new Join();
374 
375     private boolean mergeRanges = true;
376     private Transliterator showLiteral = null;
377     private Transliterator fixName = null;
378     private boolean showSetAlso = false;
379 
380     private RangeFinder rf = new RangeFinder();
381 
382     private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
383     private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
384     private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");
385 
386     private MyVisitor mainVisitor = new MyVisitor();
387 
388     /*
389     private String getLabels(int start, int end) {
390         Set names = new TreeSet();
391         for (int cp = start; cp <= end; ++cp) {
392             names.add(getLabel(cp));
393         }
394         return labelVisitor.join(names);
395     }
396      */
397 
addMatching(String src, String pattern, String substitute, StringBuffer result)398     private void addMatching(String src, String pattern, String substitute, StringBuffer result) {
399         NameIterator n1 = new NameIterator(src);
400         NameIterator n2 = new NameIterator(pattern);
401         boolean first = true;
402         while (true) {
403             String s1 = n1.next();
404             if (s1 == null) break;
405             String s2 = n2.next();
406             if (!first) result.append(" ");
407             first = false;
408             if (s1.equals(s2)) result.append(substitute);
409             else result.append(s1);
410         }
411     }
412 
413     private static NumberFormat nf = NumberFormat.getIntegerInstance(Locale.ENGLISH);
414 
415     static {
416         nf.setGroupingUsed(false);
417     }
418 
419     private int maxWidthOverride = -1;
420     private int maxLabelWidthOverride = -1;
421 
setValueWidthOverride(int maxWidthOverride)422     public BagFormatter setValueWidthOverride(int maxWidthOverride) {
423         this.maxWidthOverride = maxWidthOverride;
424         return this;
425     }
426 
getValueWidthOverride()427     public int getValueWidthOverride() {
428         return maxWidthOverride;
429     }
430 
setLabelWidthOverride(int maxWidthOverride)431     public BagFormatter setLabelWidthOverride(int maxWidthOverride) {
432         this.maxLabelWidthOverride = maxWidthOverride;
433         return this;
434     }
435 
getLabelWidthOverride()436     public int getLabelWidthOverride() {
437         return maxLabelWidthOverride;
438     }
439 
440     private class MyVisitor extends Visitor {
441         private PrintWriter output;
442         String commentSeparator;
443         int counter;
444         int valueSize;
445         int labelSize;
446         boolean isHtml;
447         boolean inTable = false;
448 
toOutput(String s)449         public void toOutput(String s) {
450             if (isHtml) {
451                 if (inTable) {
452                     output.print("</table>");
453                     inTable = false;
454                 }
455                 output.print("<p>");
456             }
457             output.print(s);
458             if (isHtml) output.println("</p>");
459             else output.print(lineSeparator);
460         }
461 
toTable(String s)462         public void toTable(String s) {
463             if (isHtml && !inTable) {
464                 output.print("<table>");
465                 inTable = true;
466             }
467             output.print(tabber.process(s) + lineSeparator);
468         }
469 
doAt(Object c, PrintWriter out)470         public void doAt(Object c, PrintWriter out) {
471             output = out;
472             isHtml = tabber instanceof Tabber.HTMLTabber;
473             counter = 0;
474 
475             tabber.clear();
476             // old:
477             // 0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>
478             // new
479             // 0009..000D    ; White_Space #Cc  [5] <control>..<control>
480             tabber.add(mergeRanges ? 14 : 6, Tabber.LEFT);
481 
482             if (propName.length() > 0) {
483                 tabber.add(propName.length() + 2, Tabber.LEFT);
484             }
485 
486             valueSize =
487                     maxWidthOverride > 0
488                             ? maxWidthOverride
489                             : getValueSource().getMaxWidth(shortValue);
490 
491             if (DEBUG) System.out.println("ValueSize: " + valueSize);
492             if (valueSize > 0) {
493                 tabber.add(valueSize + 2, Tabber.LEFT); // value
494             }
495 
496             tabber.add(3, Tabber.LEFT); // comment character
497 
498             labelSize =
499                     maxLabelWidthOverride > 0
500                             ? maxLabelWidthOverride
501                             : getLabelSource(true).getMaxWidth(shortLabel);
502             if (labelSize > 0) {
503                 tabber.add(labelSize + 1, Tabber.LEFT); // value
504             }
505 
506             if (mergeRanges && showCount) {
507                 tabber.add(5, Tabber.RIGHT);
508             }
509 
510             if (showLiteral != null) {
511                 tabber.add(4, Tabber.LEFT);
512             }
513             // myTabber.add(7,Tabber.LEFT);
514 
515             commentSeparator =
516                     (showCount
517                                     || showLiteral != null
518                                     || getLabelSource(true) != UnicodeLabel.NULL
519                                     || getNameSource() != UnicodeLabel.NULL)
520                             ? "\t #"
521                             : "";
522 
523             if (DEBUG) System.out.println("Tabber: " + tabber.toString());
524             if (DEBUG)
525                 System.out.println(
526                         "Tabber: "
527                                 + tabber.process(
528                                         "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER"));
529             doAt(c);
530         }
531 
532         @SuppressWarnings("unused")
format(Object o)533         public String format(Object o) {
534             StringWriter sw = new StringWriter();
535             PrintWriter pw = new PrintWriter(sw);
536             doAt(o);
537             pw.flush();
538             String result = sw.getBuffer().toString();
539             pw.close();
540             return result;
541         }
542 
543         @Override
doBefore(Object container, Object o)544         protected void doBefore(Object container, Object o) {
545             if (showSetAlso && container instanceof UnicodeSet) {
546                 toOutput("#" + container);
547             }
548         }
549 
550         @Override
doBetween(Object container, Object lastItem, Object nextItem)551         protected void doBetween(Object container, Object lastItem, Object nextItem) {}
552 
553         @Override
doAfter(Object container, Object o)554         protected void doAfter(Object container, Object o) {
555             if (fullTotal != -1 && fullTotal != counter) {
556                 if (showTotal) {
557                     toOutput("");
558                     toOutput(
559                             "# The above property value applies to "
560                                     + nf.format(fullTotal - counter)
561                                     + " code points not listed here.");
562                     toOutput("# Total code points: " + nf.format(fullTotal));
563                 }
564                 fullTotal = -1;
565             } else if (showTotal) {
566                 toOutput("");
567                 toOutput("# Total code points: " + nf.format(counter));
568             }
569         }
570 
571         @Override
doSimpleAt(Object o)572         protected void doSimpleAt(Object o) {
573             if (o instanceof Map.Entry) {
574                 Map.Entry oo = (Map.Entry) o;
575                 Object key = oo.getKey();
576                 Object value = oo.getValue();
577                 doBefore(o, key);
578                 doAt(key);
579                 output.println("\u2192");
580                 doAt(value);
581                 doAfter(o, value);
582                 counter++;
583             } else if (o instanceof Visitor.CodePointRange) {
584                 doAt((Visitor.CodePointRange) o);
585             } else {
586                 String thing = o.toString();
587                 String value =
588                         getValueSource() == UnicodeLabel.NULL
589                                 ? ""
590                                 : getValueSource().getValue(thing, ",", true);
591                 if (getValueSource() != UnicodeLabel.NULL) value = "\t; " + value;
592                 String label =
593                         getLabelSource(true) == UnicodeLabel.NULL
594                                 ? ""
595                                 : getLabelSource(true).getValue(thing, ",", true);
596                 if (label.length() != 0) label = " " + label;
597                 toTable(
598                         hex(thing)
599                                 + value
600                                 + commentSeparator
601                                 + label
602                                 + insertLiteral(thing)
603                                 + "\t"
604                                 + getName(thing));
605                 counter++;
606             }
607         }
608 
doAt(Visitor.CodePointRange usi)609         protected void doAt(Visitor.CodePointRange usi) {
610             if (!mergeRanges) {
611                 for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
612                     showLine(cp, cp);
613                 }
614             } else {
615                 rf.reset(usi.codepoint, usi.codepointEnd + 1);
616                 while (rf.next()) {
617                     showLine(rf.start, rf.limit - 1);
618                 }
619             }
620         }
621 
showLine(int start, int end)622         private void showLine(int start, int end) {
623             String label = getLabelSource(true).getValue(start, shortLabel);
624             String value = getValue(start, shortValue);
625             if (value == NULL_VALUE) return;
626 
627             counter += end - start + 1;
628             String pn = propName;
629             if (pn.length() != 0) {
630                 pn = "\t; " + pn;
631             }
632             if (valueSize > 0) {
633                 value = "\t; " + value;
634             } else if (value.length() > 0) {
635                 throw new IllegalArgumentException(
636                         "maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
637             }
638             if (labelSize > 0) {
639                 label = "\t" + label;
640             } else if (label.length() > 0) {
641                 throw new IllegalArgumentException(
642                         "maxwidth bogus "
643                                 + label
644                                 + ", "
645                                 + getLabelSource(true).getMaxWidth(shortLabel));
646             }
647 
648             String count = "";
649             if (mergeRanges && showCount) {
650                 if (end == start) count = "\t";
651                 else count = "\t [" + nf.format(end - start + 1) + "]";
652             }
653 
654             toTable(
655                     hex(start, end)
656                             + pn
657                             + value
658                             + commentSeparator
659                             + label
660                             + count
661                             + insertLiteral(start, end)
662                             + getName("\t ", start, end));
663         }
664 
insertLiteral(String thing)665         private String insertLiteral(String thing) {
666             return (showLiteral == null ? "" : " \t(" + showLiteral.transliterate(thing) + ") ");
667         }
668 
insertLiteral(int start, int end)669         private String insertLiteral(int start, int end) {
670             return (showLiteral == null
671                     ? ""
672                     : " \t("
673                             + showLiteral.transliterate(UTF16.valueOf(start))
674                             + ((start != end)
675                                     ? (".." + showLiteral.transliterate(UTF16.valueOf(end)))
676                                     : "")
677                             + ") ");
678         }
679         /*
680         private String insertLiteral(int cp) {
681             return (showLiteral == null ? ""
682                 :  " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
683         }
684          */
685     }
686 
687     /**
688      * Iterate through a string, breaking at words.
689      *
690      * @author Davis
691      */
692     private static class NameIterator {
693         String source;
694         int position;
695         int limit;
696 
NameIterator(String source)697         NameIterator(String source) {
698             this.source = source;
699             this.limit = source.length();
700         }
701         /**
702          * Find next word, including trailing spaces
703          *
704          * @return the next word
705          */
next()706         String next() {
707             if (position >= limit) return null;
708             int pos = source.indexOf(' ', position);
709             if (pos < 0 || pos >= limit) pos = limit;
710             String result = source.substring(position, pos);
711             position = pos + 1;
712             return result;
713         }
714 
findMatchingEnd(String s1, String s2)715         static int findMatchingEnd(String s1, String s2) {
716             int i = s1.length();
717             int j = s2.length();
718             try {
719                 while (true) {
720                     --i; // decrement both before calling function!
721                     --j;
722                     if (s1.charAt(i) != s2.charAt(j)) break;
723                 }
724             } catch (Exception e) {
725             } // run off start
726 
727             ++i; // counteract increment
728             i = s1.indexOf(' ', i); // move forward to space
729             if (i < 0) return 0;
730             return s1.length() - i;
731         }
732     }
733 
734     private class RangeFinder {
735         int start, limit;
736         private int veryLimit;
737         // String label, value;
reset(int rangeStart, int rangeLimit)738         void reset(int rangeStart, int rangeLimit) {
739             limit = rangeStart;
740             veryLimit = rangeLimit;
741         }
742 
next()743         boolean next() {
744             if (limit >= veryLimit) return false;
745             start = limit; // set to end of last
746             String label = getLabelSource(false).getValue(limit, true);
747             String value = getValue(limit, true);
748             String breaker = getRangeBreakSource().getValue(limit, true);
749             if (DEBUG && 0x3FFD < limit && limit < 0x9FD6) {
750                 System.out.println(
751                         Utility.hex(limit)
752                                 + ", Label: "
753                                 + label
754                                 + ", Value: "
755                                 + value
756                                 + ", Break: "
757                                 + breaker);
758             }
759             limit++;
760             for (; limit < veryLimit; limit++) {
761                 String s = getLabelSource(false).getValue(limit, true);
762                 String v = getValue(limit, true);
763                 String b = getRangeBreakSource().getValue(limit, true);
764                 if (DEBUG && limit > 0x9FD4) {
765                     System.out.println(
766                             Utility.hex(limit)
767                                     + ", *Label: "
768                                     + s
769                                     + ", Value: "
770                                     + v
771                                     + ", Break: "
772                                     + b);
773                 }
774                 if (!equalTo(s, label) || !equalTo(v, value) || !equalTo(b, breaker)) {
775                     break;
776                 }
777             }
778             // at this point, limit is the first item that has a different label than source
779             // OR, we got to the end, and limit == veryLimit
780             return true;
781         }
782     }
783 
equalTo(Object a, Object b)784     boolean equalTo(Object a, Object b) {
785         if (a == b) return true;
786         if (a == null) return false;
787         return a.equals(b);
788     }
789 
    // Package-private flags, both defaulting to true. Their readers are not visible in this
    // part of the file; presumably they select the short (abbreviated) form of labels and
    // values respectively -- TODO confirm against the callers.
    boolean shortLabel = true;
    boolean shortValue = true;
792 
getPrefix()793     public String getPrefix() {
794         return prefix;
795     }
796 
getSuffix()797     public String getSuffix() {
798         return suffix;
799     }
800 
setPrefix(String string)801     public BagFormatter setPrefix(String string) {
802         prefix = string;
803         return this;
804     }
805 
setSuffix(String string)806     public BagFormatter setSuffix(String string) {
807         suffix = string;
808         return this;
809     }
810 
isAbbreviated()811     public boolean isAbbreviated() {
812         return abbreviated;
813     }
814 
setAbbreviated(boolean b)815     public BagFormatter setAbbreviated(boolean b) {
816         abbreviated = b;
817         return this;
818     }
819 
getLabelSource(boolean visible)820     public UnicodeLabel getLabelSource(boolean visible) {
821         if (labelSource == null) {
822             Map labelMap = new HashMap();
823             // labelMap.put("Lo","L&");
824             labelMap.put("Lu", "L&");
825             labelMap.put("Lt", "L&");
826             labelMap.put("Ll", "L&");
827             labelSource =
828                     new UnicodeProperty.FilteredProperty(
829                                     getUnicodePropertyFactory().getProperty("General_Category"),
830                                     new UnicodeProperty.MapFilter(labelMap))
831                             .setAllowValueAliasCollisions(true);
832         }
833         return labelSource;
834     }
835 
836     /**
837      * @deprecated
838      */
839     @Deprecated
addAll(UnicodeSet source, Collection target)840     public static void addAll(UnicodeSet source, Collection target) {
841         source.addAllTo(target);
842     }
843 
844     // UTILITIES
845 
    /**
     * Shared transliterator obtained from ICU with the ID {@code hex}, preceded by a filter set
     * that excludes tab, U+0020..U+007E and U+00A0..U+00FF. Presumably this means only characters
     * outside those ranges are rewritten as hex escapes -- confirm against the ICU
     * {@code Transliterator.getInstance} documentation.
     */
    public static final Transliterator hex =
            Transliterator.getInstance("[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");
848 
getSeparator()849     public String getSeparator() {
850         return separator;
851     }
852 
setSeparator(String string)853     public BagFormatter setSeparator(String string) {
854         separator = string;
855         return this;
856     }
857 
getShowLiteral()858     public Transliterator getShowLiteral() {
859         return showLiteral;
860     }
861 
setShowLiteral(Transliterator transliterator)862     public BagFormatter setShowLiteral(Transliterator transliterator) {
863         showLiteral = transliterator;
864         return this;
865     }
866 
867     // ===== CONVENIENCES =====
868     private class Join extends Visitor {
869         StringBuffer output = new StringBuffer();
870 
871         @SuppressWarnings("unused")
872         int depth = 0;
873 
join(Object o)874         String join(Object o) {
875             output.setLength(0);
876             doAt(o);
877             return output.toString();
878         }
879 
880         @Override
doBefore(Object container, Object item)881         protected void doBefore(Object container, Object item) {
882             ++depth;
883             output.append(prefix);
884         }
885 
886         @Override
doAfter(Object container, Object item)887         protected void doAfter(Object container, Object item) {
888             output.append(suffix);
889             --depth;
890         }
891 
892         @Override
doBetween(Object container, Object lastItem, Object nextItem)893         protected void doBetween(Object container, Object lastItem, Object nextItem) {
894             output.append(separator);
895         }
896 
897         @Override
doSimpleAt(Object o)898         protected void doSimpleAt(Object o) {
899             if (o != null) output.append(o.toString());
900         }
901     }
902 
903     /**
904      * @param label
905      */
setLabelSource(UnicodeLabel label)906     public BagFormatter setLabelSource(UnicodeLabel label) {
907         if (label == null) label = UnicodeLabel.NULL;
908         labelSource = label;
909         return this;
910     }
911 
912     /**
913      * @return the NameLable representing the source
914      */
getNameSource()915     public UnicodeLabel getNameSource() {
916         if (nameSource == null) {
917             nameSource = new NameLabel(getUnicodePropertyFactory());
918         }
919         return nameSource;
920     }
921 
922     /**
923      * @param label
924      */
setNameSource(UnicodeLabel label)925     public BagFormatter setNameSource(UnicodeLabel label) {
926         if (label == null) label = UnicodeLabel.NULL;
927         nameSource = label;
928         return this;
929     }
930 
931     /**
932      * @return the UnicodeLabel representing the value
933      */
getValueSource()934     public UnicodeLabel getValueSource() {
935         if (valueSource == null) valueSource = UnicodeLabel.NULL;
936         return valueSource;
937     }
938 
getValue(int cp, boolean shortVal)939     private String getValue(int cp, boolean shortVal) {
940         String result = getValueSource().getValue(cp, shortVal);
941         if (result == null) return NULL_VALUE;
942         if (hexValue) result = hex(result, " ");
943         return result;
944     }
945 
946     /**
947      * @param label
948      */
setValueSource(UnicodeLabel label)949     public BagFormatter setValueSource(UnicodeLabel label) {
950         if (label == null) label = UnicodeLabel.NULL;
951         valueSource = label;
952         return this;
953     }
954 
setValueSource(String label)955     public BagFormatter setValueSource(String label) {
956         return setValueSource(new UnicodeLabel.Constant(label));
957     }
958 
959     /**
960      * @return true if showCount is true
961      */
isShowCount()962     public boolean isShowCount() {
963         return showCount;
964     }
965 
966     /**
967      * @param b true to show the count
968      * @return this (for chaining)
969      */
setShowCount(boolean b)970     public BagFormatter setShowCount(boolean b) {
971         showCount = b;
972         return this;
973     }
974 
975     /**
976      * @return the property name
977      */
getPropName()978     public String getPropName() {
979         return propName;
980     }
981 
982     /**
983      * @param string
984      * @return this (for chaining)
985      */
setPropName(String string)986     public BagFormatter setPropName(String string) {
987         if (string == null) string = "";
988         propName = string;
989         return this;
990     }
991 
992     /**
993      * @return true if this is a hexValue
994      */
isHexValue()995     public boolean isHexValue() {
996         return hexValue;
997     }
998 
999     /**
1000      * @param b
1001      * @return this (for chaining)
1002      */
setHexValue(boolean b)1003     public BagFormatter setHexValue(boolean b) {
1004         hexValue = b;
1005         return this;
1006     }
1007 
1008     /**
1009      * @return the full total
1010      */
getFullTotal()1011     public int getFullTotal() {
1012         return fullTotal;
1013     }
1014 
1015     /**
1016      * @param i set the full total
1017      * @return this (for chaining)
1018      */
setFullTotal(int i)1019     public BagFormatter setFullTotal(int i) {
1020         fullTotal = i;
1021         return this;
1022     }
1023 
1024     /**
1025      * @return the line separator
1026      */
getLineSeparator()1027     public String getLineSeparator() {
1028         return lineSeparator;
1029     }
1030 
1031     /**
1032      * @param string
1033      * @return this (for chaining)
1034      */
setLineSeparator(String string)1035     public BagFormatter setLineSeparator(String string) {
1036         lineSeparator = string;
1037         return this;
1038     }
1039 
1040     /**
1041      * @return the UnicodeLabel representing the range break source
1042      */
getRangeBreakSource()1043     public UnicodeLabel getRangeBreakSource() {
1044         if (rangeBreakSource == null) {
1045             Map labelMap = new HashMap();
1046             // reflects the code point types on p 25
1047             labelMap.put("Lo", "G&");
1048             labelMap.put("Lm", "G&");
1049             labelMap.put("Lu", "G&");
1050             labelMap.put("Lt", "G&");
1051             labelMap.put("Ll", "G&");
1052             labelMap.put("Mn", "G&");
1053             labelMap.put("Me", "G&");
1054             labelMap.put("Mc", "G&");
1055             labelMap.put("Nd", "G&");
1056             labelMap.put("Nl", "G&");
1057             labelMap.put("No", "G&");
1058             labelMap.put("Zs", "G&");
1059             labelMap.put("Pd", "G&");
1060             labelMap.put("Ps", "G&");
1061             labelMap.put("Pe", "G&");
1062             labelMap.put("Pc", "G&");
1063             labelMap.put("Po", "G&");
1064             labelMap.put("Pi", "G&");
1065             labelMap.put("Pf", "G&");
1066             labelMap.put("Sm", "G&");
1067             labelMap.put("Sc", "G&");
1068             labelMap.put("Sk", "G&");
1069             labelMap.put("So", "G&");
1070 
1071             labelMap.put("Zl", "Cf");
1072             labelMap.put("Zp", "Cf");
1073 
1074             rangeBreakSource =
1075                     new UnicodeProperty.FilteredProperty(
1076                                     getUnicodePropertyFactory().getProperty("General_Category"),
1077                                     new UnicodeProperty.MapFilter(labelMap))
1078                             .setAllowValueAliasCollisions(true);
1079 
1080             /*
1081             "Cn", // = Other, Not Assigned 0
1082             "Cc", // = Other, Control 15
1083             "Cf", // = Other, Format 16
1084             UnicodeProperty.UNUSED, // missing
1085             "Co", // = Other, Private Use 18
1086             "Cs", // = Other, Surrogate 19
1087              */
1088         }
1089         return rangeBreakSource;
1090     }
1091 
1092     /**
1093      * @param label
1094      */
setRangeBreakSource(UnicodeLabel label)1095     public BagFormatter setRangeBreakSource(UnicodeLabel label) {
1096         if (label == null) label = UnicodeLabel.NULL;
1097         rangeBreakSource = label;
1098         return this;
1099     }
1100 
1101     /**
1102      * @return Returns the fixName.
1103      */
getFixName()1104     public Transliterator getFixName() {
1105         return fixName;
1106     }
1107     /**
1108      * @param fixName The fixName to set.
1109      */
setFixName(Transliterator fixName)1110     public BagFormatter setFixName(Transliterator fixName) {
1111         this.fixName = fixName;
1112         return this;
1113     }
1114 
getTabber()1115     public Tabber getTabber() {
1116         return tabber;
1117     }
1118 
setTabber(Tabber tabber)1119     public void setTabber(Tabber tabber) {
1120         this.tabber = tabber;
1121     }
1122 
isShowTotal()1123     public boolean isShowTotal() {
1124         return showTotal;
1125     }
1126 
setShowTotal(boolean showTotal)1127     public void setShowTotal(boolean showTotal) {
1128         this.showTotal = showTotal;
1129     }
1130 }
1131