• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2002-2016, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  */
7 package org.unicode.cldr.util.props;
8 
9 import java.io.PrintWriter;
10 import java.io.StringWriter;
11 import java.text.MessageFormat;
12 import java.util.Collection;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Locale;
16 import java.util.Map;
17 
18 import org.unicode.cldr.draft.FileUtilities;
19 import org.unicode.cldr.util.Tabber;
20 import org.unicode.cldr.util.Visitor;
21 
22 import com.ibm.icu.impl.Utility;
23 import com.ibm.icu.text.NumberFormat;
24 import com.ibm.icu.text.Transliterator;
25 import com.ibm.icu.text.UTF16;
26 import com.ibm.icu.text.UnicodeSet;
27 
28 public class BagFormatter {
/** Switch for the diagnostic output this class prints to {@code System.out}. */
static final boolean DEBUG = false;

/** True when the {@code SHOW_FILES} system property is set (to any value). */
public static final boolean SHOW_FILES;
static {
    boolean propertyPresent;
    try {
        propertyPresent = System.getProperty("SHOW_FILES") != null;
    } catch (SecurityException ignored) {
        // Reading the property was refused; behave as if it were not set.
        propertyPresent = false;
    }
    SHOW_FILES = propertyPresent;
}
40 
41     public static final PrintWriter CONSOLE = new PrintWriter(System.out,true);
42 
43     private static PrintWriter log = CONSOLE;
44 
45     private boolean abbreviated = false;
46     private String separator = ",";
47     private String prefix = "[";
48     private String suffix = "]";
49     private UnicodeProperty.Factory source;
50     private UnicodeLabel nameSource;
51     private UnicodeLabel labelSource;
52     private UnicodeLabel rangeBreakSource;
53     private UnicodeLabel valueSource;
54     private String propName = "";
55     private boolean showCount = true;
56     //private boolean suppressReserved = true;
57     private boolean hexValue = false;
58     private static final String NULL_VALUE = "_NULL_VALUE_";
59     private int fullTotal = -1;
60     private boolean showTotal = true;
61     private String lineSeparator = System.lineSeparator();
62     private Tabber tabber = new Tabber.MonoTabber();
63 
64     /**
65      * Compare two UnicodeSets, and show the differences
66      * @param name1 name of first set to be compared
67      * @param set1 first set
68      * @param name2 name of second set to be compared
69      * @param set2 second set
70      * @return formatted string
71      */
showSetDifferences( String name1, UnicodeSet set1, String name2, UnicodeSet set2)72     public String showSetDifferences(
73         String name1,
74         UnicodeSet set1,
75         String name2,
76         UnicodeSet set2) {
77 
78         StringWriter result = new StringWriter();
79         showSetDifferences(new PrintWriter(result),name1,set1,name2,set2);
80         result.flush();
81         return result.getBuffer().toString();
82     }
83 
showSetDifferences( String name1, Collection set1, String name2, Collection set2)84     public String showSetDifferences(
85         String name1,
86         Collection set1,
87         String name2,
88         Collection set2) {
89 
90         StringWriter result = new StringWriter();
91         showSetDifferences(new PrintWriter(result), name1, set1, name2, set2);
92         result.flush();
93         return result.getBuffer().toString();
94     }
95 
showSetDifferences( PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2)96     public void showSetDifferences(
97         PrintWriter pw,
98         String name1,
99         UnicodeSet set1,
100         String name2,
101         UnicodeSet set2) {
102         showSetDifferences(pw, name1, set1, name2, set2, -1);
103     }
104     /**
105      * Compare two UnicodeSets, and show the differences
106      * @param name1 name of first set to be compared
107      * @param set1 first set
108      * @param name2 name of second set to be compared
109      * @param set2 second set
110      */
showSetDifferences( PrintWriter pw, String name1, UnicodeSet set1, String name2, UnicodeSet set2, int flags)111     public void showSetDifferences(
112         PrintWriter pw,
113         String name1,
114         UnicodeSet set1,
115         String name2,
116         UnicodeSet set2,
117         int flags)
118     {
119         if (pw == null) pw = FileUtilities.CONSOLE;
120         String[] names = { name1, name2 };
121 
122         UnicodeSet temp;
123 
124         if ((flags&1) != 0) {
125             temp = new UnicodeSet(set1).removeAll(set2);
126             pw.print(lineSeparator);
127             pw.print(inOut.format(names));
128             pw.print(lineSeparator);
129             showSetNames(pw, temp);
130         }
131 
132         if ((flags&2) != 0) {
133             temp = new UnicodeSet(set2).removeAll(set1);
134             pw.print(lineSeparator);
135             pw.print(outIn.format(names));
136             pw.print(lineSeparator);
137             showSetNames(pw, temp);
138         }
139 
140         if ((flags&4) != 0) {
141             temp = new UnicodeSet(set2).retainAll(set1);
142             pw.print(lineSeparator);
143             pw.print(inIn.format(names));
144             pw.print(lineSeparator);
145             showSetNames(pw, temp);
146         }
147         pw.flush();
148     }
149 
showSetDifferences( PrintWriter pw, String name1, Collection set1, String name2, Collection set2)150     public void showSetDifferences(
151         PrintWriter pw,
152         String name1,
153         Collection set1,
154         String name2,
155         Collection set2) {
156 
157         if (pw == null) pw = FileUtilities.CONSOLE;
158         String[] names = { name1, name2 };
159         // damn'd collection doesn't have a clone, so
160         // we go with Set, even though that
161         // may not preserve order and duplicates
162         Collection temp = new HashSet(set1);
163         temp.removeAll(set2);
164         pw.println();
165         pw.println(inOut.format(names));
166         showSetNames(pw, temp);
167 
168         temp.clear();
169         temp.addAll(set2);
170         temp.removeAll(set1);
171         pw.println();
172         pw.println(outIn.format(names));
173         showSetNames(pw, temp);
174 
175         temp.clear();
176         temp.addAll(set1);
177         temp.retainAll(set2);
178         pw.println();
179         pw.println(inIn.format(names));
180         showSetNames(pw, temp);
181     }
182 
183     /**
184      * Returns a list of items in the collection, with each separated by the separator.
185      * Each item must not be null; its toString() is called for a printable representation
186      * @param c source collection
187      * @return a String representation of the list
188      */
showSetNames(Object c)189     public String showSetNames(Object c) {
190         StringWriter buffer = new StringWriter();
191         PrintWriter output = new PrintWriter(buffer);
192         showSetNames(output,c);
193         return buffer.toString();
194     }
195 
196     /**
197      * Returns a list of items in the collection, with each separated by the separator.
198      * Each item must not be null; its toString() is called for a printable representation
199      * @param output destination to which to write names
200      * @param c source collection
201      */
showSetNames(PrintWriter output, Object c)202     public void showSetNames(PrintWriter output, Object c) {
203         mainVisitor.doAt(c, output);
204         output.flush();
205     }
206 
getAbbreviatedName( String src, String pattern, String substitute)207     public String getAbbreviatedName(
208         String src,
209         String pattern,
210         String substitute) {
211 
212         int matchEnd = NameIterator.findMatchingEnd(src, pattern);
213         int sdiv = src.length() - matchEnd;
214         int pdiv = pattern.length() - matchEnd;
215         StringBuffer result = new StringBuffer();
216         addMatching(
217             src.substring(0, sdiv),
218             pattern.substring(0, pdiv),
219             substitute,
220             result);
221         addMatching(
222             src.substring(sdiv),
223             pattern.substring(pdiv),
224             substitute,
225             result);
226         return result.toString();
227     }
228 
229     abstract public static class Relation {
getRelation(String a, String b)230         abstract public String getRelation(String a, String b);
231     }
232 
233     static class NullRelation extends Relation {
234         @Override
getRelation(String a, String b)235         public String getRelation(String a, String b) { return ""; }
236     }
237 
238     private Relation r = new NullRelation();
239 
setRelation(Relation r)240     public BagFormatter setRelation(Relation r) {
241         this.r = r;
242         return this; // for chaining
243     }
244 
getRelation()245     public Relation getRelation() {
246         return r;
247     }
248 
/*
 r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
 */
252     /*
253     static final UnicodeSet NO_NAME =
254         new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
255     static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
256     static final UnicodeSet NAME_CHARACTERS =
257         new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");
258 
259     public UnicodeSet getSetForName(String namePattern) {
260         UnicodeSet result = new UnicodeSet();
261         Matcher m = Pattern.compile(namePattern).matcher("");
262         // check for no-name items, and add in bulk
263         m.reset("<no name>");
264         if (m.matches()) {
265             result.addAll(NO_NAME);
266         }
267         // check all others
268         UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
269         while (usi.next()) {
270             String name = getName(usi.codepoint);
271             if (name == null)
272                 continue;
273             m.reset(name);
274             if (m.matches()) {
275                 result.add(usi.codepoint);
276             }
277         }
278         // Note: if Regex had some API so that if we could tell that
279         // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
280         // then we could optimize by skipping whole swathes of characters
281         return result;
282     }
283      */
284 
setMergeRanges(boolean in)285     public BagFormatter setMergeRanges(boolean in) {
286         mergeRanges = in;
287         return this;
288     }
setShowSetAlso(boolean b)289     public BagFormatter setShowSetAlso(boolean b) {
290         showSetAlso = b;
291         return this;
292     }
293 
getName(int codePoint)294     public String getName(int codePoint) {
295         return getName("", codePoint, codePoint);
296     }
297 
getName(String sep, int start, int end)298     public String getName(String sep, int start, int end) {
299         if (getNameSource() == null || getNameSource() == UnicodeLabel.NULL) return "";
300         String result = getName(start, false);
301         if (start == end) return sep + result;
302         String endString = getName(end, false);
303         if (result.length() == 0 && endString.length() == 0) return sep;
304         if (abbreviated) endString = getAbbreviatedName(endString,result,"~");
305         return sep + result + ".." + endString;
306     }
307 
getName(String s)308     public String getName(String s) {
309         return getName(s, false);
310     }
311 
312     public static class NameLabel extends UnicodeLabel {
313         UnicodeProperty nameProp;
314         UnicodeSet control;
315         UnicodeSet private_use;
316         UnicodeSet noncharacter;
317         UnicodeSet surrogate;
318 
NameLabel(UnicodeProperty.Factory source)319         public NameLabel(UnicodeProperty.Factory source) {
320             nameProp = source.getProperty("Name");
321             control = source.getSet("gc=Cc");
322             private_use = source.getSet("gc=Co");
323             surrogate = source.getSet("gc=Cs");
324             noncharacter = source.getSet("noncharactercodepoint=yes");
325         }
326 
327         @Override
getValue(int codePoint, boolean isShort)328         public String getValue(int codePoint, boolean isShort) {
329             String hcp = !isShort
330                 ? "U+" + Utility.hex(codePoint, 4) + " "
331                     : "";
332             String result = nameProp.getValue(codePoint);
333             if (result != null)
334                 return hcp + result;
335             if (control.contains(codePoint)) {
336                 return "<control-" + Utility.hex(codePoint, 4) + ">";
337             }
338             if (private_use.contains(codePoint)) {
339                 return "<private-use-" + Utility.hex(codePoint, 4) + ">";
340             }
341             if (surrogate.contains(codePoint)) {
342                 return "<surrogate-" + Utility.hex(codePoint, 4) + ">";
343             }
344             if (noncharacter.contains(codePoint)) {
345                 return "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
346             }
347             //if (suppressReserved) return "";
348             return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">";
349         }
350 
351     }
352 
353     // refactored
getName(int codePoint, boolean withCodePoint)354     public String getName(int codePoint, boolean withCodePoint) {
355         String result = getNameSource().getValue(codePoint, !withCodePoint);
356         return fixName == null ? result : fixName.transliterate(result);
357     }
358 
getName(String s, boolean withCodePoint)359     public String getName(String s, boolean withCodePoint) {
360         String result = getNameSource().getValue(s, separator, !withCodePoint);
361         return fixName == null ? result : fixName.transliterate(result);
362     }
363 
hex(String s)364     public String hex(String s) {
365         return hex(s,separator);
366     }
367 
hex(String s, String sep)368     public String hex(String s, String sep) {
369         return UnicodeLabel.HEX.getValue(s, sep, true);
370     }
371 
hex(int start, int end)372     public String hex(int start, int end) {
373         String s = Utility.hex(start,4);
374         if (start == end) return s;
375         return s + ".." + Utility.hex(end,4);
376     }
377 
setUnicodePropertyFactory(UnicodeProperty.Factory source)378     public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) {
379         this.source = source;
380         return this;
381     }
382 
getUnicodePropertyFactory()383     private UnicodeProperty.Factory getUnicodePropertyFactory() {
384         if (source == null) source = ICUPropertyFactory.make();
385         return source;
386     }
387 
/** Creates a formatter with no property factory set yet. */
public BagFormatter() {
}

/**
 * Creates a formatter that uses the given property factory.
 *
 * @param source the factory to use
 */
public BagFormatter(UnicodeProperty.Factory source) {
    setUnicodePropertyFactory(source);
}
394 
join(Object o)395     public String join(Object o) {
396         return labelVisitor.join(o);
397     }
398 
399     // ===== PRIVATES =====
400 
401     private Join labelVisitor = new Join();
402 
403     private boolean mergeRanges = true;
404     private Transliterator showLiteral = null;
405     private Transliterator fixName = null;
406     private boolean showSetAlso = false;
407 
408     private RangeFinder rf = new RangeFinder();
409 
410     private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:");
411     private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:");
412     private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:");
413 
414     private MyVisitor mainVisitor = new MyVisitor();
415 
416     /*
417     private String getLabels(int start, int end) {
418         Set names = new TreeSet();
419         for (int cp = start; cp <= end; ++cp) {
420             names.add(getLabel(cp));
421         }
422         return labelVisitor.join(names);
423     }
424      */
425 
addMatching( String src, String pattern, String substitute, StringBuffer result)426     private void addMatching(
427         String src,
428         String pattern,
429         String substitute,
430         StringBuffer result) {
431         NameIterator n1 = new NameIterator(src);
432         NameIterator n2 = new NameIterator(pattern);
433         boolean first = true;
434         while (true) {
435             String s1 = n1.next();
436             if (s1 == null)
437                 break;
438             String s2 = n2.next();
439             if (!first)
440                 result.append(" ");
441             first = false;
442             if (s1.equals(s2))
443                 result.append(substitute);
444             else
445                 result.append(s1);
446         }
447     }
448 
449     private static NumberFormat nf =
450         NumberFormat.getIntegerInstance(Locale.ENGLISH);
451     static {
452         nf.setGroupingUsed(false);
453     }
454 
455     private int maxWidthOverride = -1;
456     private int maxLabelWidthOverride = -1;
457 
setValueWidthOverride(int maxWidthOverride)458     public BagFormatter setValueWidthOverride(int maxWidthOverride) {
459         this.maxWidthOverride  = maxWidthOverride;
460         return this;
461     }
462 
getValueWidthOverride()463     public int getValueWidthOverride() {
464         return maxWidthOverride;
465     }
466 
setLabelWidthOverride(int maxWidthOverride)467     public BagFormatter setLabelWidthOverride(int maxWidthOverride) {
468         this.maxLabelWidthOverride  = maxWidthOverride;
469         return this;
470     }
471 
getLabelWidthOverride()472     public int getLabelWidthOverride() {
473         return maxLabelWidthOverride;
474     }
475 
476 
477     private class MyVisitor extends Visitor {
478         private PrintWriter output;
479         String commentSeparator;
480         int counter;
481         int valueSize;
482         int labelSize;
483         boolean isHtml;
484         boolean inTable = false;
485 
toOutput(String s)486         public void toOutput(String s) {
487             if (isHtml) {
488                 if (inTable) {
489                     output.print("</table>");
490                     inTable = false;
491                 }
492                 output.print("<p>");
493             }
494             output.print(s);
495             if (isHtml)
496                 output.println("</p>");
497             else
498                 output.print(lineSeparator);
499         }
500 
toTable(String s)501         public void toTable(String s) {
502             if (isHtml && !inTable) {
503                 output.print("<table>");
504                 inTable = true;
505             }
506             output.print(tabber.process(s) +  lineSeparator);
507         }
508 
doAt(Object c, PrintWriter out)509         public void doAt(Object c, PrintWriter out) {
510             output = out;
511             isHtml = tabber instanceof Tabber.HTMLTabber;
512             counter = 0;
513 
514             tabber.clear();
515             // old:
516             // 0009..000D    ; White_Space # Cc   [5] <control-0009>..<control-000D>
517             // new
518             // 0009..000D    ; White_Space #Cc  [5] <control>..<control>
519             tabber.add(mergeRanges ? 14 : 6,Tabber.LEFT);
520 
521             if (propName.length() > 0) {
522                 tabber.add(propName.length() + 2,Tabber.LEFT);
523             }
524 
525             valueSize = maxWidthOverride > 0 ? maxWidthOverride : getValueSource().getMaxWidth(shortValue);
526 
527             if (DEBUG) System.out.println("ValueSize: " + valueSize);
528             if (valueSize > 0) {
529                 tabber.add(valueSize + 2,Tabber.LEFT); // value
530             }
531 
532             tabber.add(3,Tabber.LEFT); // comment character
533 
534             labelSize = maxLabelWidthOverride > 0 ? maxLabelWidthOverride : getLabelSource(true).getMaxWidth(shortLabel);
535             if (labelSize > 0) {
536                 tabber.add(labelSize + 1,Tabber.LEFT); // value
537             }
538 
539             if (mergeRanges && showCount) {
540                 tabber.add(5,Tabber.RIGHT);
541             }
542 
543             if (showLiteral != null) {
544                 tabber.add(4,Tabber.LEFT);
545             }
546             //myTabber.add(7,Tabber.LEFT);
547 
548             commentSeparator = (showCount || showLiteral != null
549                 || getLabelSource(true) != UnicodeLabel.NULL
550                 || getNameSource() != UnicodeLabel.NULL)
551                 ? "\t #" : "";
552 
553             if (DEBUG) System.out.println("Tabber: " + tabber.toString());
554             if (DEBUG) System.out.println("Tabber: " + tabber.process(
555                 "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER"));
556             doAt(c);
557         }
558 
559         @SuppressWarnings("unused")
format(Object o)560         public String format(Object o) {
561             StringWriter sw = new StringWriter();
562             PrintWriter pw = new PrintWriter(sw);
563             doAt(o);
564             pw.flush();
565             String result = sw.getBuffer().toString();
566             pw.close();
567             return result;
568         }
569 
570         @Override
doBefore(Object container, Object o)571         protected void doBefore(Object container, Object o) {
572             if (showSetAlso && container instanceof UnicodeSet) {
573                 toOutput("#" + container);
574             }
575         }
576 
577         @Override
doBetween(Object container, Object lastItem, Object nextItem)578         protected void doBetween(Object container, Object lastItem, Object nextItem) {
579         }
580 
581         @Override
doAfter(Object container, Object o)582         protected void doAfter(Object container, Object o) {
583             if (fullTotal != -1 && fullTotal != counter) {
584                 if (showTotal) {
585                     toOutput("");
586                     toOutput("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here.");
587                     toOutput("# Total code points: " + nf.format(fullTotal));
588                 }
589                 fullTotal = -1;
590             } else if (showTotal) {
591                 toOutput("");
592                 toOutput("# Total code points: " + nf.format(counter));
593             }
594         }
595 
596         @Override
doSimpleAt(Object o)597         protected void doSimpleAt(Object o) {
598             if (o instanceof Map.Entry) {
599                 Map.Entry oo = (Map.Entry)o;
600                 Object key = oo.getKey();
601                 Object value = oo.getValue();
602                 doBefore(o, key);
603                 doAt(key);
604                 output.println("\u2192");
605                 doAt(value);
606                 doAfter(o, value);
607                 counter++;
608             } else if (o instanceof Visitor.CodePointRange) {
609                 doAt((Visitor.CodePointRange) o);
610             } else {
611                 String thing = o.toString();
612                 String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true);
613                 if (getValueSource() != UnicodeLabel.NULL) value = "\t; " + value;
614                 String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true);
615                 if (label.length() != 0) label = " " + label;
616                 toTable(
617                     hex(thing)
618                     + value
619                     + commentSeparator
620                     + label
621                     + insertLiteral(thing)
622                     + "\t"
623                     + getName(thing));
624                 counter++;
625             }
626         }
627 
doAt(Visitor.CodePointRange usi)628         protected void doAt(Visitor.CodePointRange usi) {
629             if (!mergeRanges) {
630                 for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
631                     showLine(cp, cp);
632                 }
633             } else {
634                 rf.reset(usi.codepoint, usi.codepointEnd + 1);
635                 while (rf.next()) {
636                     showLine(rf.start, rf.limit - 1);
637                 }
638             }
639         }
640 
showLine(int start, int end)641         private void showLine(int start, int end) {
642             String label = getLabelSource(true).getValue(start, shortLabel);
643             String value = getValue(start, shortValue);
644             if (value == NULL_VALUE) return;
645 
646             counter += end - start + 1;
647             String pn = propName;
648             if (pn.length() != 0) {
649                 pn = "\t; " + pn;
650             }
651             if (valueSize > 0) {
652                 value = "\t; " + value;
653             } else if (value.length() > 0) {
654                 throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue));
655             }
656             if (labelSize > 0) {
657                 label = "\t" + label;
658             } else if (label.length() > 0) {
659                 throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel));
660             }
661 
662             String count = "";
663             if (mergeRanges && showCount) {
664                 if (end == start) count = "\t";
665                 else count = "\t ["+ nf.format(end - start + 1)+ "]";
666             }
667 
668             toTable(
669                 hex(start, end)
670                 + pn
671                 + value
672                 + commentSeparator
673                 + label
674                 + count
675                 + insertLiteral(start, end)
676                 + getName("\t ", start, end));
677         }
678 
insertLiteral(String thing)679         private String insertLiteral(String thing) {
680             return (showLiteral == null ? ""
681                 :  " \t(" + showLiteral.transliterate(thing) + ") ");
682         }
683 
insertLiteral(int start, int end)684         private String insertLiteral(int start, int end) {
685             return (showLiteral == null ? "" :
686                 " \t(" + showLiteral.transliterate(UTF16.valueOf(start))
687                 + ((start != end)
688                     ? (".." + showLiteral.transliterate(UTF16.valueOf(end)))
689                         : "")
690                 + ") ");
691         }
692         /*
693         private String insertLiteral(int cp) {
694             return (showLiteral == null ? ""
695                 :  " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
696         }
697          */
698     }
699 
700     /**
701      * Iterate through a string, breaking at words.
702      * @author Davis
703      */
704     private static class NameIterator {
705         String source;
706         int position;
707         int limit;
708 
NameIterator(String source)709         NameIterator(String source) {
710             this.source = source;
711             this.limit = source.length();
712         }
713         /**
714          * Find next word, including trailing spaces
715          * @return the next word
716          */
next()717         String next() {
718             if (position >= limit)
719                 return null;
720             int pos = source.indexOf(' ', position);
721             if (pos < 0 || pos >= limit)
722                 pos = limit;
723             String result = source.substring(position, pos);
724             position = pos + 1;
725             return result;
726         }
727 
findMatchingEnd(String s1, String s2)728         static int findMatchingEnd(String s1, String s2) {
729             int i = s1.length();
730             int j = s2.length();
731             try {
732                 while (true) {
733                     --i; // decrement both before calling function!
734                     --j;
735                     if (s1.charAt(i) != s2.charAt(j))
736                         break;
737                 }
738             } catch (Exception e) {} // run off start
739 
740             ++i; // counteract increment
741             i = s1.indexOf(' ', i); // move forward to space
742             if (i < 0)
743                 return 0;
744             return s1.length() - i;
745         }
746     }
747 
748     private class RangeFinder {
749         int start, limit;
750         private int veryLimit;
751         //String label, value;
reset(int rangeStart, int rangeLimit)752         void reset(int rangeStart, int rangeLimit) {
753             limit = rangeStart;
754             veryLimit = rangeLimit;
755         }
next()756         boolean next() {
757             if (limit >= veryLimit)
758                 return false;
759             start = limit; // set to end of last
760             String label = getLabelSource(false).getValue(limit, true);
761             String value = getValue(limit, true);
762             String breaker = getRangeBreakSource().getValue(limit,true);
763             if (DEBUG && 0x3FFD < limit && limit < 0x9FD6) {
764                 System.out.println(Utility.hex(limit) + ", Label: " + label + ", Value: " + value + ", Break: " + breaker);
765             }
766             limit++;
767             for (; limit < veryLimit; limit++) {
768                 String s = getLabelSource(false).getValue(limit, true);
769                 String v = getValue(limit, true);
770                 String b = getRangeBreakSource().getValue(limit, true);
771                 if (DEBUG && limit > 0x9FD4) {
772                     System.out.println(Utility.hex(limit) + ", *Label: " + s + ", Value: " + v + ", Break: " + b);
773                 }
774                 if (!equalTo(s, label)
775                     || !equalTo(v, value)
776                     || !equalTo(b, breaker)) {
777                     break;
778                 }
779             }
780             // at this point, limit is the first item that has a different label than source
781             // OR, we got to the end, and limit == veryLimit
782             return true;
783         }
784     }
785 
equalTo(Object a, Object b)786     boolean equalTo(Object a, Object b) {
787         if (a == b) return true;
788         if (a == null) return false;
789         return a.equals(b);
790     }
791 
792     boolean shortLabel = true;
793     boolean shortValue = true;
794 
getPrefix()795     public String getPrefix() {
796         return prefix;
797     }
798 
getSuffix()799     public String getSuffix() {
800         return suffix;
801     }
802 
setPrefix(String string)803     public BagFormatter setPrefix(String string) {
804         prefix = string;
805         return this;
806     }
807 
setSuffix(String string)808     public BagFormatter setSuffix(String string) {
809         suffix = string;
810         return this;
811     }
812 
isAbbreviated()813     public boolean isAbbreviated() {
814         return abbreviated;
815     }
816 
setAbbreviated(boolean b)817     public BagFormatter setAbbreviated(boolean b) {
818         abbreviated = b;
819         return this;
820     }
821 
getLabelSource(boolean visible)822     public UnicodeLabel getLabelSource(boolean visible) {
823         if (labelSource == null) {
824             Map labelMap = new HashMap();
825             //labelMap.put("Lo","L&");
826             labelMap.put("Lu","L&");
827             labelMap.put("Lt","L&");
828             labelMap.put("Ll","L&");
829             labelSource = new UnicodeProperty.FilteredProperty(
830                 getUnicodePropertyFactory().getProperty("General_Category"),
831                 new UnicodeProperty.MapFilter(labelMap)
832                 ).setAllowValueAliasCollisions(true);
833         }
834         return labelSource;
835     }
836 
    /**
     * Adds the contents of a UnicodeSet to a collection by delegating to
     * {@code source.addAllTo(target)}.
     *
     * @param source the set whose contents are added
     * @param target the collection that receives them
     * @deprecated this is a pure pass-through; call
     *             {@code source.addAllTo(target)} directly.
     */
    @Deprecated
    public static void addAll(UnicodeSet source, Collection target) {
        source.addAllTo(target);
    }
844 
845     // UTILITIES
846 
    /**
     * Shared transliterator created from a filtered "hex" ID. The leading set
     * is a negated filter, so only characters outside U+0009, U+0020..U+007E
     * and U+00A0..U+00FF are handed to the "hex" transform.
     * NOTE(review): the exact output format depends on how the "hex" ID is
     * registered with ICU -- confirm against the Transliterator registry.
     */
    public static final Transliterator hex = Transliterator.getInstance(
        "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");
849 
getSeparator()850     public String getSeparator() {
851         return separator;
852     }
setSeparator(String string)853     public BagFormatter setSeparator(String string) {
854         separator = string;
855         return this;
856     }
getShowLiteral()857     public Transliterator getShowLiteral() {
858         return showLiteral;
859     }
setShowLiteral(Transliterator transliterator)860     public BagFormatter setShowLiteral(Transliterator transliterator) {
861         showLiteral = transliterator;
862         return this;
863     }
864 
865     // ===== CONVENIENCES =====
866     private class Join extends Visitor {
867         StringBuffer output = new StringBuffer();
868         @SuppressWarnings("unused")
869         int depth = 0;
join(Object o)870         String join (Object o) {
871             output.setLength(0);
872             doAt(o);
873             return output.toString();
874         }
875         @Override
doBefore(Object container, Object item)876         protected void doBefore(Object container, Object item) {
877             ++depth;
878             output.append(prefix);
879         }
880         @Override
doAfter(Object container, Object item)881         protected void doAfter(Object container, Object item) {
882             output.append(suffix);
883             --depth;
884         }
885         @Override
doBetween(Object container, Object lastItem, Object nextItem)886         protected void doBetween(Object container, Object lastItem, Object nextItem) {
887             output.append(separator);
888         }
889         @Override
doSimpleAt(Object o)890         protected void doSimpleAt(Object o) {
891             if (o != null) output.append(o.toString());
892         }
893     }
894 
895     /**
896      * @param label
897      */
setLabelSource(UnicodeLabel label)898     public BagFormatter setLabelSource(UnicodeLabel label) {
899         if (label == null) label = UnicodeLabel.NULL;
900         labelSource = label;
901         return this;
902     }
903 
904     /**
905      * @return the NameLable representing the source
906      */
getNameSource()907     public UnicodeLabel getNameSource() {
908         if (nameSource == null) {
909             nameSource = new NameLabel(getUnicodePropertyFactory());
910         }
911         return nameSource;
912     }
913 
914     /**
915      * @param label
916      */
setNameSource(UnicodeLabel label)917     public BagFormatter setNameSource(UnicodeLabel label) {
918         if (label == null) label = UnicodeLabel.NULL;
919         nameSource = label;
920         return this;
921     }
922 
923     /**
924      * @return the UnicodeLabel representing the value
925      */
getValueSource()926     public UnicodeLabel getValueSource() {
927         if (valueSource == null) valueSource = UnicodeLabel.NULL;
928         return valueSource;
929     }
930 
getValue(int cp, boolean shortVal)931     private String getValue(int cp, boolean shortVal) {
932         String result = getValueSource().getValue(cp, shortVal);
933         if (result == null) return NULL_VALUE;
934         if (hexValue) result = hex(result, " ");
935         return result;
936     }
937 
938     /**
939      * @param label
940      */
setValueSource(UnicodeLabel label)941     public BagFormatter setValueSource(UnicodeLabel label) {
942         if (label == null) label = UnicodeLabel.NULL;
943         valueSource = label;
944         return this;
945     }
946 
setValueSource(String label)947     public BagFormatter setValueSource(String label) {
948         return setValueSource(new UnicodeLabel.Constant(label));
949     }
950 
951     /**
952      * @return true if showCount is true
953      */
isShowCount()954     public boolean isShowCount() {
955         return showCount;
956     }
957 
958     /**
959      * @param b true to show the count
960      * @return this (for chaining)
961      */
setShowCount(boolean b)962     public BagFormatter setShowCount(boolean b) {
963         showCount = b;
964         return this;
965     }
966 
967     /**
968      * @return the property name
969      */
getPropName()970     public String getPropName() {
971         return propName;
972     }
973 
974     /**
975      * @param string
976      * @return this (for chaining)
977      */
setPropName(String string)978     public BagFormatter setPropName(String string) {
979         if (string == null) string = "";
980         propName = string;
981         return this;
982     }
983 
984     /**
985      * @return true if this is a hexValue
986      */
isHexValue()987     public boolean isHexValue() {
988         return hexValue;
989     }
990 
991     /**
992      * @param b
993      * @return this (for chaining)
994      */
setHexValue(boolean b)995     public BagFormatter setHexValue(boolean b) {
996         hexValue = b;
997         return this;
998     }
999 
1000     /**
1001      * @return the full total
1002      */
getFullTotal()1003     public int getFullTotal() {
1004         return fullTotal;
1005     }
1006 
1007     /**
1008      * @param i set the full total
1009      * @return this (for chaining)
1010      */
setFullTotal(int i)1011     public BagFormatter setFullTotal(int i) {
1012         fullTotal = i;
1013         return this;
1014     }
1015 
1016     /**
1017      * @return the line separator
1018      */
getLineSeparator()1019     public String getLineSeparator() {
1020         return lineSeparator;
1021     }
1022 
1023     /**
1024      * @param string
1025      * @return this (for chaining)
1026      */
setLineSeparator(String string)1027     public BagFormatter setLineSeparator(String string) {
1028         lineSeparator = string;
1029         return this;
1030     }
1031 
1032     /**
1033      * @return the UnicodeLabel representing the range break source
1034      */
getRangeBreakSource()1035     public UnicodeLabel getRangeBreakSource() {
1036         if (rangeBreakSource == null) {
1037             Map labelMap = new HashMap();
1038             // reflects the code point types on p 25
1039             labelMap.put("Lo", "G&");
1040             labelMap.put("Lm", "G&");
1041             labelMap.put("Lu", "G&");
1042             labelMap.put("Lt", "G&");
1043             labelMap.put("Ll", "G&");
1044             labelMap.put("Mn", "G&");
1045             labelMap.put("Me", "G&");
1046             labelMap.put("Mc", "G&");
1047             labelMap.put("Nd", "G&");
1048             labelMap.put("Nl", "G&");
1049             labelMap.put("No", "G&");
1050             labelMap.put("Zs", "G&");
1051             labelMap.put("Pd", "G&");
1052             labelMap.put("Ps", "G&");
1053             labelMap.put("Pe", "G&");
1054             labelMap.put("Pc", "G&");
1055             labelMap.put("Po", "G&");
1056             labelMap.put("Pi", "G&");
1057             labelMap.put("Pf", "G&");
1058             labelMap.put("Sm", "G&");
1059             labelMap.put("Sc", "G&");
1060             labelMap.put("Sk", "G&");
1061             labelMap.put("So", "G&");
1062 
1063             labelMap.put("Zl", "Cf");
1064             labelMap.put("Zp", "Cf");
1065 
1066             rangeBreakSource =
1067                 new UnicodeProperty
1068                 .FilteredProperty(
1069                     getUnicodePropertyFactory().getProperty(
1070                         "General_Category"),
1071                     new UnicodeProperty.MapFilter(labelMap))
1072                 .setAllowValueAliasCollisions(true);
1073 
1074             /*
1075             "Cn", // = Other, Not Assigned 0
1076             "Cc", // = Other, Control 15
1077             "Cf", // = Other, Format 16
1078             UnicodeProperty.UNUSED, // missing
1079             "Co", // = Other, Private Use 18
1080             "Cs", // = Other, Surrogate 19
1081              */
1082         }
1083         return rangeBreakSource;
1084     }
1085 
1086     /**
1087      * @param label
1088      */
setRangeBreakSource(UnicodeLabel label)1089     public BagFormatter setRangeBreakSource(UnicodeLabel label) {
1090         if (label == null) label = UnicodeLabel.NULL;
1091         rangeBreakSource = label;
1092         return this;
1093     }
1094 
1095     /**
1096      * @return Returns the fixName.
1097      */
getFixName()1098     public Transliterator getFixName() {
1099         return fixName;
1100     }
    /**
     * Stores the transliterator in fixName.
     *
     * @param fixName The fixName to set; stored as given.
     * @return this (for chaining)
     */
    public BagFormatter setFixName(Transliterator fixName) {
        this.fixName = fixName;
        return this;
    }
1108 
getTabber()1109     public Tabber getTabber() {
1110         return tabber;
1111     }
1112 
    /**
     * Replaces the Tabber. Unlike most setters in this class, this one
     * returns nothing and therefore cannot be chained.
     *
     * @param tabber the Tabber to store; stored as given
     */
    public void setTabber(Tabber tabber) {
        this.tabber = tabber;
    }
1116 
isShowTotal()1117     public boolean isShowTotal() {
1118         return showTotal;
1119     }
1120 
    /**
     * Sets the showTotal flag. Unlike most setters in this class, this one
     * returns nothing and therefore cannot be chained.
     *
     * @param showTotal the new flag value
     */
    public void setShowTotal(boolean showTotal) {
        this.showTotal = showTotal;
    }
1124 }
1125