android-16.0.0_r2/s

/**
 * ****************************************************************************** Copyright (C)
 * 1996-2012, International Business Machines Corporation and * others. All Rights Reserved. *
 * ********************************************************************* Author: Mark Davis
 * *********************************************************************
 */
package org.unicode.cldr.util;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.StringTransform;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UTF16.StringComparator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.ULocale;
import java.io.IOException;
import java.text.FieldPosition;
import java.util.Comparator;
import java.util.TreeSet;

/**
 * Provides more flexible formatting of UnicodeSet patterns. <br>
 * Used in the XML for UnicodeSets. <br>
 * For the Survey Tool, should use SimpleUnicodeSetFormatter.java
 */
public class UnicodeSetPrettyPrinter implements FormatterParser<UnicodeSet> {
    /**
     * Comparator used by setOrdering(): on its own when no ordering is supplied, otherwise as
     * the second comparator handed to MultiComparator. Per its name and constructor flags it
     * presumably compares in code point order, case-sensitively (confirm against the ICU docs).
     */
    private static final StringComparator CODEPOINT_ORDER =
            new UTF16.StringComparator(true, false, 0);
    /**
     * Unassigned (Cn), default-ignorable and pattern-whitespace code points. Seeded into
     * toQuote, and backslash-escaped by appendQuoted(int) when they are not in toQuote.
     */
    private static final UnicodeSet PATTERN_WHITESPACE =
            new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]")
                    .freeze();
    /** Code points that format() keeps out of the sorted set and appends after the sorted items. */
    private static final UnicodeSet SORT_AT_END =
            new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
    /**
     * UnicodeSet syntax characters plus PATTERN_WHITESPACE; addSpaceAsNeededBefore() skips its
     * mark/surrogate spacing check for these. Must stay declared after PATTERN_WHITESPACE,
     * because its initializer reads that constant.
     */
    private static final UnicodeSet QUOTED_SYNTAX =
            new UnicodeSet("[\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]").addAll(PATTERN_WHITESPACE).freeze();

    /** True until addSpaceAsNeededBefore() has been called once in the current format() call. */
    private boolean first = true;
    /** Output buffer; format() clears it and builds the pattern text here. */
    private StringBuffer target = new StringBuffer();
    /** First code point of the pending run; set to -2 when nothing is pending. */
    private int firstCodePoint = -2;
    /** Last code point of the pending run; flushLast() treats a negative value as "none". */
    private int lastCodePoint = -2;
    /** When false, appendUnicodeSetItem(int) flushes before every code point, so no runs form. */
    private boolean compressRanges = true;
    /** The item most recently written to target; compared by spaceComp to decide on spaces. */
    private String lastString = "";
    /** Code points that appendQuoted() escapes (via quoter if set, otherwise as hex escapes). */
    private UnicodeSet toQuote = new UnicodeSet(PATTERN_WHITESPACE);
    /** Optional transform used to render quoted items; null selects the built-in hex escapes. */
    private StringTransform quoter = null;

    /** Ordering handed to the TreeSet in format(); stays null until setOrdering() is called. */
    private Comparator<String> ordering;
    /** Optional comparator: a non-zero result against the previous item inserts a space. */
    private Comparator<String> spaceComp;

    /**
     * Make from root collator obtained from ICU.
     *
     * <p>Ordering uses a frozen root collator with no strength override; spacing uses a second
     * root collator set to PRIMARY strength. The raw {@code (Comparator)} casts adapt the
     * collators to the {@code Comparator<String>} parameters of {@code from}.
     *
     * <p>NOTE: this is one shared instance and the class has public setters, so reconfiguring
     * it affects every user of this constant.
     */
    public static final UnicodeSetPrettyPrinter ROOT_ICU =
            from(
                    (Comparator) Collator.getInstance(ULocale.ROOT).freeze(),
                    (Comparator)
                            Collator.getInstance(ULocale.ROOT)
                                    .setStrength2(Collator.PRIMARY)
                                    .freeze());

    /**
     * Builds a printer whose ordering comes from the ICU collator for a locale.
     *
     * <p>Items are ordered with the collator requested at IDENTICAL strength; a copy of it set
     * to PRIMARY strength decides where spaces are inserted.
     *
     * @param localeId locale identifier handed to {@code ComparatorUtilities.getIcuCollator}
     * @return a new printer configured through {@code from}
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static UnicodeSetPrettyPrinter fromIcuLocale(String localeId) {
        final Collator orderingCollator =
                ComparatorUtilities.getIcuCollator(localeId, Collator.IDENTICAL).freeze();
        final Collator spacingCollator =
                orderingCollator.cloneAsThawed().setStrength2(Collator.PRIMARY).freeze();
        // Raw casts adapt the collators to from()'s Comparator<String> parameters.
        return from((Comparator) orderingCollator, (Comparator) spacingCollator);
    }

    /**
     * Builds a printer whose ordering comes from the CLDR collator for a locale.
     *
     * <p>Items are ordered with the collator requested at IDENTICAL strength; a copy of it set
     * to PRIMARY strength decides where spaces are inserted.
     *
     * @param localeId locale identifier handed to {@code ComparatorUtilities.getCldrCollator}
     * @return a new printer configured through {@code from}
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static UnicodeSetPrettyPrinter fromCldrLocale(String localeId) {
        final Collator orderingCollator =
                ComparatorUtilities.getCldrCollator(localeId, Collator.IDENTICAL).freeze();
        final Collator spacingCollator =
                orderingCollator.cloneAsThawed().setStrength2(Collator.PRIMARY).freeze();
        // Raw casts adapt the collators to from()'s Comparator<String> parameters.
        return from((Comparator) orderingCollator, (Comparator) spacingCollator);
    }

    /**
     * Utility for creating a UnicodeSetPrettyPrinter from two comparators.
     *
     * @param col comparator that determines the order of items in the pattern
     * @param spaceCol comparator that determines where spaces are inserted between items
     * @return a new printer using both comparators, with range compression switched off
     */
    public static UnicodeSetPrettyPrinter from(
            Comparator<String> col, Comparator<String> spaceCol) {
        final UnicodeSetPrettyPrinter printer = new UnicodeSetPrettyPrinter();
        printer.setOrdering(col);
        printer.setSpaceComparator(spaceCol);
        printer.setCompressRanges(false);
        return printer;
    }

    /**
     * Creates a printer with the field defaults: range compression on, no quoter, no ordering
     * or space comparator, and a quote set containing only the pattern-whitespace constant.
     */
    public UnicodeSetPrettyPrinter() {}

    /**
     * @return the transform used to render quoted items, or null if none has been set
     */
    public StringTransform getQuoter() {
        return this.quoter;
    }

    /**
     * Sets the transform used to render items that need quoting.
     *
     * @param quoter the transform to use; null falls back to the built-in escaping
     * @return this, for chaining
     */
    public UnicodeSetPrettyPrinter setQuoter(StringTransform quoter) {
        this.quoter = quoter;
        return this;
    }

    /**
     * @return true if runs of consecutive code points are collapsed when formatting
     */
    public boolean isCompressRanges() {
        return this.compressRanges;
    }

    /**
     * Chooses whether runs of consecutive code points are collapsed.
     *
     * @param compressRanges if you want abcde instead of a-e, make this false
     * @return this, for chaining
     */
    public UnicodeSetPrettyPrinter setCompressRanges(boolean compressRanges) {
        this.compressRanges = compressRanges;
        return this; // for chaining
    }

    /**
     * @return the comparator currently used to order items; null until setOrdering is called
     */
    public Comparator<String> getOrdering() {
        return this.ordering;
    }

    /**
     * Sets the ordering of items in the formatted pattern.
     *
     * <p>A null argument selects CODEPOINT_ORDER alone. Otherwise the argument is wrapped in a
     * MultiComparator together with CODEPOINT_ORDER (presumably consulted in that order, so
     * that items the first comparator treats as equal are still separated; confirm against
     * MultiComparator).
     *
     * @param ordering the resulting ordering of the list of characters in the pattern
     * @return this, for chaining
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public UnicodeSetPrettyPrinter setOrdering(Comparator ordering) {
        if (ordering == null) {
            this.ordering = CODEPOINT_ORDER;
        } else {
            this.ordering =
                    new org.unicode.cldr.util.MultiComparator<String>(ordering, CODEPOINT_ORDER);
        }
        return this;
    }

    /**
     * @return the comparator that decides where spaces are inserted, or null if none is set
     */
    public Comparator<String> getSpaceComparator() {
        return this.spaceComp;
    }

    /**
     * Sets the comparator that controls spacing between items.
     *
     * @param spaceComp if the comparison returns non-zero, then a space will be inserted between
     *     characters; may be null, in which case only the mark/surrogate spacing rule applies
     * @return this, for chaining
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public UnicodeSetPrettyPrinter setSpaceComparator(Comparator spaceComp) {
        this.spaceComp = spaceComp;
        return this; // for chaining
    }

    /**
     * @return the set of code points that get escaped; this is the internal set, not a copy
     */
    public UnicodeSet getToQuote() {
        return this.toQuote;
    }

    /**
     * Sets the extra characters to quote with \\uXXXX-style escaping. The pattern-whitespace
     * constant is always added to the stored set, and the caller's set is copied, not modified.
     *
     * @param toQuote the extra characters to quote; null leaves the current setting unchanged
     * @return this, for chaining
     */
    public UnicodeSetPrettyPrinter setToQuote(UnicodeSet toQuote) {
        if (toQuote == null) {
            return this;
        }
        final UnicodeSet expanded = toQuote.cloneAsThawed();
        expanded.addAll(PATTERN_WHITESPACE);
        this.toQuote = expanded;
        return this;
    }

    /**
     * Get the pattern for a particular set.
     *
     * <p>Strings and code points outside SORT_AT_END are sorted with the configured ordering
     * (the TreeSet's natural ordering if none was set); code points in SORT_AT_END are appended
     * afterwards in iteration order. If anything throws while building the pattern, the result
     * falls back to {@code uset.toPattern(false)}.
     *
     * @param uset the set to format
     * @return formatted UnicodeSet
     */
    @Override
    public synchronized String format(UnicodeSet uset) {
        try {
            // Reset ALL per-call state. A previous call that threw part-way (and took the
            // fallback below) can leave a pending run or a stale lastString behind, which would
            // otherwise leak into this call's output and spacing decisions.
            first = true;
            firstCodePoint = lastCodePoint = -2;
            lastString = "";
            target.setLength(0);

            // remove all the unassigned gorp for now; it is added back after the sorted items
            UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(SORT_AT_END);

            // make sure that comparison separates all strings, even canonically equivalent ones
            TreeSet<String> orderedStrings = new TreeSet<>(ordering);
            for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange(); ) {
                if (it.codepoint == UnicodeSetIterator.IS_STRING) {
                    orderedStrings.add(it.string);
                } else {
                    for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
                        if (!putAtEnd.contains(i)) {
                            orderedStrings.add(UTF16.valueOf(i));
                        }
                    }
                }
            }

            target.append("[");
            for (String item : orderedStrings) {
                appendUnicodeSetItem(item);
            }
            // add back the unassigned gorp; putAtEnd was built by intersecting with a set of
            // code points only, so it.codepoint is always a real code point here
            for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next(); ) {
                appendUnicodeSetItem(it.codepoint);
            }
            flushLast(); // write out whatever run is still pending
            target.append("]");
            return target.toString();
        } catch (Exception e) {
            // Deliberate best effort: prefer a plain pattern over failing the caller.
            return uset.toPattern(false);
        }
    }

    /**
     * Appends one set item given as a string. A string with more than one code point is
     * written immediately (after flushing any pending run); anything else is routed through
     * the code point overload using the code point at index 0.
     *
     * @param s the item to append
     * @return this, for chaining
     */
    private UnicodeSetPrettyPrinter appendUnicodeSetItem(String s) {
        final boolean multiCodePoint = UTF16.hasMoreCodePointsThan(s, 1);
        if (!multiCodePoint) {
            appendUnicodeSetItem(UTF16.charAt(s, 0));
            return this;
        }
        flushLast();
        addSpaceAsNeededBefore(s);
        appendQuoted(s);
        lastString = s;
        return this;
    }

    /**
     * Records one code point, either extending the pending run or starting a new one. Nothing
     * is written for the new code point until flushLast() runs.
     *
     * @param cp the code point to add
     */
    private void appendUnicodeSetItem(int cp) {
        if (!compressRanges) {
            // With compression off every code point is flushed on its own, so no run builds up.
            flushLast();
        }
        final boolean extendsPendingRun = (cp == lastCodePoint + 1);
        if (!extendsPendingRun) {
            flushLast();
            firstCodePoint = cp; // start a new run
        }
        lastCodePoint = cp;
    }

    /**
     * Appends a space to the output if one is needed before the given item.
     *
     * <p>The first item of a pattern never gets a space. After that, a space is added when the
     * space comparator (if any) reports a non-zero difference from the previously written item.
     * Otherwise a space is added only when the item's first code point is not in toQuote or
     * QUOTED_SYNTAX and is a non-spacing mark, an enclosing mark, or a trail surrogate.
     *
     * @param s the item about to be written
     */
    private void addSpaceAsNeededBefore(String s) {
        if (first) {
            first = false;
            return;
        }
        if (spaceComp != null && spaceComp.compare(s, lastString) != 0) {
            target.append(' ');
            return;
        }
        final int lead = UTF16.charAt(s, 0);
        if (toQuote.contains(lead) || QUOTED_SYNTAX.contains(lead)) {
            return;
        }
        final int category = UCharacter.getType(lead);
        final boolean isMark =
                category == UCharacter.NON_SPACING_MARK || category == UCharacter.ENCLOSING_MARK;
        // make sure we don't accidentally merge two surrogates
        final boolean isTrailSurrogate =
                category == UCharacter.SURROGATE && lead >= UTF16.TRAIL_SURROGATE_MIN_VALUE;
        if (isMark || isTrailSurrogate) {
            target.append(' ');
        }
    }

    /**
     * Code point convenience overload: converts to a string and delegates.
     *
     * @param codepoint the code point about to be written
     */
    private void addSpaceAsNeededBefore(int codepoint) {
        final String asString = UTF16.valueOf(codepoint);
        addSpaceAsNeededBefore(asString);
    }

    /**
     * Writes the pending run of code points (if any) to the output and clears it.
     *
     * <p>A single code point is written as is. A run of exactly two is written as two separate
     * items, and a longer run as {@code first-last}. Afterwards lastString holds the last code
     * point written and the pending run is reset to the "none" sentinel (-2).
     */
    private void flushLast() {
        if (lastCodePoint < 0) {
            return; // nothing pending
        }
        addSpaceAsNeededBefore(firstCodePoint);
        if (firstCodePoint != lastCodePoint) {
            appendQuoted(firstCodePoint);
            if (firstCodePoint + 1 != lastCodePoint) {
                target.append('-');
            } else {
                // Two-element run, written as two items. The spacing check for the second one
                // must compare against the first one just written, not against whatever item
                // preceded the run (or a value left over from an earlier format() call).
                lastString = UTF16.valueOf(firstCodePoint);
                addSpaceAsNeededBefore(lastCodePoint);
            }
        }
        appendQuoted(lastCodePoint);
        lastString = UTF16.valueOf(lastCodePoint);
        firstCodePoint = lastCodePoint = -2;
    }

    /**
     * Appends a multi-code-point string item. If a quoter is set and the string contains any
     * code point from toQuote, the quoter's output for the whole string is appended as is.
     * Otherwise the string is wrapped in braces and each code point is appended through the
     * code point overload.
     *
     * @param s the string item to append
     */
    private void appendQuoted(String s) {
        if (quoter != null && toQuote.containsSome(s)) {
            target.append(quoter.transform(s));
            return;
        }
        target.append("{");
        int index = 0;
        while (index < s.length()) {
            final int codePoint = UTF16.charAt(s, index);
            appendQuoted(codePoint);
            index += UTF16.getCharCount(codePoint);
        }
        target.append("}");
    }

    /**
     * Appends a single code point, escaping it where required.
     *
     * <p>Code points in toQuote are rendered by the quoter if one is set, otherwise as a hex
     * escape: backslash-u with 4 places, or backslash-U with 8 places above 0xFFFF. Any other
     * code point is appended literally, preceded by a backslash if it is a UnicodeSet syntax
     * character or is in the pattern-whitespace constant.
     *
     * @param codePoint the code point to append
     * @return this, for chaining
     */
    UnicodeSetPrettyPrinter appendQuoted(int codePoint) {
        if (toQuote.contains(codePoint)) {
            if (quoter != null) {
                target.append(quoter.transform(UTF16.valueOf(codePoint)));
            } else {
                final boolean supplementary = codePoint > 0xFFFF;
                target.append(supplementary ? "\\U" : "\\u");
                target.append(Utility.hex(codePoint, supplementary ? 8 : 4));
            }
            return this;
        }

        final boolean needsBackslash;
        switch (codePoint) {
            case '[': // SET_OPEN
            case ']': // SET_CLOSE
            case '-': // HYPHEN
            case '^': // COMPLEMENT
            case '&': // INTERSECTION
            case '\\': // BACKSLASH
            case '{':
            case '}':
            case '$':
            case ':':
                needsBackslash = true;
                break;
            default:
                // Escape whitespace
                needsBackslash = PATTERN_WHITESPACE.contains(codePoint);
                break;
        }
        if (needsBackslash) {
            target.append('\\');
        }
        UTF16.append(target, codePoint);
        return this;
    }
    //  Appender append(String s) {
    //  target.append(s);
    //  return this;
    //  }
    //  public String toString() {
    //  return target.toString();
    //  }

    /**
     * Formats a set and appends the result to the given Appendable.
     *
     * @param obj the set to format
     * @param toAppendTo destination for the formatted pattern
     * @param pos not used by this implementation
     * @return the value returned by {@code toAppendTo.append}
     * @throws ICUUncheckedIOException if appending throws an IOException
     */
    public Appendable format(UnicodeSet obj, Appendable toAppendTo, FieldPosition pos) {
        final String pattern = format(obj);
        try {
            return toAppendTo.append(pattern);
        } catch (IOException ioe) {
            throw new ICUUncheckedIOException(ioe);
        }
    }

    /**
     * Parses a pattern string back into a UnicodeSet using the UnicodeSet pattern constructor.
     *
     * @param formattedString a UnicodeSet pattern
     * @return a new set built from the pattern
     */
    @Override
    public UnicodeSet parse(String formattedString) {
        final UnicodeSet parsed = new UnicodeSet(formattedString);
        return parsed;
    }
}