android-10.0.0_r47/s

/*
 **********************************************************************
 * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
package org.unicode.cldr.util;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

import org.unicode.cldr.util.Dictionary.DictionaryBuilder;
import org.unicode.cldr.util.IntMap.BasicIntMapFactory;
import org.unicode.cldr.util.IntMap.IntMapFactory;
import org.unicode.cldr.util.StateDictionary.Cell;
import org.unicode.cldr.util.StateDictionary.Row;
import org.unicode.cldr.util.StateDictionary.Row.Uniqueness;

/**
 * A simple state-table based dictionary builder.
 *
 * @author markdavis
 * @param <T>
 *            the return type for the dictionary
 */
public class StateDictionaryBuilder<T> implements DictionaryBuilder<T> {

    private static final boolean SHOW_SIZE = true;

    // only used while building
    private Row buildingCurrentAddRow;
    private int builtTotalBytes;
    private int builtTotalStrings;

    // results of build
    private ArrayList<Row> builtRows;
    private Row builtBaseRow;
    private IntMap<T> builtResults;
    private int builtMaxByteLength;

    private StringByteConverter byteConverter = new CompactStringByteConverter(true); // new StringUtf8Converter(); //
    // new ByteString(false); // new
    // ByteString(true); //

    private IntMapFactory<T> intMapFactory = new BasicIntMapFactory<T>();

    /**
     * Get/set the IntMapFactory used to store the values for T. The default is BasicIntMapFactory.
     *
     * @return
     */
    public IntMapFactory<T> getIntMapFactory() {
        return intMapFactory;
    }

    public StateDictionaryBuilder<T> setIntMapFactory(IntMapFactory<T> intMapFactory) {
        this.intMapFactory = intMapFactory;
        return this;
    }

    /**
     * Get/Set the StringByteConverter used to convert strings to bytes and back. The default is a compacted form:
     * ByteString(true).
     *
     * @return
     */
    public StringByteConverter getByteConverter() {
        return byteConverter;
    }

    public StateDictionaryBuilder<T> setByteConverter(StringByteConverter byteConverter) {
        this.byteConverter = byteConverter;
        return this;
    }

    /**
     * Create a new simple StateDictionary. This format is relatively fast to
     * produce, and has a fair amount of compaction. The Map must be sorted
     * according to Dictionary.CHAR_SEQUENCE_COMPARATOR. It must not contain the key "".
     *
     * @param source
     * @return
     */
    public StateDictionary<T> make(Map<CharSequence, T> source) {
        // clear out state
        buildingCurrentAddRow = null;
        builtTotalBytes = builtTotalStrings = builtMaxByteLength = 0;
        builtRows = new ArrayList<Row>();
        builtBaseRow = makeRow();
        builtResults = intMapFactory.make(source.values());
        if (SHOW_SIZE) System.out.println("***VALUE STORAGE: " + builtResults.approximateStorage());

        Map<T, Integer> valueToInt = builtResults.getValueMap();
        Map<byte[], Integer> sorted = new TreeMap<byte[], Integer>(SHORTER_BYTE_ARRAY_COMPARATOR);
        for (CharSequence text : source.keySet()) {
            sorted.put(byteConverter.toBytes(text), valueToInt.get(source.get(text)));
        }

        for (byte[] key : sorted.keySet()) {
            addMapping(key, sorted.get(key));
        }

        // now compact the rows
        // first find out which rows are equivalent (recursively)
        Map<Row, Row> replacements = new HashMap<Row, Row>();
        {
            Map<Row, Row> equivalents = new TreeMap<Row, Row>(StateDictionary.rowComparator);
            for (Row row : builtRows) {
                Row cardinal = equivalents.get(row);
                if (cardinal == null) {
                    equivalents.put(row, row);
                } else {
                    replacements.put(row, cardinal);
                }
            }
        }
        if (SHOW_SIZE) System.out.println("***ROWS: " + builtRows.size() + "\t REPLACEMENTS: " + replacements.size());

        // now replace all references to rows by their equivalents
        for (Row row : builtRows) {
            for (Byte key : row.byteToCell.keySet()) {
                Cell cell = row.byteToCell.get(key);
                Row newRow = replacements.get(cell.nextRow);
                if (newRow != null) {
                    cell.nextRow = newRow;
                }
            }
        }
        // now compact the rows array
        ArrayList<Row> newRows = new ArrayList<Row>();
        for (Row row : builtRows) {
            if (!replacements.containsKey(row)) {
                newRows.add(row);
            }
        }
        // and set the Uniqueness values
        setUniqueValues(builtBaseRow);
        builtRows = newRows;
        if (SHOW_SIZE) System.out.println("***ROWS: " + builtRows.size());
        return new StateDictionary<T>(builtBaseRow, builtRows, builtResults, builtMaxByteLength, byteConverter);
    }

    private Row makeRow() {
        Row row = new Row(builtRows.size());
        builtRows.add(row);
        return row;
    }

    private void addMapping(byte[] key, int result) {
        buildingCurrentAddRow = builtBaseRow;

        int lastIndex = key.length - 1;
        for (int i = 0; i <= lastIndex; ++i) {
            result = add(key[i], result, i == lastIndex);
        }
        builtTotalBytes += key.length;
        builtTotalStrings += 1;
        if (builtMaxByteLength < key.length) {
            builtMaxByteLength = key.length;
        }
    }

    private int add(byte key, int result, boolean last) {
        Cell matchingCell = buildingCurrentAddRow.byteToCell.get(key);
        if (matchingCell != null) {
            if (matchingCell.nextRow == null && !last) {
                matchingCell.nextRow = makeRow();
                // we add a continuation, so decrement
                --buildingCurrentAddRow.terminatingReturnCount;
            }
            buildingCurrentAddRow = matchingCell.nextRow;
            return result - matchingCell.deltaResult;
        }
        Cell cell = new Cell();
        buildingCurrentAddRow.byteToCell.put(key, cell);
        cell.deltaResult = result;
        if (last) {
            cell.returns = true;
            ++buildingCurrentAddRow.returnCount;
            ++buildingCurrentAddRow.terminatingReturnCount;
        } else {
            cell.nextRow = buildingCurrentAddRow = makeRow();
        }
        // when we create a new cell for the first time, we deposit the
        // result, so we can clear it now
        return 0;
    }

    /**
     * Follow all the path values, and determine whether all possible results from
     * a row (when following bytes) are the same. If so, set the flag on the row.
     * The return value
     *
     * @param savedMatchValue
     *            TODO
     * @return true if unique
     */
    // NOTE: The way the table is built, we are guaranteed that the current value
    // is "correct" (with no deltas needed) for at least one of the paths
    // resulting from the row, EXCEPT in the very first row. Thus except for the first row,
    // every row will have at least one cell with a zero deltaResult. That in turn means that
    // if there is a deltaResult on a cell, at least one chain resulting from taking that cell
    // will have that result.
    // So, if there are any deltaResults in a row, it is not unique.
    // Otherwise, if any of the cells have non-unique rows, it is not unique
    // Otherwise it is unique.
    // So this traverses the rows, setting the values for anything it hasn't seen.
    // TODO: could be optimized (maybe) by saving the rows seen so far. On the other hand,
    // this might not be worth the time in adding them to a save list.
    private boolean setUniqueValues(Row currentRow) {
        if (currentRow.hasUniqueValue == Uniqueness.UNIQUE) {
            return true;
        }
        // see if there is any reason to make us not Unique
        // According to the structure above:
        // It will be that we have a child that is not unique,
        // or that we have a nonzero delta value
        // We don't have to look at anything else.
        // We DO have to call setUniqueValues on each of our children, to make sure they are set.
        for (Cell cell : currentRow.byteToCell.values()) {
            if (cell.nextRow != null && !setUniqueValues(cell.nextRow)) {
                currentRow.hasUniqueValue = Uniqueness.AMBIGUOUS;
            } else if (cell.deltaResult != 0) {
                currentRow.hasUniqueValue = Uniqueness.AMBIGUOUS;
            }
        }
        if (currentRow.hasUniqueValue == Uniqueness.AMBIGUOUS) {
            return false;
        }
        currentRow.hasUniqueValue = Uniqueness.UNIQUE;
        return true;
    }

    static final Comparator<byte[]> SHORTER_BYTE_ARRAY_COMPARATOR = new Comparator<byte[]>() {

        public int compare(byte[] o1, byte[] o2) {
            int minLen = o1.length;
            if (minLen > o2.length) {
                minLen = o2.length;
            }
            for (int i = 0; i < minLen; ++i) {
                if (o1[i] != o2[i]) {
                    return o1[i] < o2[i] ? -1 : 1; // return lesser first
                }
            }
            return o1.length < o2.length ? -1 : o1.length > o2.length ? 1 : 0;
        }

    };
}