1 /* 2 ********************************************************************** 3 * Copyright (c) 2006-2007, Google and others. All Rights Reserved. 4 ********************************************************************** 5 * Author: Mark Davis 6 ********************************************************************** 7 */ 8 package org.unicode.cldr.util; 9 10 import java.util.ArrayList; 11 import java.util.Collection; 12 import java.util.Comparator; 13 import java.util.HashMap; 14 import java.util.HashSet; 15 import java.util.Map; 16 import java.util.Set; 17 import java.util.TreeSet; 18 19 /** 20 * Provide way to associate each of a set of T objects with an integer, where 21 * the integer can be used to get the object. All objects are immutable, and 22 * created with a corresponding factory object. The integers have no relation to 23 * the order in the original set. The integers may not be compact (eg 0..n). 24 * 25 * @param <T> 26 */ 27 public abstract class IntMap<T> { 28 29 public interface IntMapFactory<T> { make(Collection<T> values)30 public IntMap<T> make(Collection<T> values); 31 } 32 get(int index)33 public abstract T get(int index); 34 getValueMap(Map<T, Integer> output)35 public abstract Map<T, Integer> getValueMap(Map<T, Integer> output); 36 getValueMap()37 public Map<T, Integer> getValueMap() { 38 return getValueMap(new HashMap<T, Integer>()); 39 } 40 approximateStorage()41 public abstract int approximateStorage(); 42 43 @Override toString()44 public String toString() { 45 return getValueMap().toString(); 46 } 47 48 // Concrete classes, embedded for simplicity 49 50 public static class BasicIntMap<T> extends IntMap<T> { 51 private final T[] intToValue; 52 BasicIntMap(T[] intToValue)53 private BasicIntMap(T[] intToValue) { 54 this.intToValue = intToValue; 55 } 56 57 @Override get(int index)58 public T get(int index) { 59 return intToValue[index]; 60 } 61 62 @Override getValueMap(Map<T, Integer> output)63 public Map<T, Integer> getValueMap(Map<T, Integer> output) { 64 for (int i = 0; i < intToValue.length; ++i) { 65 output.put(intToValue[i], i); 66 } 67 return output; 68 } 69 70 @Override approximateStorage()71 public int approximateStorage() { 72 int size = OBJECT_OVERHEAD; 73 for (T item : intToValue) { 74 size += 4; // size of int 75 size += item.toString().length() * 2 + STRING_OVERHEAD; 76 } 77 return size; 78 } 79 } 80 81 public static class BasicIntMapFactory<T> implements IntMapFactory<T> { 82 @Override 83 @SuppressWarnings("unchecked") make(Collection<T> values)84 public BasicIntMap<T> make(Collection<T> values) { 85 return new BasicIntMap<>((T[]) new ArrayList<>(new HashSet<>(values)).toArray()); 86 } 87 } 88 89 /** 90 * Stores short strings (255 or less) in compacted fashion. The number of 91 * strings is also limited: in the worst case to 2^24 / total number of UTF-8 92 * bytes 93 * 94 * @author markdavis 95 */ 96 public static class CompactStringIntMap extends IntMap<String> { 97 private final String data; 98 private final int[] intToValue; 99 CompactStringIntMap(String data, int[] intToValue)100 private CompactStringIntMap(String data, int[] intToValue) { 101 this.data = data; 102 this.intToValue = intToValue; 103 } 104 105 @Override get(int index)106 public String get(int index) { 107 // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8. 108 int packedIndex = intToValue[index]; 109 int len = packedIndex & 0xFF; 110 int dataIndex = packedIndex >>> 8; 111 return data.substring(dataIndex, dataIndex + len); 112 } 113 114 @Override getValueMap(Map<String, Integer> output)115 public Map<String, Integer> getValueMap(Map<String, Integer> output) { 116 for (int i = 0; i < intToValue.length; ++i) { 117 output.put(get(i), i); 118 } 119 return output; 120 } 121 122 @Override approximateStorage()123 public int approximateStorage() { 124 int size = OBJECT_OVERHEAD + POINTER_OVERHEAD * 2; 125 size += data.length() * 2 + STRING_OVERHEAD; 126 size += 4 * intToValue.length; 127 return size; 128 } 129 } 130 131 public static final Comparator<String> LONGEST_FIRST_COMPARATOR = new Comparator<String>() { 132 @Override 133 public int compare(String a, String b) { 134 return a.length() > b.length() ? -1 135 : a.length() < b.length() ? 1 136 : a.compareTo(b); 137 } 138 }; 139 140 public static class CompactStringIntMapFactory implements IntMapFactory<String> { 141 @Override make(Collection<String> values)142 public CompactStringIntMap make(Collection<String> values) { 143 // first sort, longest first 144 Set<String> sorted = new TreeSet<>(LONGEST_FIRST_COMPARATOR); 145 sorted.addAll(values); 146 147 StringBuilder data = new StringBuilder(); 148 int[] intToValue = new int[sorted.size()]; 149 int count = 0; 150 151 // compact the values 152 // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8. 153 for (String string : sorted) { 154 if (string.length() > 255) { 155 throw new IllegalArgumentException( 156 "String too large: CompactStringIntMapFactory only handles strings up to 255 in length"); 157 } 158 int position = data.indexOf(string); 159 if (position < 0) { // add if not there 160 position = data.length(); 161 data.append(string); 162 } 163 intToValue[count++] = (position << 8) | string.length(); 164 } 165 return new CompactStringIntMap(data.toString(), intToValue); 166 } 167 } 168 169 // for approximateSize 170 private static final int OBJECT_OVERHEAD = 16; 171 private static final int POINTER_OVERHEAD = 16; 172 private static final int STRING_OVERHEAD = 16 + OBJECT_OVERHEAD; 173 174 }