• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
4  **********************************************************************
5  * Author: Mark Davis
6  **********************************************************************
7  */
8 package org.unicode.cldr.util;
9 
10 import java.util.ArrayList;
11 import java.util.Collection;
12 import java.util.Comparator;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.TreeSet;
18 
19 /**
20  * Provide way to associate each of a set of T objects with an integer, where
21  * the integer can be used to get the object. All objects are immutable, and
22  * created with a corresponding factory object. The integers have no relation to
23  * the order in the original set. The integers may not be compact (eg 0..n).
24  *
25  * @param <T>
26  */
27 public abstract class IntMap<T> {
28 
29     public interface IntMapFactory<T> {
make(Collection<T> values)30         public IntMap<T> make(Collection<T> values);
31     }
32 
get(int index)33     public abstract T get(int index);
34 
getValueMap(Map<T, Integer> output)35     public abstract Map<T, Integer> getValueMap(Map<T, Integer> output);
36 
getValueMap()37     public Map<T, Integer> getValueMap() {
38         return getValueMap(new HashMap<T, Integer>());
39     }
40 
approximateStorage()41     public abstract int approximateStorage();
42 
43     @Override
toString()44     public String toString() {
45         return getValueMap().toString();
46     }
47 
48     // Concrete classes, embedded for simplicity
49 
50     public static class BasicIntMap<T> extends IntMap<T> {
51         private final T[] intToValue;
52 
BasicIntMap(T[] intToValue)53         private BasicIntMap(T[] intToValue) {
54             this.intToValue = intToValue;
55         }
56 
57         @Override
get(int index)58         public T get(int index) {
59             return intToValue[index];
60         }
61 
62         @Override
getValueMap(Map<T, Integer> output)63         public Map<T, Integer> getValueMap(Map<T, Integer> output) {
64             for (int i = 0; i < intToValue.length; ++i) {
65                 output.put(intToValue[i], i);
66             }
67             return output;
68         }
69 
70         @Override
approximateStorage()71         public int approximateStorage() {
72             int size = OBJECT_OVERHEAD;
73             for (T item : intToValue) {
74                 size += 4; // size of int
75                 size += item.toString().length() * 2 + STRING_OVERHEAD;
76             }
77             return size;
78         }
79     }
80 
81     public static class BasicIntMapFactory<T> implements IntMapFactory<T> {
82         @Override
83         @SuppressWarnings("unchecked")
make(Collection<T> values)84         public BasicIntMap<T> make(Collection<T> values) {
85             return new BasicIntMap<>((T[]) new ArrayList<>(new HashSet<>(values)).toArray());
86         }
87     }
88 
89     /**
90      * Stores short strings (255 or less) in compacted fashion. The number of
91      * strings is also limited: in the worst case to 2^24 / total number of UTF-8
92      * bytes
93      *
94      * @author markdavis
95      */
96     public static class CompactStringIntMap extends IntMap<String> {
97         private final String data;
98         private final int[] intToValue;
99 
CompactStringIntMap(String data, int[] intToValue)100         private CompactStringIntMap(String data, int[] intToValue) {
101             this.data = data;
102             this.intToValue = intToValue;
103         }
104 
105         @Override
get(int index)106         public String get(int index) {
107             // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8.
108             int packedIndex = intToValue[index];
109             int len = packedIndex & 0xFF;
110             int dataIndex = packedIndex >>> 8;
111             return data.substring(dataIndex, dataIndex + len);
112         }
113 
114         @Override
getValueMap(Map<String, Integer> output)115         public Map<String, Integer> getValueMap(Map<String, Integer> output) {
116             for (int i = 0; i < intToValue.length; ++i) {
117                 output.put(get(i), i);
118             }
119             return output;
120         }
121 
122         @Override
approximateStorage()123         public int approximateStorage() {
124             int size = OBJECT_OVERHEAD + POINTER_OVERHEAD * 2;
125             size += data.length() * 2 + STRING_OVERHEAD;
126             size += 4 * intToValue.length;
127             return size;
128         }
129     }
130 
131     public static final Comparator<String> LONGEST_FIRST_COMPARATOR = new Comparator<String>() {
132         @Override
133         public int compare(String a, String b) {
134             return a.length() > b.length() ? -1
135                 : a.length() < b.length() ? 1
136                     : a.compareTo(b);
137         }
138     };
139 
140     public static class CompactStringIntMapFactory implements IntMapFactory<String> {
141         @Override
make(Collection<String> values)142         public CompactStringIntMap make(Collection<String> values) {
143             // first sort, longest first
144             Set<String> sorted = new TreeSet<>(LONGEST_FIRST_COMPARATOR);
145             sorted.addAll(values);
146 
147             StringBuilder data = new StringBuilder();
148             int[] intToValue = new int[sorted.size()];
149             int count = 0;
150 
151             // compact the values
152             // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8.
153             for (String string : sorted) {
154                 if (string.length() > 255) {
155                     throw new IllegalArgumentException(
156                         "String too large: CompactStringIntMapFactory only handles strings up to 255 in length");
157                 }
158                 int position = data.indexOf(string);
159                 if (position < 0) { // add if not there
160                     position = data.length();
161                     data.append(string);
162                 }
163                 intToValue[count++] = (position << 8) | string.length();
164             }
165             return new CompactStringIntMap(data.toString(), intToValue);
166         }
167     }
168 
169     // for approximateSize
170     private static final int OBJECT_OVERHEAD = 16;
171     private static final int POINTER_OVERHEAD = 16;
172     private static final int STRING_OVERHEAD = 16 + OBJECT_OVERHEAD;
173 
174 }