• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
4  **********************************************************************
5  * Author: Mark Davis
6  **********************************************************************
7  */
8 package org.unicode.cldr.util;
9 
10 import java.util.Collections;
11 import java.util.Iterator;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 
18 import org.unicode.cldr.util.Dictionary.Matcher.Status;
19 
20 /**
21  * This is a simple dictionary class used for testing. Should be in the package usertest, but it's a pain to rename
22  * files in CVS.
23  *
24  * @author markdavis
25  */
26 public class SimpleDictionary<T> extends Dictionary<T> {
27     private TreeMap<CharSequence, T> data = new TreeMap<>();
28     private Set<CharSequence> possibleMatchesBefore;
29     private Set<CharSequence> possibleMatchesAfter;
30     private Status finalStatus;
31     boolean done;
32     private int matchCount;
33     private CharSequence lastEntry = "";
34 
35     public static class SimpleDictionaryBuilder<T> implements DictionaryBuilder<T> {
36 
37         @Override
make(Map<CharSequence, T> source)38         public SimpleDictionary<T> make(Map<CharSequence, T> source) {
39             return new SimpleDictionary(source);
40         }
41 
42     }
43 
SimpleDictionary(Map<CharSequence, T> source)44     private SimpleDictionary(Map<CharSequence, T> source) {
45         for (CharSequence text : source.keySet()) {
46             addMapping(text, source.get(text));
47         }
48     }
49 
50     // private List<T> results;
51     //
52     // public static <U> Dictionary<U> getInstance(Map<CharSequence, U> source) {
53     // SimpleDictionary<U> result = new SimpleDictionary<U>();
54     // // if T is not an integer, then get the results, and assign each a number
55     // Map<U,Integer> valueToInt = new HashMap<U,Integer>();
56     // result.results = new ArrayList<U>();
57     // int count = 0;
58     // for (U value : source.values()) {
59     // Integer oldValue = valueToInt.get(value);
60     // if (oldValue == null) {
61     // result.results.add(value);
62     // valueToInt.put(value, count++);
63     // }
64     // }
65     //
66     //
67     // for (CharSequence text : source.keySet()) {
68     // result.addMapping(text, valueToInt.get(source.get(text)));
69     // }
70     // return result;
71     // }
72 
addMapping(CharSequence text, T result)73     private void addMapping(CharSequence text, T result) {
74         if (CharUtilities.compare(text, lastEntry) <= 0) {
75             throw new IllegalArgumentException("Each string must be greater than the previous one.");
76         }
77         lastEntry = text;
78         data.put(text, result);
79     }
80 
81     @Override
getMapping()82     public Iterator<Entry<CharSequence, T>> getMapping() {
83         return Collections.unmodifiableMap(data).entrySet().iterator();
84     }
85 
86     @Override
getMatcher()87     public Matcher<T> getMatcher() {
88         return new SimpleMatcher();
89     }
90 
91     private class SimpleMatcher extends Matcher<T> {
92 
93         @Override
setOffset(int offset)94         public Matcher<T> setOffset(int offset) {
95             possibleMatchesBefore = data.keySet();
96             done = false;
97             matchValue = null;
98             return super.setOffset(offset);
99         }
100 
101         /**
102          * Dumb implementation.
103          *
104          */
105         @Override
next()106         public Status next() {
107             // There are two degenerate cases: our dictionary is empty, or we are called on an empty string.
108 
109             // As long as we get matches, we return them.
110             // When we fail, we return one of two statuses
111             // DONE if there were no more matches in the dictionary past the last match
112             // SINGLE if there was a longer match, plus the longest offset that we successfully got to.
113 
114             // if we have already narrowed down to the end, just return the status
115             // everything should already be set to make this work.
116             if (done) {
117                 if (finalStatus == Status.NONE) {
118                     matchValue = null;
119                 }
120                 return finalStatus;
121             }
122 
123             CharSequence firstMatch = null;
124 
125             while (text.hasCharAt(matchEnd)) {
126                 // get the next probe value
127                 ++matchEnd;
128                 CharSequence probe = text.subSequence(offset, matchEnd);
129 
130                 // narrow to the items that start with the probe
131                 // this filters Before into After
132 
133                 firstMatch = filterToStartsWith(probe);
134 
135                 // if we have a full match, return it
136 
137                 if (firstMatch != null && firstMatch.length() == probe.length()) {
138                     possibleMatchesAfter.remove(firstMatch);
139                     possibleMatchesBefore = possibleMatchesAfter;
140                     matchValue = data.get(firstMatch);
141                     finalStatus = Status.NONE;
142                     return Status.MATCH;
143                 }
144 
145                 // See if we've run out
146                 // example: probe = "man"
147                 // three cases, based on what was in the set
148                 // {man}: return DONE (we did a match before)
149                 // {man, many}: return SINGLE
150                 // {man, many, manner}: return PLURAL
151                 // {many}: return SINGLE
152                 if (possibleMatchesAfter.size() == 0) {
153                     --matchEnd; // backup
154                     break;
155                 }
156                 possibleMatchesBefore = possibleMatchesAfter;
157             }
158             // no more work to be done.
159             done = true;
160 
161             if (matchEnd == offset || possibleMatchesBefore.size() == 0) {
162                 matchValue = null;
163                 return finalStatus = Status.NONE;
164             }
165             if (firstMatch == null) { // just in case we skipped the above loop
166                 firstMatch = possibleMatchesBefore.iterator().next();
167             }
168             matchValue = data.get(firstMatch);
169             matchCount = possibleMatchesBefore.size();
170             return finalStatus = Status.PARTIAL;
171         }
172 
173         @Override
nextUniquePartial()174         public boolean nextUniquePartial() {
175             // we have already set the matchValue, so we don't need to reset here.
176             return matchCount == 1;
177         }
178 
179         /**
180          * Returns the first matching item, if there is one, and
181          * filters the rest of the list to those that match the probe.
182          *
183          * @param probe
184          * @return
185          */
filterToStartsWith(CharSequence probe)186         private CharSequence filterToStartsWith(CharSequence probe) {
187             CharSequence result = null;
188             possibleMatchesAfter = new TreeSet<>();
189             for (CharSequence item : possibleMatchesBefore) {
190                 if (startsWith(item, probe)) {
191                     if (result == null) {
192                         result = item;
193                     }
194                     possibleMatchesAfter.add(item);
195                 }
196             }
197             return result;
198         }
199 
contains(CharSequence text)200         public boolean contains(CharSequence text) {
201             return data.containsKey(text);
202         }
203 
get(CharSequence text)204         public T get(CharSequence text) {
205             return data.get(text);
206         }
207 
208         // public static class GeqGetter<K> {
209         // private Set<K> set;
210         // private Iterator<K> iterator;
211         // private Comparator<? super K> comparator;
212         // boolean done;
213         // K lastItem = null;
214         // private int count;
215         //
216         // public GeqGetter(Set<K> set, Comparator<K> comparator) {
217         // this.comparator = comparator;
218         // this.set = set;
219         // reset();
220         // }
221         //
222         // public GeqGetter reset() {
223         // iterator = set.iterator();
224         // done = false;
225         // return this;
226         // }
227         //
228         // /**
229         // * Returns least element greater than or equal to probe.
230         // * @param probe
231         // * @return
232         // */
233         // public K getGeq(K probe) {
234         // if (lastItem != null && comparator.compare(lastItem, probe) >= 0) {
235         // return lastItem;
236         // }
237         // count = 0;
238         // while (iterator.hasNext()) {
239         // lastItem = iterator.next();
240         // ++count;
241         // if (comparator.compare(lastItem, probe) >= 0) {
242         // return lastItem;
243         // }
244         // }
245         // lastItem = null;
246         // return lastItem;
247         // }
248         // }
249 
250         @Override
getDictionary()251         public Dictionary<T> getDictionary() {
252             // TODO Auto-generated method stub
253             return SimpleDictionary.this;
254         }
255 
256         // public static class CharSequenceComparator implements Comparator<CharSequence> {
257         //
258         // public int compare(CharSequence first, CharSequence second) {
259         // int minLen = first.length();
260         // if (minLen > second.length())
261         // minLen = second.length();
262         // int result;
263         // for (int i = 0; i < minLen; ++i) {
264         // if (0 != (result = first.charAt(i) - second.charAt(i)))
265         // return result;
266         // }
267         // return first.length() - second.length();
268         // }
269         //
270         // }
271     }
272 
startsWith(CharSequence first, CharSequence possiblePrefix)273     public static boolean startsWith(CharSequence first, CharSequence possiblePrefix) {
274         if (first.length() < possiblePrefix.length()) {
275             return false;
276         }
277         for (int i = 0; i < possiblePrefix.length(); ++i) {
278             if (first.charAt(i) != possiblePrefix.charAt(i)) {
279                 return false;
280             }
281         }
282         return true;
283     }
284 
285 }