1 /* 2 ********************************************************************** 3 * Copyright (c) 2006-2007, Google and others. All Rights Reserved. 4 ********************************************************************** 5 * Author: Mark Davis 6 ********************************************************************** 7 */ 8 package org.unicode.cldr.util; 9 10 import java.util.Collections; 11 import java.util.Iterator; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeMap; 16 import java.util.TreeSet; 17 18 import org.unicode.cldr.util.Dictionary.Matcher.Status; 19 20 /** 21 * This is a simple dictionary class used for testing. Should be in the package usertest, but it's a pain to rename 22 * files in CVS. 23 * 24 * @author markdavis 25 */ 26 public class SimpleDictionary<T> extends Dictionary<T> { 27 private TreeMap<CharSequence, T> data = new TreeMap<>(); 28 private Set<CharSequence> possibleMatchesBefore; 29 private Set<CharSequence> possibleMatchesAfter; 30 private Status finalStatus; 31 boolean done; 32 private int matchCount; 33 private CharSequence lastEntry = ""; 34 35 public static class SimpleDictionaryBuilder<T> implements DictionaryBuilder<T> { 36 37 @Override make(Map<CharSequence, T> source)38 public SimpleDictionary<T> make(Map<CharSequence, T> source) { 39 return new SimpleDictionary(source); 40 } 41 42 } 43 SimpleDictionary(Map<CharSequence, T> source)44 private SimpleDictionary(Map<CharSequence, T> source) { 45 for (CharSequence text : source.keySet()) { 46 addMapping(text, source.get(text)); 47 } 48 } 49 50 // private List<T> results; 51 // 52 // public static <U> Dictionary<U> getInstance(Map<CharSequence, U> source) { 53 // SimpleDictionary<U> result = new SimpleDictionary<U>(); 54 // // if T is not an integer, then get the results, and assign each a number 55 // Map<U,Integer> valueToInt = new HashMap<U,Integer>(); 56 // result.results = new ArrayList<U>(); 57 // int count = 0; 58 // for (U value : source.values()) { 59 // Integer oldValue = valueToInt.get(value); 60 // if (oldValue == null) { 61 // result.results.add(value); 62 // valueToInt.put(value, count++); 63 // } 64 // } 65 // 66 // 67 // for (CharSequence text : source.keySet()) { 68 // result.addMapping(text, valueToInt.get(source.get(text))); 69 // } 70 // return result; 71 // } 72 addMapping(CharSequence text, T result)73 private void addMapping(CharSequence text, T result) { 74 if (CharUtilities.compare(text, lastEntry) <= 0) { 75 throw new IllegalArgumentException("Each string must be greater than the previous one."); 76 } 77 lastEntry = text; 78 data.put(text, result); 79 } 80 81 @Override getMapping()82 public Iterator<Entry<CharSequence, T>> getMapping() { 83 return Collections.unmodifiableMap(data).entrySet().iterator(); 84 } 85 86 @Override getMatcher()87 public Matcher<T> getMatcher() { 88 return new SimpleMatcher(); 89 } 90 91 private class SimpleMatcher extends Matcher<T> { 92 93 @Override setOffset(int offset)94 public Matcher<T> setOffset(int offset) { 95 possibleMatchesBefore = data.keySet(); 96 done = false; 97 matchValue = null; 98 return super.setOffset(offset); 99 } 100 101 /** 102 * Dumb implementation. 103 * 104 */ 105 @Override next()106 public Status next() { 107 // There are two degenerate cases: our dictionary is empty, or we are called on an empty string. 108 109 // As long as we get matches, we return them. 110 // When we fail, we return one of two statuses 111 // DONE if there were no more matches in the dictionary past the last match 112 // SINGLE if there was a longer match, plus the longest offset that we successfully got to. 113 114 // if we have already narrowed down to the end, just return the status 115 // everything should already be set to make this work. 116 if (done) { 117 if (finalStatus == Status.NONE) { 118 matchValue = null; 119 } 120 return finalStatus; 121 } 122 123 CharSequence firstMatch = null; 124 125 while (text.hasCharAt(matchEnd)) { 126 // get the next probe value 127 ++matchEnd; 128 CharSequence probe = text.subSequence(offset, matchEnd); 129 130 // narrow to the items that start with the probe 131 // this filters Before into After 132 133 firstMatch = filterToStartsWith(probe); 134 135 // if we have a full match, return it 136 137 if (firstMatch != null && firstMatch.length() == probe.length()) { 138 possibleMatchesAfter.remove(firstMatch); 139 possibleMatchesBefore = possibleMatchesAfter; 140 matchValue = data.get(firstMatch); 141 finalStatus = Status.NONE; 142 return Status.MATCH; 143 } 144 145 // See if we've run out 146 // example: probe = "man" 147 // three cases, based on what was in the set 148 // {man}: return DONE (we did a match before) 149 // {man, many}: return SINGLE 150 // {man, many, manner}: return PLURAL 151 // {many}: return SINGLE 152 if (possibleMatchesAfter.size() == 0) { 153 --matchEnd; // backup 154 break; 155 } 156 possibleMatchesBefore = possibleMatchesAfter; 157 } 158 // no more work to be done. 159 done = true; 160 161 if (matchEnd == offset || possibleMatchesBefore.size() == 0) { 162 matchValue = null; 163 return finalStatus = Status.NONE; 164 } 165 if (firstMatch == null) { // just in case we skipped the above loop 166 firstMatch = possibleMatchesBefore.iterator().next(); 167 } 168 matchValue = data.get(firstMatch); 169 matchCount = possibleMatchesBefore.size(); 170 return finalStatus = Status.PARTIAL; 171 } 172 173 @Override nextUniquePartial()174 public boolean nextUniquePartial() { 175 // we have already set the matchValue, so we don't need to reset here. 176 return matchCount == 1; 177 } 178 179 /** 180 * Returns the first matching item, if there is one, and 181 * filters the rest of the list to those that match the probe. 182 * 183 * @param probe 184 * @return 185 */ filterToStartsWith(CharSequence probe)186 private CharSequence filterToStartsWith(CharSequence probe) { 187 CharSequence result = null; 188 possibleMatchesAfter = new TreeSet<>(); 189 for (CharSequence item : possibleMatchesBefore) { 190 if (startsWith(item, probe)) { 191 if (result == null) { 192 result = item; 193 } 194 possibleMatchesAfter.add(item); 195 } 196 } 197 return result; 198 } 199 contains(CharSequence text)200 public boolean contains(CharSequence text) { 201 return data.containsKey(text); 202 } 203 get(CharSequence text)204 public T get(CharSequence text) { 205 return data.get(text); 206 } 207 208 // public static class GeqGetter<K> { 209 // private Set<K> set; 210 // private Iterator<K> iterator; 211 // private Comparator<? super K> comparator; 212 // boolean done; 213 // K lastItem = null; 214 // private int count; 215 // 216 // public GeqGetter(Set<K> set, Comparator<K> comparator) { 217 // this.comparator = comparator; 218 // this.set = set; 219 // reset(); 220 // } 221 // 222 // public GeqGetter reset() { 223 // iterator = set.iterator(); 224 // done = false; 225 // return this; 226 // } 227 // 228 // /** 229 // * Returns least element greater than or equal to probe. 230 // * @param probe 231 // * @return 232 // */ 233 // public K getGeq(K probe) { 234 // if (lastItem != null && comparator.compare(lastItem, probe) >= 0) { 235 // return lastItem; 236 // } 237 // count = 0; 238 // while (iterator.hasNext()) { 239 // lastItem = iterator.next(); 240 // ++count; 241 // if (comparator.compare(lastItem, probe) >= 0) { 242 // return lastItem; 243 // } 244 // } 245 // lastItem = null; 246 // return lastItem; 247 // } 248 // } 249 250 @Override getDictionary()251 public Dictionary<T> getDictionary() { 252 // TODO Auto-generated method stub 253 return SimpleDictionary.this; 254 } 255 256 // public static class CharSequenceComparator implements Comparator<CharSequence> { 257 // 258 // public int compare(CharSequence first, CharSequence second) { 259 // int minLen = first.length(); 260 // if (minLen > second.length()) 261 // minLen = second.length(); 262 // int result; 263 // for (int i = 0; i < minLen; ++i) { 264 // if (0 != (result = first.charAt(i) - second.charAt(i))) 265 // return result; 266 // } 267 // return first.length() - second.length(); 268 // } 269 // 270 // } 271 } 272 startsWith(CharSequence first, CharSequence possiblePrefix)273 public static boolean startsWith(CharSequence first, CharSequence possiblePrefix) { 274 if (first.length() < possiblePrefix.length()) { 275 return false; 276 } 277 for (int i = 0; i < possiblePrefix.length(); ++i) { 278 if (first.charAt(i) != possiblePrefix.charAt(i)) { 279 return false; 280 } 281 } 282 return true; 283 } 284 285 }