1 package org.unicode.cldr.test; 2 3 import java.io.DataInputStream; 4 import java.io.File; 5 import java.io.FileNotFoundException; 6 import java.io.IOException; 7 import java.io.InputStream; 8 import java.lang.ref.Reference; 9 import java.lang.ref.SoftReference; 10 import java.util.Collections; 11 import java.util.HashMap; 12 import java.util.HashSet; 13 import java.util.Map; 14 import java.util.Set; 15 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CldrUtility; 19 import org.unicode.cldr.util.Factory; 20 import org.unicode.cldr.util.InputStreamFactory; 21 import org.unicode.cldr.util.PathHeader; 22 import org.unicode.cldr.util.RegexLookup; 23 import org.unicode.cldr.util.StringId; 24 25 import com.ibm.icu.util.ICUUncheckedIOException; 26 27 /** 28 * This class should be used to detect when a path should be included in the set 29 * of outdated items, because the value in the locale has not changed since the 30 * last time the English changed. For efficiency, it only keeps a record of 31 * those values in trunk that are out of date. 32 * <p> 33 * That is, to get the set of outdated values, the caller should do the following: 34 * <ol> 35 * <li>Test to see if the user has voted for a value for the path. If so, don't include. 36 * <li>Test to see if the winning value for the path is different from the trunk value. If so, don't include. 37 * <li>Test with isOutdated(path) to see if the trunk value was outdated. If not, don't include. 38 * <li>Otherwise, include this path in the set of outdated items. 39 * </ol> 40 * <p> 41 * To update the data file, use GenerateBirth.java. 42 */ 43 public class OutdatedPaths { 44 45 public static final String OUTDATED_DIR = "births/"; 46 public static final String OUTDATED_ENGLISH_DATA = "outdatedEnglish.data"; 47 public static final String OUTDATED_DATA = "outdated.data"; 48 49 private static final boolean DEBUG = CldrUtility.getProperty("OutdatedPathsDebug", false); 50 51 private final HashMap<String, Set<Long>> localeToData = new HashMap<String, Set<Long>>(); 52 private final HashMap<Long, String> pathToPrevious = new HashMap<Long, String>(); 53 54 /** 55 * Creates a new OutdatedPaths, using the data file "outdated.data" in the same directory as this class. 56 * 57 * @param version 58 */ OutdatedPaths()59 public OutdatedPaths() { 60 this(null); 61 } 62 63 /** 64 * Loads the data from the specified directory, using the data file "outdated.data". 65 * 66 * @param directory 67 */ OutdatedPaths(String directory)68 public OutdatedPaths(String directory) { 69 try { 70 DataInputStream dataIn = openDataInput(directory, OUTDATED_DATA); 71 Map<Long, PathHeader> id2header = new HashMap<Long, PathHeader>(); 72 if (DEBUG) { 73 Factory factory = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 74 id2header = getIdToPath(factory); 75 } 76 while (true) { 77 String locale = dataIn.readUTF(); 78 if (locale.equals("$END$")) { 79 break; 80 } 81 if (DEBUG) { 82 System.out.println("OutdatedPaths: Locale: " + locale); 83 } 84 final HashSet<Long> data = new HashSet<Long>(); 85 int size = dataIn.readInt(); 86 for (int i = 0; i < size; ++i) { 87 long item = dataIn.readLong(); 88 data.add(item); 89 if (DEBUG) { 90 System.out.println(locale + "\t" + id2header.get(item)); 91 } 92 } 93 localeToData.put(locale, Collections.unmodifiableSet(data)); 94 } 95 dataIn.close(); 96 97 // now previous English 98 99 dataIn = openDataInput(directory, OUTDATED_ENGLISH_DATA); 100 int size = dataIn.readInt(); 101 if (DEBUG) { 102 System.out.println("English Data"); 103 } 104 for (int i = 0; i < size; ++i) { 105 long pathId = dataIn.readLong(); 106 String previous = dataIn.readUTF(); 107 if (DEBUG) { 108 System.out.println("en\t(" + previous + ")\t" + id2header.get(pathId)); 109 } 110 pathToPrevious.put(pathId, previous); 111 } 112 String finalCheck = dataIn.readUTF(); 113 if (!finalCheck.equals("$END$")) { 114 throw new IllegalArgumentException("Corrupted " + OUTDATED_ENGLISH_DATA); 115 } 116 dataIn.close(); 117 118 } catch (IOException e) { 119 throw new ICUUncheckedIOException("Data Not Available", e); 120 } 121 } 122 getIdToPath(Factory factory)123 public Map<Long, PathHeader> getIdToPath(Factory factory) { 124 Map<Long, PathHeader> result = new HashMap<Long, PathHeader>(); 125 CLDRFile english = factory.make("en", true); 126 PathHeader.Factory pathHeaders = PathHeader.getFactory(english); 127 for (String s : english) { 128 long id = StringId.getId(s); 129 PathHeader pathHeader = pathHeaders.fromPath(s); 130 result.put(id, pathHeader); 131 } 132 return result; 133 } 134 135 @SuppressWarnings("resource") openDataInput(String directory, String filename)136 private DataInputStream openDataInput(String directory, String filename) throws FileNotFoundException { 137 String dataFileName = filename; 138 InputStream fileInputStream = directory == null 139 ? CldrUtility.getInputStream(OUTDATED_DIR + dataFileName) : 140 //: new FileInputStream(new File(directory, dataFileName)); 141 InputStreamFactory.createInputStream(new File(directory, dataFileName)); 142 DataInputStream dataIn = new DataInputStream(fileInputStream); 143 return dataIn; 144 } 145 146 /** 147 * Returns true if the value for the path is outdated in trunk. See class 148 * description for more info. 149 * 150 * @param distinguishedPath 151 * @return true if the string is outdated 152 */ isOutdated(String locale, String distinguishedPath)153 public boolean isOutdated(String locale, String distinguishedPath) { 154 Set<Long> data = localeToData.get(locale); 155 if (data == null) { 156 return false; 157 } 158 long id = StringId.getId(distinguishedPath); 159 boolean result = data.contains(id); 160 if (result == false) { 161 return false; 162 } 163 Boolean toSkip = SKIP_PATHS.get(distinguishedPath); 164 if (toSkip != null) { 165 return false; 166 } 167 return result; 168 } 169 170 /** 171 * The same as isOutdated, but also returns paths that aren't skipped. 172 * 173 * @param locale 174 * @param distinguishedPath 175 * @return 176 */ isRawOutdated(String locale, String distinguishedPath)177 public boolean isRawOutdated(String locale, String distinguishedPath) { 178 Set<Long> data = localeToData.get(locale); 179 if (data == null) { 180 return false; 181 } 182 long id = StringId.getId(distinguishedPath); 183 return data.contains(id); 184 } 185 186 /** 187 * Is this path to be skipped? (because the English is normally irrelevant). 188 * 189 * @param distinguishedPath 190 * @return 191 */ isSkipped(String distinguishedPath)192 public boolean isSkipped(String distinguishedPath) { 193 return SKIP_PATHS.get(distinguishedPath) != null; 194 } 195 196 /** 197 * Returns true if the value for the path is outdated in trunk. See class 198 * description for more info. 199 * 200 * @param distinguishedPath 201 * @return true if the string is outdated 202 */ getPreviousEnglish(String distinguishedPath)203 public String getPreviousEnglish(String distinguishedPath) { 204 long id = StringId.getId(distinguishedPath); 205 return pathToPrevious.get(id); 206 } 207 208 static RegexLookup<Boolean> SKIP_PATHS = new RegexLookup<Boolean>() 209 .add("/exemplarCharacters", true) 210 .add("/references", true) 211 .add("/delimiters/[^/]*uotation", true) 212 .add("/posix", true) 213 .add("/pattern", true) 214 .add("/fields/field[^/]*/displayName", true) 215 .add("/dateFormatItem", true) 216 .add("/numbers/symbols", true) 217 .add("/fallback", true) 218 .add("/quarters", true) 219 .add("/months", true); 220 221 /** 222 * Returns the number of outdated paths. 223 * 224 * @param locale 225 * @return number of outdated paths. 226 */ countOutdated(String locale)227 public int countOutdated(String locale) { 228 Set<Long> data = localeToData.get(locale); 229 return data == null ? 0 : data.size(); 230 } 231 getInstance()232 public static OutdatedPaths getInstance() { 233 OutdatedPaths outdatedPaths = SINGLETON.get(); 234 if (outdatedPaths == null) { 235 outdatedPaths = new OutdatedPaths(); 236 SINGLETON = new SoftReference<OutdatedPaths>(outdatedPaths); 237 } 238 return outdatedPaths; 239 } 240 241 private static Reference<OutdatedPaths> SINGLETON = new SoftReference<OutdatedPaths>(null); 242 } 243