package org.unicode.cldr.test;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.ref.Reference;
import java.lang.ref.SoftReference;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.unicode.cldr.tool.CldrVersion;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.InputStreamFactory;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.StringId;

import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * This class should be used to detect when a path should be included in the set
 * of outdated items, because the value in the locale has not changed since the
 * last time the English changed. For efficiency, it only keeps a record of
 * those values in trunk that are out of date.
 * <p>
 * That is, to get the set of outdated values, the caller should do the following:
 * <ol>
 * <li>Test to see if the user has voted for a value for the path. If so, don't include.
 * <li>Test to see if the winning value for the path is different from the trunk value. If so, don't include.
 * <li>Test with isOutdated(locale, path) to see if the trunk value was outdated. If not, don't include.
 * <li>Otherwise, include this path in the set of outdated items.
 * </ol>
 * <p>
 * To update the data file, use GenerateBirth.java.
 */
public class OutdatedPaths {
    /** Header string that both data files must start with; a mismatch is rejected on load. */
    public static String FORMAT_KEY = "odp-1";

    // NOTE(review): this literal looks like it may be a mis-encoded sentinel character;
    // confirm against the code that writes the data files before changing it.
    public static final String NO_VALUE = "�";

    public static final String OUTDATED_DIR = "births/";
    public static final String OUTDATED_ENGLISH_DATA = "outdatedEnglish.data";
    public static final String OUTDATED_DATA = "outdated.data";

    private static final boolean DEBUG = CldrUtility.getProperty("OutdatedPathsDebug", false);

    /** Marker string that terminates the locale list and the English data. */
    private static final String END_MARKER = "$END$";

    /**
     * Paths matching any of these patterns are never reported by
     * {@link #isOutdated(String, String)}; see {@link #isSkipped(String)}.
     */
    static RegexLookup<Boolean> SKIP_PATHS = new RegexLookup<Boolean>()
        .add("/exemplarCharacters", true)
        .add("/references", true)
        .add("/delimiters/[^/]*uotation", true)
        .add("/posix", true)
        .add("/pattern", true)
        .add("/fields/field[^/]*/displayName", true)
        .add("/dateFormatItem", true)
        .add("/numbers/symbols", true)
        .add("/fallback", true)
        .add("/quarters", true)
        .add("/months", true);

    /** Softly-held cached instance handed out by {@link #getInstance()}. */
    private static Reference<OutdatedPaths> SINGLETON = new SoftReference<>(null);

    /** Locale ID to the (unmodifiable) set of path IDs recorded as outdated for that locale. */
    private final Map<String, Set<Long>> localeToData = new HashMap<>();

    /** Path ID to the pair (birth version of the English value, previous English value). */
    private final Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious = new HashMap<>();

    /**
     * Creates a new OutdatedPaths from the default data location, i.e. the files
     * {@value #OUTDATED_DATA} and {@value #OUTDATED_ENGLISH_DATA} under
     * {@value #OUTDATED_DIR} as resolved by {@code CldrUtility.getInputStream}.
     */
    public OutdatedPaths() {
        this(null);
    }

    /**
     * Loads the data from the specified directory, using the data files
     * {@value #OUTDATED_DATA} and {@value #OUTDATED_ENGLISH_DATA}.
     *
     * @param directory
     *            directory containing the data files, or null to use the default location
     * @throws ICUUncheckedIOException
     *             if a data file cannot be opened or read
     * @throws IllegalArgumentException
     *             if a data file has an unexpected format key or is corrupted
     */
    public OutdatedPaths(String directory) {
        Map<Long, PathHeader> id2header = new HashMap<>(); // for debugging

        readLocaleToPaths(directory, id2header);

        // now previous English
        readBirthValues(directory, id2header, pathToBirthNPrevious);
    }

    /**
     * Reads {@value #OUTDATED_DATA} and fills {@link #localeToData}.
     *
     * @param directory
     *            directory containing the data file, or null for the default location
     * @param id2header
     *            debugging aid; when DEBUG is on it is filled with the path-ID to
     *            PathHeader mapping so that later debug output can use it too
     */
    private void readLocaleToPaths(String directory, Map<Long, PathHeader> id2header) {
        // try-with-resources: the stream is closed even when the format check or a read fails.
        try (DataInputStream dataIn = openDataInput(directory, OUTDATED_DATA)) {
            checkFormatKey(dataIn);
            if (DEBUG) {
                Factory factory = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
                // Fill the caller's map (rather than rebinding the parameter) so the
                // constructor can pass the same headers on to readBirthValues.
                id2header.putAll(getIdToPath(factory));
            }
            while (true) {
                String locale = dataIn.readUTF();
                if (locale.equals(END_MARKER)) {
                    break;
                }
                if (DEBUG) {
                    System.out.println("OutdatedPaths: Locale: " + locale);
                }
                final Set<Long> data = new HashSet<>();
                int size = dataIn.readInt();
                for (int i = 0; i < size; ++i) {
                    long item = dataIn.readLong();
                    data.add(item);
                    if (DEBUG) {
                        System.out.println(locale + "\t" + id2header.get(item));
                    }
                }
                localeToData.put(locale, Collections.unmodifiableSet(data));
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException("Data Not Available", e);
        }
    }

    /**
     * Reads {@value #OUTDATED_ENGLISH_DATA} and stores, for each path ID, the birth
     * version and the previous English value into the supplied map.
     *
     * @param outdatedDirectory
     *            directory containing the data file, or null for the default location
     * @param id2header
     *            optional path-ID to PathHeader mapping, only used for debug output; may be null
     * @param pathToBirthNPrevious2
     *            map that receives the (birth, previous value) pairs, keyed by path ID
     * @throws ICUUncheckedIOException
     *             if the data file cannot be opened or read
     * @throws IllegalArgumentException
     *             if the format key does not match or the end marker is missing
     */
    public static void readBirthValues(String outdatedDirectory, Map<Long, PathHeader> id2header,
        Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious2) {
        // try-with-resources: the stream is closed even when validation or a read fails.
        try (DataInputStream dataIn = openDataInput(outdatedDirectory, OUTDATED_ENGLISH_DATA)) {
            checkFormatKey(dataIn);

            int size = dataIn.readInt();
            if (DEBUG) {
                System.out.println("English Data");
            }
            for (int i = 0; i < size; ++i) {
                // Record layout: path ID, previous English value, birth version.
                long pathId = dataIn.readLong();
                String previous = dataIn.readUTF();
                CldrVersion birth = CldrVersion.from(dataIn.readUTF());

                if (DEBUG) {
                    System.out.println("en\t(" + previous + ")"
                        + (id2header == null ? "" : "\t" + id2header.get(pathId)));
                }
                pathToBirthNPrevious2.put(pathId, Pair.of(birth, previous).freeze());
            }
            String finalCheck = dataIn.readUTF();
            if (!finalCheck.equals(END_MARKER)) {
                throw new IllegalArgumentException("Corrupted " + OUTDATED_ENGLISH_DATA);
            }
        } catch (IOException e) {
            throw new ICUUncheckedIOException("Data Not Available", e);
        }
    }

    /**
     * Builds a map from path ID (as computed by {@code StringId.getId}) to PathHeader
     * for every path in the resolved English file produced by the factory.
     *
     * @param factory
     *            factory used to create the English CLDRFile
     * @return a new map from path ID to PathHeader
     */
    public Map<Long, PathHeader> getIdToPath(Factory factory) {
        Map<Long, PathHeader> result = new HashMap<>();
        CLDRFile english = factory.make("en", true);
        PathHeader.Factory pathHeaders = PathHeader.getFactory(english);
        for (String path : english) {
            result.put(StringId.getId(path), pathHeaders.fromPath(path));
        }
        return result;
    }

    /**
     * Reads the leading format key from the stream and verifies it against {@link #FORMAT_KEY}.
     *
     * @throws IOException
     *             if the key cannot be read
     * @throws IllegalArgumentException
     *             if the key does not match
     */
    private static void checkFormatKey(DataInputStream dataIn) throws IOException {
        String key = dataIn.readUTF();
        if (!OutdatedPaths.FORMAT_KEY.equals(key)) {
            throw new IllegalArgumentException("Mismatch in FORMAT_KEY: expected=" + OutdatedPaths.FORMAT_KEY + ", read=" + key);
        }
    }

    /**
     * Opens the named data file, either from the given directory or, when the
     * directory is null, from the default location under {@value #OUTDATED_DIR}.
     *
     * @throws FileNotFoundException
     *             if the file cannot be opened
     */
    private static DataInputStream openDataInput(String directory, String filename) throws FileNotFoundException {
        InputStream input = directory == null
            ? CldrUtility.getInputStream(OUTDATED_DIR + filename)
            : InputStreamFactory.createInputStream(new File(directory, filename));
        if (input == null) {
            // Defensive: fail here with a clear message instead of a NullPointerException on the first read.
            throw new FileNotFoundException("Data file not found: "
                + (directory == null ? OUTDATED_DIR + filename : new File(directory, filename).getPath()));
        }
        return new DataInputStream(input);
    }

    /** Returns true if the locale has data and its set of outdated IDs contains the ID of the path. */
    private boolean containsOutdatedId(String locale, String distinguishedPath) {
        Set<Long> data = localeToData.get(locale);
        return data != null && data.contains(StringId.getId(distinguishedPath));
    }

    /** Returns true if the path matches one of the SKIP_PATHS patterns. */
    private static boolean matchesSkipPattern(String distinguishedPath) {
        return SKIP_PATHS.get(distinguishedPath) != null;
    }

    /**
     * Returns true if the value for the path is outdated in trunk and the path is
     * not one of the skipped paths. See class description for more info.
     *
     * @param locale
     *            locale ID to look up
     * @param distinguishedPath
     *            path to test
     * @return true if the path is recorded as outdated for the locale and is not skipped
     */
    public boolean isOutdated(String locale, String distinguishedPath) {
        return containsOutdatedId(locale, distinguishedPath) && !matchesSkipPattern(distinguishedPath);
    }

    /**
     * The same as isOutdated, but without the skip filter: paths for which
     * {@link #isSkipped(String)} is true are reported as well.
     *
     * @param locale
     *            locale ID to look up
     * @param distinguishedPath
     *            path to test
     * @return true if the path is recorded as outdated for the locale
     */
    public boolean isRawOutdated(String locale, String distinguishedPath) {
        return containsOutdatedId(locale, distinguishedPath);
    }

    /**
     * Is this path to be skipped? (because the English is normally irrelevant).
     *
     * @param distinguishedPath
     *            path to test
     * @return true if the path matches one of the skip patterns
     */
    public boolean isSkipped(String distinguishedPath) {
        return matchesSkipPattern(distinguishedPath);
    }

    /**
     * Returns the previous English value recorded for the path.
     *
     * @param distinguishedPath
     *            path to look up
     * @return the previous English value, or null if nothing is recorded for the path
     */
    public String getPreviousEnglish(String distinguishedPath) {
        Pair<CldrVersion, String> value = pathToBirthNPrevious.get(StringId.getId(distinguishedPath));
        return value == null ? null : value.getSecond();
    }

    /**
     * Returns the birth version recorded for the English value of the path.
     *
     * @param distinguishedPath
     *            path to look up
     * @return the birth version, or null if nothing is recorded for the path
     */
    public CldrVersion getEnglishBirth(String distinguishedPath) {
        Pair<CldrVersion, String> value = pathToBirthNPrevious.get(StringId.getId(distinguishedPath));
        return value == null ? null : value.getFirst();
    }

    /**
     * Returns the number of outdated paths, without applying the skip filter.
     *
     * @param locale
     *            locale ID to look up
     * @return number of outdated paths, or 0 if there is no data for the locale
     */
    public int countOutdated(String locale) {
        Set<Long> data = localeToData.get(locale);
        return data == null ? 0 : data.size();
    }

    /**
     * Returns a shared instance loaded from the default location. The instance is
     * held through a SoftReference, so it is reloaded if the reference has been cleared.
     * This method is not synchronized.
     *
     * @return the shared OutdatedPaths instance
     */
    public static OutdatedPaths getInstance() {
        OutdatedPaths outdatedPaths = SINGLETON.get();
        if (outdatedPaths == null) {
            outdatedPaths = new OutdatedPaths();
            SINGLETON = new SoftReference<>(outdatedPaths);
        }
        return outdatedPaths;
    }
}