1 package org.unicode.cldr.api; 2 3 import static com.google.common.base.Preconditions.checkArgument; 4 import static com.google.common.base.Preconditions.checkNotNull; 5 import static com.google.common.collect.ImmutableSet.toImmutableSet; 6 import static org.unicode.cldr.api.CldrDataType.LDML; 7 8 import java.io.File; 9 import java.io.IOException; 10 import java.io.UncheckedIOException; 11 import java.nio.file.Files; 12 import java.nio.file.Path; 13 import java.util.Set; 14 import java.util.function.Predicate; 15 import java.util.stream.Stream; 16 17 import org.unicode.cldr.api.CldrData.PrefixVisitor; 18 import org.unicode.cldr.api.CldrData.ValueVisitor; 19 import org.unicode.cldr.util.CLDRFile; 20 import org.unicode.cldr.util.Factory; 21 import org.unicode.cldr.util.SimpleFactory; 22 23 import com.google.common.collect.ImmutableSet; 24 import com.google.common.collect.ImmutableSetMultimap; 25 import com.google.common.collect.LinkedHashMultimap; 26 import com.google.common.collect.Multimap; 27 28 /** 29 * The main API for accessing {@link CldrPath} and {@link CldrValue} instances for CLDR data. This 30 * API abstracts the data sources, file names and other implementation details of CLDR to provide 31 * a clean way to access CLDR data. 32 * 33 * <p>{@code CldrData} instances are obtained from an appropriate {@code CldrDataSupplier}, and 34 * accept a {@link ValueVisitor} or {@link PrefixVisitor} to iterate over the data. 35 * 36 * <p>For example the following code prints every value (including its associated distinguishing 37 * path) in the BCP-47 data in DTD order: 38 * <pre>{@code 39 * CldrDataSupplier supplier = CldrDataSupplier.forFilesIn(rootDir); 40 * CldrData bcp47Data = supplier.getDataForType(CldrDataType.BCP47); 41 * bcp47Data.accept(PathOrder.DTD, System.out::println); 42 * }</pre> 43 * 44 * <p>Note that while the paths of values visited in a single {@link CldrData} instance are unique, 45 * there is nothing to prevent duplication between multiple data sources. This is particularly 46 * important when considering "ordered" elements with a sort index, since it represents "encounter 47 * order" and so any merging of values would have to track and rewrite sort indices carefully. It 48 * is recommended that if multiple {@code CldrData} instances are to be processed, users ensure 49 * that no path prefixes be shared between them. See also {@link CldrPath#getSortIndex()}. 50 * 51 * <p>Note that because the distinguishing paths associated with a {@link CldrValue} are unique per 52 * visitation, the special "version" path/value must be omitted (e.g. "//ldml/version") since it 53 * would otherwise appear multiple times. This should be fine, since the version is always available 54 * via {@link #getCldrVersionString()} and this mechanism is scheduled for deprecation anyway. 55 */ 56 public abstract class CldrDataSupplier { 57 /** 58 * Returns the current CLDR version string (e.g. {@code "36"}). This is just wrapping the 59 * underlying CLDR version string to avoid users needing to import anything from outside the 60 * "api" package. 61 */ getCldrVersionString()62 public static String getCldrVersionString() { 63 return CLDRFile.GEN_VERSION; 64 } 65 66 /** Options for controlling how locale-based LDML data is processed. */ 67 public enum CldrResolution { 68 /** 69 * Locale-based CLDR data should include resolved values from other "parent" locales 70 * according to the CLDR specification. 71 */ 72 RESOLVED, 73 74 /** 75 * Locale-based CLDR data should only include values specified directly in the specified 76 * locale. 77 */ 78 UNRESOLVED 79 } 80 81 /** 82 * Returns a supplier for CLDR data in the specified CLDR project root directory. This must be 83 * a directory which contains the standard CLDR {@code "common"} directory file hierarchy. 84 * 85 * @param cldrRootDir the root directory of a CLDR project containing the data to be read. 86 * @return a supplier for CLDR data in the given path. 87 */ forCldrFilesIn(Path cldrRootDir)88 public static CldrDataSupplier forCldrFilesIn(Path cldrRootDir) { 89 // Note that, unlike "withDraftStatusAtLeast()", adding a new fluent method to support 90 // additional root directories is problematic, since: 91 // 1) directories are conceptually only important for FileBasedDataSupplier (so a new 92 // fluent method in the supplier API makes no sense for other implementations). 93 // 2) creating the directory map must happen before the supplier is returned (rather than 94 // just before it supplies any data) because of the getAvailableLocaleIds() method. 95 // 96 // Thus it seems better to just add an extra parameter to this method when/if needed. 97 // TODO: Extend the API to allow source roots to be specified (but not via directory name). 98 Set<String> rootDirs = ImmutableSet.of("common"); 99 return new FileBasedDataSupplier( 100 createCldrDirectoryMap(cldrRootDir, rootDirs), CldrDraftStatus.UNCONFIRMED); 101 } 102 103 /** 104 * Returns an unresolved CLDR data instance of a set of XML file. This is typically only used 105 * for accessing additional CLDR data outside the CLDR project directories. The data in the 106 * specified files is merged, and it is a error if the same path appears multiple times (i.e. 107 * this input file must be "disjoint" in terms of the CLDR paths they specify). 108 * 109 * @param type the expected CLDR type of the data in the XML file. 110 * @param draftStatus the desired status for filtering paths/values. 111 * @param xmlFiles the CLDR XML files. 112 * @return a data instance for the paths/values in the specified XML file. 113 */ forCldrFiles( CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles)114 public static CldrData forCldrFiles( 115 CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles) { 116 return new XmlDataSource(type, ImmutableSet.copyOf(xmlFiles), draftStatus); 117 } 118 createCldrDirectoryMap( Path cldrRootDir, Set<String> rootDirs)119 private static Multimap<CldrDataType, Path> createCldrDirectoryMap( 120 Path cldrRootDir, Set<String> rootDirs) { 121 122 LinkedHashMultimap<CldrDataType, Path> multimap = LinkedHashMultimap.create(); 123 for (CldrDataType type : CldrDataType.values()) { 124 type.getSourceDirectories() 125 .flatMap(d -> rootDirs.stream().map(r -> cldrRootDir.resolve(r).resolve(d))) 126 .filter(Files::isDirectory) 127 .forEach(p -> multimap.put(type, p)); 128 } 129 return multimap; 130 } 131 132 /** 133 * Returns an in-memory supplier for the specified {@link CldrValue}s. This is useful for 134 * testing or handling special case data. The default (arbitrary) path order is determined by 135 * the order of values passed to this method. 136 * 137 * @param values the values (and associated paths) to include in the returned data. 138 */ forValues(Iterable<CldrValue> values)139 public static CldrData forValues(Iterable<CldrValue> values) { 140 return new InMemoryData(values); 141 } 142 143 /** 144 * Returns a modified data supplier which only provides paths/values with a draft status at or 145 * above the specified value. To create a supplier that will process all CLDR paths/values, use 146 * {@link CldrDraftStatus#UNCONFIRMED UNCONFIRMED}. 147 * 148 * @param draftStatus the desired status for filtering paths/values. 149 * @return a modified supplier which filters by the specified status. 150 */ withDraftStatusAtLeast(CldrDraftStatus draftStatus)151 public abstract CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus); 152 153 /** 154 * Returns an LDML data instance for the specified locale ID. 155 * 156 * <p>If {@code resolution} is set to {@link CldrResolution#RESOLVED RESOLVED} then values 157 * inferred from parent locales and aliases will be produced by the supplier. Note that if an 158 * unsupported locale ID is given (i.e. one not in the set returned by 159 * {@link #getAvailableLocaleIds()}), then an empty data instance is returned. 160 * 161 * @param localeId the locale ID (e.g. "en_GB" or "root") for the returned data. 162 * @param resolution whether to resolve CLDR values for the given locale ID according to the 163 * CLDR specification. 164 * @return the specified locale based CLDR data (possibly empty). 165 * @throws IllegalArgumentException if the locale ID is not structurally valid. 166 */ getDataForLocale(String localeId, CldrResolution resolution)167 public abstract CldrData getDataForLocale(String localeId, CldrResolution resolution); 168 169 /** 170 * Returns an unmodifiable set of available locale IDs that this supplier can provide. This 171 * need not be ordered. 172 * 173 * @return the set of available locale IDs. 174 */ getAvailableLocaleIds()175 public abstract Set<String> getAvailableLocaleIds(); 176 177 /** 178 * Returns a data supplier for non-locale specific CLDR data of the given type. 179 * 180 * @param type the required non-{@link CldrDataType#LDML LDML} data type. 181 * @return the specified non-locale based CLDR data. 182 * @throws IllegalArgumentException if {@link CldrDataType#LDML} is given. 183 */ getDataForType(CldrDataType type)184 public abstract CldrData getDataForType(CldrDataType type); 185 186 private static final class FileBasedDataSupplier extends CldrDataSupplier { 187 private final ImmutableSetMultimap<CldrDataType, Path> directoryMap; 188 private final CldrDraftStatus draftStatus; 189 190 // Created on-demand to keep constructor simple (in a fluent API you might create several 191 // variants of a supplier but only get data from one, or only use non-LDML XML data). 192 private Factory factory = null; 193 FileBasedDataSupplier( Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus)194 private FileBasedDataSupplier( 195 Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus) { 196 this.directoryMap = ImmutableSetMultimap.copyOf(directoryMap); 197 this.draftStatus = checkNotNull(draftStatus); 198 } 199 200 // Locking should be no issue, since contention on these supplier instance is expected to 201 // be minimal. getFactory()202 private synchronized Factory getFactory() { 203 if (factory == null) { 204 File[] dirArray = 205 getDirectoriesForType(LDML).map(Path::toFile).toArray(File[]::new); 206 checkArgument(dirArray.length > 0, 207 "no LDML directories exist: %s", directoryMap.get(LDML)); 208 factory = SimpleFactory.make(dirArray, ".*", draftStatus.getRawStatus()); 209 } 210 return factory; 211 } 212 213 @Override withDraftStatusAtLeast(CldrDraftStatus draftStatus)214 public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) { 215 return new FileBasedDataSupplier(directoryMap, draftStatus); 216 } 217 218 @Override getDataForLocale(String localeId, CldrResolution resolution)219 public CldrData getDataForLocale(String localeId, CldrResolution resolution) { 220 LocaleIds.checkCldrLocaleId(localeId); 221 Factory factory = getFactory(); 222 if (factory.getAvailable().contains(localeId)) { 223 return new CldrFileDataSource( 224 factory.make(localeId, resolution == CldrResolution.RESOLVED)); 225 } 226 return NO_DATA; 227 } 228 229 @Override getAvailableLocaleIds()230 public Set<String> getAvailableLocaleIds() { 231 return getFactory().getAvailable(); 232 } 233 234 @Override getDataForType(CldrDataType type)235 public CldrData getDataForType(CldrDataType type) { 236 ImmutableSet<Path> xmlFiles = listXmlFilesForType(type); 237 if (!xmlFiles.isEmpty()) { 238 return new XmlDataSource(type, xmlFiles, draftStatus); 239 } 240 return NO_DATA; 241 } 242 getDirectoriesForType(CldrDataType type)243 private Stream<Path> getDirectoriesForType(CldrDataType type) { 244 return directoryMap.get(type).stream().filter(Files::exists); 245 } 246 listXmlFilesForType(CldrDataType type)247 private ImmutableSet<Path> listXmlFilesForType(CldrDataType type) { 248 ImmutableSet<Path> xmlFiles = getDirectoriesForType(type) 249 .flatMap(FileBasedDataSupplier::listXmlFiles) 250 .collect(toImmutableSet()); 251 checkArgument(!xmlFiles.isEmpty(), 252 "no XML files exist within directories: %s", directoryMap.get(type)); 253 return xmlFiles; 254 } 255 256 // This is a separate function because stream functions cannot throw checked exceptions. 257 // 258 // Note: "Files.walk()" warns about closing resources and suggests "try-with-resources" to 259 // ensure closure, "flatMap()" (which is what calls this method) is defined to call close() 260 // on each stream as it's added into the result, so in normal use this should all be fine. 261 // 262 // https://docs.oracle.com/javase/8/docs/api/java/util/stream/Stream.html#flatMap-java.util.function.Function- listXmlFiles(Path dir)263 private static Stream<Path> listXmlFiles(Path dir) { 264 try { 265 return Files.walk(dir).filter(IS_XML_FILE); 266 } catch (IOException e) { 267 throw new UncheckedIOException(e); 268 } 269 } 270 271 private static final Predicate<Path> IS_XML_FILE = 272 p -> Files.isRegularFile(p) && p.getFileName().toString().endsWith(".xml"); 273 } 274 275 private static final CldrData NO_DATA = new CldrData() { 276 @Override public void accept(PathOrder order, ValueVisitor visitor) {} 277 278 @Override public void accept(PathOrder order, PrefixVisitor visitor) {} 279 280 @Override public CldrValue get(CldrPath path) { 281 return null; 282 } 283 }; 284 } 285