• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.api;
2 
3 import static com.google.common.base.Preconditions.checkArgument;
4 import static com.google.common.base.Preconditions.checkNotNull;
5 import static com.google.common.collect.ImmutableSet.toImmutableSet;
6 import static org.unicode.cldr.api.CldrDataType.LDML;
7 
8 import java.io.File;
9 import java.io.IOException;
10 import java.io.UncheckedIOException;
11 import java.nio.file.Files;
12 import java.nio.file.Path;
13 import java.util.Set;
14 import java.util.function.Predicate;
15 import java.util.stream.Stream;
16 
17 import org.unicode.cldr.api.CldrData.PrefixVisitor;
18 import org.unicode.cldr.api.CldrData.ValueVisitor;
19 import org.unicode.cldr.util.CLDRFile;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.SimpleFactory;
22 
23 import com.google.common.collect.ImmutableSet;
24 import com.google.common.collect.ImmutableSetMultimap;
25 import com.google.common.collect.LinkedHashMultimap;
26 import com.google.common.collect.Multimap;
27 
28 /**
29  * The main API for accessing {@link CldrPath} and {@link CldrValue} instances for CLDR data. This
30  * API abstracts the data sources, file names and other implementation details of CLDR to provide
31  * a clean way to access CLDR data.
32  *
33  * <p>{@code CldrData} instances are obtained from an appropriate {@code CldrDataSupplier}, and
34  * accept a {@link ValueVisitor} or {@link PrefixVisitor} to iterate over the data.
35  *
36  * <p>For example the following code prints every value (including its associated distinguishing
37  * path) in the BCP-47 data in DTD order:
38  * <pre>{@code
39  *   CldrDataSupplier supplier = CldrDataSupplier.forFilesIn(rootDir);
40  *   CldrData bcp47Data = supplier.getDataForType(CldrDataType.BCP47);
41  *   bcp47Data.accept(PathOrder.DTD, System.out::println);
42  * }</pre>
43  *
44  * <p>Note that while the paths of values visited in a single {@link CldrData} instance are unique,
45  * there is nothing to prevent duplication between multiple data sources. This is particularly
46  * important when considering "ordered" elements with a sort index, since it represents "encounter
47  * order" and so any merging of values would have to track and rewrite sort indices carefully. It
48  * is recommended that if multiple {@code CldrData} instances are to be processed, users ensure
49  * that no path prefixes be shared between them. See also {@link CldrPath#getSortIndex()}.
50  *
51  * <p>Note that because the distinguishing paths associated with a {@link CldrValue} are unique per
52  * visitation, the special "version" path/value must be omitted (e.g. "//ldml/version") since it
53  * would otherwise appear multiple times. This should be fine, since the version is always available
54  * via {@link #getCldrVersionString()} and this mechanism is scheduled for deprecation anyway.
55  */
56 public abstract class CldrDataSupplier {
57     /**
58      * Returns the current CLDR version string (e.g. {@code "36"}). This is just wrapping the
59      * underlying CLDR version string to avoid users needing to import anything from outside the
60      * "api" package.
61      */
getCldrVersionString()62     public static String getCldrVersionString() {
63         return CLDRFile.GEN_VERSION;
64     }
65 
66     /** Options for controlling how locale-based LDML data is processed. */
67     public enum CldrResolution {
68         /**
69          * Locale-based CLDR data should include resolved values from other "parent" locales
70          * according to the CLDR specification.
71          */
72         RESOLVED,
73 
74         /**
75          * Locale-based CLDR data should only include values specified directly in the specified
76          * locale.
77          */
78         UNRESOLVED
79     }
80 
81     /**
82      * Returns a supplier for CLDR data in the specified CLDR project root directory. This must be
83      * a directory which contains the standard CLDR {@code "common"} directory file hierarchy.
84      *
85      * @param cldrRootDir the root directory of a CLDR project containing the data to be read.
86      * @return a supplier for CLDR data in the given path.
87      */
forCldrFilesIn(Path cldrRootDir)88     public static CldrDataSupplier forCldrFilesIn(Path cldrRootDir) {
89         // Note that, unlike "withDraftStatusAtLeast()", adding a new fluent method to support
90         // additional root directories is problematic, since:
91         // 1) directories are conceptually only important for FileBasedDataSupplier (so a new
92         //    fluent method in the supplier API makes no sense for other implementations).
93         // 2) creating the directory map must happen before the supplier is returned (rather than
94         //    just before it supplies any data) because of the getAvailableLocaleIds() method.
95         //
96         // Thus it seems better to just add an extra parameter to this method when/if needed.
97         // TODO: Extend the API to allow source roots to be specified (but not via directory name).
98         Set<String> rootDirs = ImmutableSet.of("common");
99         return new FileBasedDataSupplier(
100             createCldrDirectoryMap(cldrRootDir, rootDirs), CldrDraftStatus.UNCONFIRMED);
101     }
102 
103     /**
104      * Returns an unresolved CLDR data instance of a set of XML file. This is typically only used
105      * for accessing additional CLDR data outside the CLDR project directories. The data in the
106      * specified files is merged, and it is a error if the same path appears multiple times (i.e.
107      * this input file must be "disjoint" in terms of the CLDR paths they specify).
108      *
109      * @param type the expected CLDR type of the data in the XML file.
110      * @param draftStatus the desired status for filtering paths/values.
111      * @param xmlFiles the CLDR XML files.
112      * @return a data instance for the paths/values in the specified XML file.
113      */
forCldrFiles( CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles)114     public static CldrData forCldrFiles(
115         CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles) {
116         return new XmlDataSource(type, ImmutableSet.copyOf(xmlFiles), draftStatus);
117     }
118 
createCldrDirectoryMap( Path cldrRootDir, Set<String> rootDirs)119     private static Multimap<CldrDataType, Path> createCldrDirectoryMap(
120         Path cldrRootDir, Set<String> rootDirs) {
121 
122         LinkedHashMultimap<CldrDataType, Path> multimap = LinkedHashMultimap.create();
123         for (CldrDataType type : CldrDataType.values()) {
124             type.getSourceDirectories()
125                 .flatMap(d -> rootDirs.stream().map(r -> cldrRootDir.resolve(r).resolve(d)))
126                 .filter(Files::isDirectory)
127                 .forEach(p -> multimap.put(type, p));
128         }
129         return multimap;
130     }
131 
132     /**
133      * Returns an in-memory supplier for the specified {@link CldrValue}s. This is useful for
134      * testing or handling special case data. The default (arbitrary) path order is determined by
135      * the order of values passed to this method.
136      *
137      * @param values the values (and associated paths) to include in the returned data.
138      */
forValues(Iterable<CldrValue> values)139     public static CldrData forValues(Iterable<CldrValue> values) {
140         return new InMemoryData(values);
141     }
142 
143     /**
144      * Returns a modified data supplier which only provides paths/values with a draft status at or
145      * above the specified value. To create a supplier that will process all CLDR paths/values, use
146      * {@link CldrDraftStatus#UNCONFIRMED UNCONFIRMED}.
147      *
148      * @param draftStatus the desired status for filtering paths/values.
149      * @return a modified supplier which filters by the specified status.
150      */
withDraftStatusAtLeast(CldrDraftStatus draftStatus)151     public abstract CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus);
152 
153     /**
154      * Returns an LDML data instance for the specified locale ID.
155      *
156      * <p>If {@code resolution} is set to {@link CldrResolution#RESOLVED RESOLVED} then values
157      * inferred from parent locales and aliases will be produced by the supplier. Note that if an
158      * unsupported locale ID is given (i.e. one not in the set returned by
159      * {@link #getAvailableLocaleIds()}), then an empty data instance is returned.
160      *
161      * @param localeId the locale ID (e.g. "en_GB" or "root") for the returned data.
162      * @param resolution whether to resolve CLDR values for the given locale ID according to the
163      *     CLDR specification.
164      * @return the specified locale based CLDR data (possibly empty).
165      * @throws IllegalArgumentException if the locale ID is not structurally valid.
166      */
getDataForLocale(String localeId, CldrResolution resolution)167     public abstract CldrData getDataForLocale(String localeId, CldrResolution resolution);
168 
169     /**
170      * Returns an unmodifiable set of available locale IDs that this supplier can provide. This
171      * need not be ordered.
172      *
173      * @return the set of available locale IDs.
174      */
getAvailableLocaleIds()175     public abstract Set<String> getAvailableLocaleIds();
176 
177     /**
178      * Returns a data supplier for non-locale specific CLDR data of the given type.
179      *
180      * @param type the required non-{@link CldrDataType#LDML LDML} data type.
181      * @return the specified non-locale based CLDR data.
182      * @throws IllegalArgumentException if {@link CldrDataType#LDML} is given.
183      */
getDataForType(CldrDataType type)184     public abstract CldrData getDataForType(CldrDataType type);
185 
186     private static final class FileBasedDataSupplier extends CldrDataSupplier {
187         private final ImmutableSetMultimap<CldrDataType, Path> directoryMap;
188         private final CldrDraftStatus draftStatus;
189 
190         // Created on-demand to keep constructor simple (in a fluent API you might create several
191         // variants of a supplier but only get data from one, or only use non-LDML XML data).
192         private Factory factory = null;
193 
FileBasedDataSupplier( Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus)194         private FileBasedDataSupplier(
195             Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus) {
196             this.directoryMap = ImmutableSetMultimap.copyOf(directoryMap);
197             this.draftStatus = checkNotNull(draftStatus);
198         }
199 
200         // Locking should be no issue, since contention on these supplier instance is expected to
201         // be minimal.
getFactory()202         private synchronized Factory getFactory() {
203             if (factory == null) {
204                 File[] dirArray =
205                     getDirectoriesForType(LDML).map(Path::toFile).toArray(File[]::new);
206                 checkArgument(dirArray.length > 0,
207                     "no LDML directories exist: %s", directoryMap.get(LDML));
208                 factory = SimpleFactory.make(dirArray, ".*", draftStatus.getRawStatus());
209             }
210             return factory;
211         }
212 
213         @Override
withDraftStatusAtLeast(CldrDraftStatus draftStatus)214         public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
215             return new FileBasedDataSupplier(directoryMap, draftStatus);
216         }
217 
218         @Override
getDataForLocale(String localeId, CldrResolution resolution)219         public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
220             LocaleIds.checkCldrLocaleId(localeId);
221             Factory factory = getFactory();
222             if (factory.getAvailable().contains(localeId)) {
223                 return new CldrFileDataSource(
224                     factory.make(localeId, resolution == CldrResolution.RESOLVED));
225             }
226             return NO_DATA;
227         }
228 
229         @Override
getAvailableLocaleIds()230         public Set<String> getAvailableLocaleIds() {
231             return getFactory().getAvailable();
232         }
233 
234         @Override
getDataForType(CldrDataType type)235         public CldrData getDataForType(CldrDataType type) {
236             ImmutableSet<Path> xmlFiles = listXmlFilesForType(type);
237             if (!xmlFiles.isEmpty()) {
238                 return new XmlDataSource(type, xmlFiles, draftStatus);
239             }
240             return NO_DATA;
241         }
242 
getDirectoriesForType(CldrDataType type)243         private Stream<Path> getDirectoriesForType(CldrDataType type) {
244             return directoryMap.get(type).stream().filter(Files::exists);
245         }
246 
listXmlFilesForType(CldrDataType type)247         private ImmutableSet<Path> listXmlFilesForType(CldrDataType type) {
248             ImmutableSet<Path> xmlFiles = getDirectoriesForType(type)
249                 .flatMap(FileBasedDataSupplier::listXmlFiles)
250                 .collect(toImmutableSet());
251             checkArgument(!xmlFiles.isEmpty(),
252                 "no XML files exist within directories: %s", directoryMap.get(type));
253             return xmlFiles;
254         }
255 
256         // This is a separate function because stream functions cannot throw checked exceptions.
257         //
258         // Note: "Files.walk()" warns about closing resources and suggests "try-with-resources" to
259         // ensure closure, "flatMap()" (which is what calls this method) is defined to call close()
260         // on each stream as it's added into the result, so in normal use this should all be fine.
261         //
262         // https://docs.oracle.com/javase/8/docs/api/java/util/stream/Stream.html#flatMap-java.util.function.Function-
listXmlFiles(Path dir)263         private static Stream<Path> listXmlFiles(Path dir) {
264             try {
265                 return Files.walk(dir).filter(IS_XML_FILE);
266             } catch (IOException e) {
267                 throw new UncheckedIOException(e);
268             }
269         }
270 
271         private static final Predicate<Path> IS_XML_FILE =
272             p -> Files.isRegularFile(p) && p.getFileName().toString().endsWith(".xml");
273     }
274 
275     private static final CldrData NO_DATA = new CldrData() {
276         @Override public void accept(PathOrder order, ValueVisitor visitor) {}
277 
278         @Override public void accept(PathOrder order, PrefixVisitor visitor) {}
279 
280         @Override public CldrValue get(CldrPath path) {
281             return null;
282         }
283     };
284 }
285