• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.Preconditions.checkNotNull;
6 
7 import java.util.ArrayList;
8 import java.util.List;
9 import java.util.function.BiConsumer;
10 import java.util.function.BiFunction;
11 import java.util.function.Consumer;
12 import java.util.function.Function;
13 import java.util.function.Supplier;
14 
15 import org.unicode.cldr.api.CldrData;
16 import org.unicode.cldr.api.CldrData.PathOrder;
17 import org.unicode.cldr.api.CldrData.PrefixVisitor;
18 import org.unicode.cldr.api.CldrData.PrefixVisitor.Context;
19 import org.unicode.cldr.api.CldrPath;
20 import org.unicode.cldr.api.CldrValue;
21 import org.unicode.cldr.api.PathMatcher;
22 
23 import com.google.common.collect.ImmutableList;
24 import com.google.common.collect.Lists;
25 
26 /**
27  * An immutable processor which can be configured to process CLDR data according to a series of
28  * mappings from CLDR paths to "actions".
29  *
30  * <p>In typical use a processor would be statically created to bind paths and handler functions
31  * (actions) together, and calling {@link CldrDataProcessor#process(CldrData, Object, PathOrder)}
32  * once for each {@link CldrData} instance.
33  *
34  * <p>A processor is built by adding a mixture of "actions" to a builder. An action either defines
35  * how to handle a single value (see {@link SubProcessor#addValueAction addValueAction()}) or how
36  * to start a new sub-processor at a specific point in the data hierarchy (see {@link
37  * SubProcessor#addAction addAction()} or {@link SubProcessor#addSubprocessor addSubprocessor()}).
38  *
39  * @param <T> the main "state" type used by the processor for the top-level processing.
40  */
41 public class CldrDataProcessor<T> {
42     /** Returns a processor builder which operates on a "state" of type {@code <T>}. */
builder()43     public static <T> Builder<T> builder() {
44         return new Builder<>();
45     }
46 
47     /**
48      * A builder for processing a CLDR data sub-hierarchy.
49      *
50      * @param <T> the "state" type used by the processor.
51      */
52     public static abstract class SubProcessor<T> {
53         final List<PrefixBuilder<?, T>> prefixActions = new ArrayList<>();
54         final List<ValueAction<T>> valueActions = new ArrayList<>();
55 
SubProcessor()56         private SubProcessor() { }
57 
58         /**
59          * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
60          * for the sub-hierarchy.
61          *
62          * <p>This method is intended for cases where the subtype state does not depend on the
63          * parent state or the path prefix, but needs some post-processing. For example, the
64          * subtype state might just be a {@code List} and the elements added to it must be
65          * combined with the parent state after sub-hierarchy is processing is complete.
66          *
67          * <pre>{@code
68          * processor
69          *     .addAction("//parent/path", ArrayList::new, ParentState::addValues)
70          *     .addValueAction("value/suffix", List::add);
71          * }</pre>
72          *
73          * @param pattern the path pattern for the prefix where sub-processing starts.
74          * @param newStateFn a supplier of subtype state instances for each sub-processing step.
75          * @param doneFn called after each sub-processing step.
76          */
addAction( String pattern, Supplier<S> newStateFn, BiConsumer<T, ? super S> doneFn)77         public <S> SubProcessor<S> addAction(
78             String pattern, Supplier<S> newStateFn, BiConsumer<T, ? super S> doneFn) {
79             return addAction(pattern, (t, p) -> newStateFn.get(), doneFn);
80         }
81 
82         /**
83          * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
84          * for the sub-hierarchy.
85          *
86          * <p>This method is similar to {@link #addAction(String, Supplier, BiConsumer)} but is
87          * intended for cases where the subtype state depends on the parent path prefix.
88          *
89          * <pre>{@code
90          * processor
91          *     .addAction("//parent/path[@type=*]", SubState::fromType, ParentState::addSubState)
92          *     .addValueAction("value/suffix", SubState::collectValue);
93          * }</pre>
94          *
95          * @param pattern the path pattern for the prefix where sub-processing starts.
96          * @param newStateFn a supplier of subtype state instances for each sub-processing step.
97          * @param doneFn called after each sub-processing step.
98          */
addAction( String pattern, Function<CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)99         public <S> SubProcessor<S> addAction(
100             String pattern, Function<CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn) {
101             return addAction(pattern, (t, p) -> newStateFn.apply(p), doneFn);
102         }
103 
104         /**
105          * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
106          * for the sub-hierarchy.
107          *
108          * <p>This method is intended for the case where the subtype state is derived from the
109          * parent state (e.g. an inner class) but does not depend on the path prefix at which the
110          * sub-hierarchy is rooted.
111          *
112          * <pre>{@code
113          * processor
114          *     .addAction("//parent/path", ParentState::newValueCollector)
115          *     .addValueAction("value/suffix", ValueCollector::addValue);
116          * }</pre>
117          *
118          * @param pattern the path pattern for the prefix where sub-processing starts.
119          * @param newStateFn a supplier of subtype state instances for each sub-processing step.
120          */
addAction(String pattern, Function<T, S> newStateFn)121         public <S> SubProcessor<S> addAction(String pattern, Function<T, S> newStateFn) {
122             return addAction(pattern, (t, p) -> newStateFn.apply(t));
123         }
124 
125         /**
126          * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
127          * for the sub-hierarchy.
128          *
129          * <p>This method is intended for the case where the subtype state is derived from the
130          * parent state (e.g. an inner class) and the path prefix at which the sub-hierarchy is
131          * rooted.
132          *
133          * <pre>{@code
134          * processor
135          *     .addAction("//parent/path[@type=*]", ParentState::newCollectorOfType)
136          *     .addValueAction("value/suffix", ValueCollector::addValue);
137          * }</pre>
138          *
139          * @param pattern the path pattern for the prefix where sub-processing starts.
140          * @param newStateFn a supplier of subtype state instances for each sub-processing step.
141          */
addAction( String pattern, BiFunction<T, CldrPath, S> newStateFn)142         public <S> SubProcessor<S> addAction(
143             String pattern, BiFunction<T, CldrPath, S> newStateFn) {
144             return addAction(pattern, newStateFn, (t, y) -> {});
145         }
146 
147         /**
148          * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder
149          * for the sub-hierarchy.
150          *
151          * <p>This method is the most general purpose way to add a sub-hierarchy action and is
152          * intended for the most complex cases, where subtype state depends on parent state and
153          * path prefix, and post processing is required. All other implementations of {@code
154          * addAction} simply delegate to this one in one way or another.
155          *
156          * <pre>{@code
157          * processor
158          *     .addAction("//parent/path[@type=*]", ParentState::newCollector, ParentState::done)
159          *     .addValueAction("value/suffix", ValueCollector::addValue);
160          * }</pre>
161          *
162          * @param pattern the path pattern for the prefix where sub-processing starts.
163          * @param newStateFn a supplier of subtype state instances for each sub-processing step.
164          * @param doneFn called after each sub-processing step.
165          */
addAction( String pattern, BiFunction<T, CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)166         public <S> SubProcessor<S> addAction(
167             String pattern,
168             BiFunction<T, CldrPath, S> newStateFn,
169             BiConsumer<T, ? super S> doneFn) {
170 
171             PrefixBuilder<S, T> action =
172                 new PrefixBuilder<>(getMatcher(pattern), newStateFn, doneFn);
173             prefixActions.add(action);
174             return action;
175         }
176 
177         /**
178          * Returns a new sub-processor for the specified sub-hierarchy rooted at the given
179          * {@link PathMatcher} prefix pattern. The new processor builder has the same state type as
180          * the parent.
181          *
182          * <p>This method is intended for the case where multiple sub-processors are needed below
183          * a certain point in the hierarchy, but they all operate on the same state instance.
184          *
185          * <pre>{@code
186          * SubBuilder<MyCollector> subprocessor = processor.addSubprocessor("//parent/path");
187          * subprocessor.addValueAction("value/suffix", MyCollector::addValue);
188          * subprocessor.addValueAction("other/suffix", MyCollector::addOtherValue);
189          * }</pre>
190          *
191          * @param pattern the path pattern for the prefix where sub-processing starts.
192          */
addSubprocessor(String pattern)193         public SubProcessor<T> addSubprocessor(String pattern) {
194             return addAction(pattern, (t, p) -> t);
195         }
196 
197         /**
198          * Returns a new sub-processor for the specified sub-hierarchy rooted at the given
199          * {@link PathMatcher} prefix pattern. The new processor builder has the same state type as
200          * the parent.
201          *
202          * <p>This method is intended for the case where a some setup is required before a
203          * sub-hierarchy is processed, but the sub-processor state is the same.
204          *
205          * <pre>{@code
206          * SubBuilder<MyCollector> subprocessor = processor
207          *     .addSubprocessor("//parent/path", MyCollector::startFn)
208          *     .addValueAction("value/suffix", MyCollector::addValue);
209          * }</pre>
210          *
211          * @param startFn a handler called when sub-processing begins
212          * @param pattern the path pattern for the prefix where sub-processing starts.
213          */
addSubprocessor(String pattern, BiConsumer<T, CldrPath> startFn)214         public SubProcessor<T> addSubprocessor(String pattern, BiConsumer<T, CldrPath> startFn) {
215             return addAction(pattern, (t, p) -> {
216                 startFn.accept(t, p);
217                 return t;
218             });
219         }
220 
221         /**
222          * Adds an action to handle {@link CldrValue}s found in the current sub-hierarchy
223          * visitation which match the given {@link PathMatcher} leaf-path pattern.
224          *
225          * <p>This method is expected to be called at least once for each sub-hierarchy processor
226          * in order to handle the actual CLDR values being processed, and the path pattern should
227          * match leaf-paths in the CLDR data hierarchy, rather than path prefixes.
228          *
229          * <p>Multiple value actions can be added to a sub-hierarchy processor, and paths are
230          * matched in the order the actions are added. It is also possible to mix sub-hierarchy
231          * actions and value actions on the same processor, but note that sub-hierarchy processors
232          * will take precedence, so you cannot try to match the same value in both a sub-hierarchy
233          * processor and a value action.
234          *
235          * For example:
236          * <pre>{@code
237          * processor
238          *     .addAction("//parent/path", ...)
239          *     .addValueAction("value/suffix", ...);
240          * // This will never match any values since the sub-hierarchy processor takes precedence!
241          * processor.addValueAction("//parent/path/value/suffix", ...);
242          * }</pre>
243          *
244          * @param pattern the CLDR path suffix idenifying the values to be processed.
245          * @param doFn the action to be carried out for each value.
246          */
addValueAction(String pattern, BiConsumer<T, CldrValue> doFn)247         public void addValueAction(String pattern, BiConsumer<T, CldrValue> doFn) {
248             valueActions.add(new ValueAction<>(getMatcher(pattern), doFn));
249         }
250 
getMatcher(String pattern)251         abstract PathMatcher getMatcher(String pattern);
252     }
253 
254     /**
255      * A root builder of a CLDR data processor.
256      *
257      * @param <T> the processor state type.
258      */
259     public static final class Builder<T> extends SubProcessor<T> {
Builder()260         private Builder() { }
261 
262         /** Returns the immutable CLDR data processor. */
build()263         public CldrDataProcessor<T> build() {
264             return new CldrDataProcessor<>(
265                 Lists.transform(prefixActions, PrefixBuilder::build), valueActions);
266         }
267 
268         @Override
getMatcher(String pattern)269         PathMatcher getMatcher(String pattern) {
270             return PathMatcher.of(pattern);
271         }
272     }
273 
274     /**
275      * A sub-hierarchy data processor rooted at some specified path prefix.
276      *
277      * @param <S> the subtype processor state.
278      * @param <T> the parent processor state.
279      */
280     private static class PrefixBuilder<S, T> extends SubProcessor<S> {
281         private final PathMatcher matcher;
282         private final BiFunction<T, CldrPath, S> newStateFn;
283         private final BiConsumer<T, ? super S> doneFn;
284 
PrefixBuilder( PathMatcher matcher, BiFunction<T, CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)285         PrefixBuilder(
286             PathMatcher matcher,
287             BiFunction<T, CldrPath, S> newStateFn,
288             BiConsumer<T, ? super S> doneFn) {
289             this.matcher = checkNotNull(matcher);
290             this.newStateFn = checkNotNull(newStateFn);
291             this.doneFn = checkNotNull(doneFn);
292         }
293 
build()294         PrefixAction<S, T> build() {
295             List<PrefixAction<?, S>> actions = Lists.transform(prefixActions, PrefixBuilder::build);
296             return new PrefixAction<>(actions, valueActions, matcher, newStateFn, doneFn);
297         }
298 
getMatcher(String pattern)299         @Override PathMatcher getMatcher(String pattern) {
300             return matcher.withSuffix(pattern);
301         }
302     }
303 
304     private final ImmutableList<PrefixAction<?, T>> prefixActions;
305     private final ImmutableList<ValueAction<T>> valueActions;
306 
CldrDataProcessor( List<PrefixAction<?, T>> prefixActions, List<ValueAction<T>> valueActions)307     private CldrDataProcessor(
308         List<PrefixAction<?, T>> prefixActions,
309         List<ValueAction<T>> valueActions) {
310         this.prefixActions = ImmutableList.copyOf(prefixActions);
311         this.valueActions = ImmutableList.copyOf(valueActions);
312     }
313 
314     /**
315      * Processes a CLDR data instance according to the actions registered for this processor in DTD
316      * order. This method is preferred over {@link #process(CldrData, Object, PathOrder)} and
317      * eventually the ability to even specify a path order for processing will be removed.
318      *
319      * <p>This is the main method used to drive the processing of some CLDR data and is typically
320      * used like:
321      *
322      * <pre>{@code
323      * MyResult result = CLDR_PROCESSOR.process(data, new MyResult(), DTD);
324      * }</pre>
325      * <p>or:*
326      * <pre>{@code
327      * MyResult result = CLDR_PROCESSOR.process(data, MyResult.newBuilder(), DTD).build();
328      * }</pre>
329      *
330      * @param data the CLDR data to be processed.
331      * @param state an instance of the "primary" state.
332      * @return the given primary state (after modification).
333      */
process(CldrData data, T state)334     public T process(CldrData data, T state) {
335         return process(data, state, PathOrder.DTD);
336     }
337 
338     /**
339      * Processes a CLDR data instance according to the actions registered for this processor.
340      * Callers should prefer using {@link #process(CldrData, Object)} whenever possible and avoid
341      * relying on path ordering for processing.
342      *
343      * @param data the CLDR data to be processed.
344      * @param state an instance of the "primary" state.
345      * @param pathOrder the order in which CLDR paths should be visited.
346      * @return the given primary state (after modification).
347      */
process(CldrData data, T state, PathOrder pathOrder)348     public T process(CldrData data, T state, PathOrder pathOrder) {
349         data.accept(pathOrder, new DispatchingVisitor<>(this, state, s -> {}));
350         return state;
351     }
352 
dispatchPrefixActions(T state, CldrPath prefix, Context context)353     private void dispatchPrefixActions(T state, CldrPath prefix, Context context) {
354         for (PrefixAction<?, T> a : prefixActions) {
355             if (a.matches(state, prefix, context)) {
356                 break;
357             }
358         }
359     }
360 
dispatchValueActions(T state, CldrValue value)361     private void dispatchValueActions(T state, CldrValue value) {
362         for (ValueAction<T> a : valueActions) {
363             if (a.matches(state, value)) {
364                 break;
365             }
366         }
367     }
368 
369     /*
370      * Implementation notes:
371      *
372      * "PrefixAction" is a critical part of the design of the path visitor. It acts as a bridge
373      * between the parent visitation (with state type 'T') and child visitation (state type 'S').
374      *
375      * It is the only class to need to know about both types. Both types are known when the
376      * CldrDataProcessor is made, but during visitation the caller of the "matches" method doesn't
377      * need to know about the child type, which is why the parent can just have a list of
378      * "PrefixAction<?, T>" and don't need any magical recasting.
379      *
380      * It might only be a few lines of code, but it can only exist in a class which knows about
381      * both parent and child types (obtaining a new child state is a function of the parent state).
382      */
383     static final class PrefixAction<S, T> extends CldrDataProcessor<S> {
384         private final PathMatcher matcher;
385         private final BiFunction<T, CldrPath, S> newStateFn;
386         private final BiConsumer<T, ? super S> doneFn;
387 
PrefixAction( List<PrefixAction<?, S>> prefixActions, List<ValueAction<S>> valueActions, PathMatcher matcher, BiFunction<T, CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)388         PrefixAction(
389             List<PrefixAction<?, S>> prefixActions,
390             List<ValueAction<S>> valueActions,
391             PathMatcher matcher,
392             BiFunction<T, CldrPath, S> newStateFn,
393             BiConsumer<T, ? super S> doneFn) {
394             super(prefixActions, valueActions);
395             this.matcher = checkNotNull(matcher);
396             this.newStateFn = checkNotNull(newStateFn);
397             this.doneFn = checkNotNull(doneFn);
398         }
399 
matches(T state, CldrPath prefix, Context context)400         public boolean matches(T state, CldrPath prefix, Context context) {
401             if (matcher.locallyMatches(prefix)) {
402                 Consumer<S> doneFn = childState -> this.doneFn.accept(state, childState);
403                 context.install(
404                     new DispatchingVisitor<>(this, newStateFn.apply(state, prefix), doneFn),
405                     DispatchingVisitor::done);
406                 return true;
407             }
408             return false;
409         }
410     }
411 
412     private static final class ValueAction<T> {
413         private final PathMatcher matcher;
414         private BiConsumer<T, CldrValue> doFn;
415 
ValueAction(PathMatcher matcher, BiConsumer<T, CldrValue> doFn)416         ValueAction(PathMatcher matcher, BiConsumer<T, CldrValue> doFn) {
417             this.matcher = checkNotNull(matcher);
418             this.doFn = checkNotNull(doFn);
419         }
420 
matches(T state, CldrValue value)421         boolean matches(T state, CldrValue value) {
422             if (matcher.locallyMatches(value.getPath())) {
423                 doFn.accept(state, value);
424                 return true;
425             }
426             return false;
427         }
428     }
429 
430     private static final class DispatchingVisitor<T> implements PrefixVisitor {
431         CldrDataProcessor<T> processor;
432         private final T state;
433         private final Consumer<T> doneFn;
434 
DispatchingVisitor(CldrDataProcessor<T> processor, T state, Consumer<T> doneFn)435         DispatchingVisitor(CldrDataProcessor<T> processor, T state, Consumer<T> doneFn) {
436             this.processor = checkNotNull(processor);
437             this.state = checkNotNull(state);
438             this.doneFn = checkNotNull(doneFn);
439         }
440 
441         @Override
visitPrefixStart(CldrPath prefix, Context context)442         public void visitPrefixStart(CldrPath prefix, Context context) {
443             processor.dispatchPrefixActions(state, prefix, context);
444         }
445 
446         @Override
visitValue(CldrValue value)447         public void visitValue(CldrValue value) {
448             processor.dispatchValueActions(state, value);
449         }
450 
451         // Important: This is NOT visitPrefixEnd() since that happens multiple times and isn't
452         // going to be called for the prefix at which this visitor was started.
done()453         void done() {
454             doneFn.accept(state);
455         }
456     }
457 }
458