1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package org.unicode.icu.tool.cldrtoicu; 4 5 import static com.google.common.base.Preconditions.checkNotNull; 6 7 import java.util.ArrayList; 8 import java.util.List; 9 import java.util.function.BiConsumer; 10 import java.util.function.BiFunction; 11 import java.util.function.Consumer; 12 import java.util.function.Function; 13 import java.util.function.Supplier; 14 15 import org.unicode.cldr.api.CldrData; 16 import org.unicode.cldr.api.CldrData.PathOrder; 17 import org.unicode.cldr.api.CldrData.PrefixVisitor; 18 import org.unicode.cldr.api.CldrData.PrefixVisitor.Context; 19 import org.unicode.cldr.api.CldrPath; 20 import org.unicode.cldr.api.CldrValue; 21 import org.unicode.cldr.api.PathMatcher; 22 23 import com.google.common.collect.ImmutableList; 24 import com.google.common.collect.Lists; 25 26 /** 27 * An immutable processor which can be configured to process CLDR data according to a series of 28 * mappings from CLDR paths to "actions". 29 * 30 * <p>In typical use a processor would be statically created to bind paths and handler functions 31 * (actions) together, and calling {@link CldrDataProcessor#process(CldrData, Object, PathOrder)} 32 * once for each {@link CldrData} instance. 33 * 34 * <p>A processor is built by adding a mixture of "actions" to a builder. An action either defines 35 * how to handle a single value (see {@link SubProcessor#addValueAction addValueAction()}) or how 36 * to start a new sub-processor at a specific point in the data hierarchy (see {@link 37 * SubProcessor#addAction addAction()} or {@link SubProcessor#addSubprocessor addSubprocessor()}). 38 * 39 * @param <T> the main "state" type used by the processor for the top-level processing. 40 */ 41 public class CldrDataProcessor<T> { 42 /** Returns a processor builder which operates on a "state" of type {@code <T>}. */ builder()43 public static <T> Builder<T> builder() { 44 return new Builder<>(); 45 } 46 47 /** 48 * A builder for processing a CLDR data sub-hierarchy. 49 * 50 * @param <T> the "state" type used by the processor. 51 */ 52 public static abstract class SubProcessor<T> { 53 final List<PrefixBuilder<?, T>> prefixActions = new ArrayList<>(); 54 final List<ValueAction<T>> valueActions = new ArrayList<>(); 55 SubProcessor()56 private SubProcessor() { } 57 58 /** 59 * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder 60 * for the sub-hierarchy. 61 * 62 * <p>This method is intended for cases where the subtype state does not depend on the 63 * parent state or the path prefix, but needs some post-processing. For example, the 64 * subtype state might just be a {@code List} and the elements added to it must be 65 * combined with the parent state after sub-hierarchy is processing is complete. 66 * 67 * <pre>{@code 68 * processor 69 * .addAction("//parent/path", ArrayList::new, ParentState::addValues) 70 * .addValueAction("value/suffix", List::add); 71 * }</pre> 72 * 73 * @param pattern the path pattern for the prefix where sub-processing starts. 74 * @param newStateFn a supplier of subtype state instances for each sub-processing step. 75 * @param doneFn called after each sub-processing step. 76 */ addAction( String pattern, Supplier<S> newStateFn, BiConsumer<T, ? super S> doneFn)77 public <S> SubProcessor<S> addAction( 78 String pattern, Supplier<S> newStateFn, BiConsumer<T, ? super S> doneFn) { 79 return addAction(pattern, (t, p) -> newStateFn.get(), doneFn); 80 } 81 82 /** 83 * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder 84 * for the sub-hierarchy. 85 * 86 * <p>This method is similar to {@link #addAction(String, Supplier, BiConsumer)} but is 87 * intended for cases where the subtype state depends on the parent path prefix. 88 * 89 * <pre>{@code 90 * processor 91 * .addAction("//parent/path[@type=*]", SubState::fromType, ParentState::addSubState) 92 * .addValueAction("value/suffix", SubState::collectValue); 93 * }</pre> 94 * 95 * @param pattern the path pattern for the prefix where sub-processing starts. 96 * @param newStateFn a supplier of subtype state instances for each sub-processing step. 97 * @param doneFn called after each sub-processing step. 98 */ addAction( String pattern, Function<CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)99 public <S> SubProcessor<S> addAction( 100 String pattern, Function<CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn) { 101 return addAction(pattern, (t, p) -> newStateFn.apply(p), doneFn); 102 } 103 104 /** 105 * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder 106 * for the sub-hierarchy. 107 * 108 * <p>This method is intended for the case where the subtype state is derived from the 109 * parent state (e.g. an inner class) but does not depend on the path prefix at which the 110 * sub-hierarchy is rooted. 111 * 112 * <pre>{@code 113 * processor 114 * .addAction("//parent/path", ParentState::newValueCollector) 115 * .addValueAction("value/suffix", ValueCollector::addValue); 116 * }</pre> 117 * 118 * @param pattern the path pattern for the prefix where sub-processing starts. 119 * @param newStateFn a supplier of subtype state instances for each sub-processing step. 120 */ addAction(String pattern, Function<T, S> newStateFn)121 public <S> SubProcessor<S> addAction(String pattern, Function<T, S> newStateFn) { 122 return addAction(pattern, (t, p) -> newStateFn.apply(t)); 123 } 124 125 /** 126 * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder 127 * for the sub-hierarchy. 128 * 129 * <p>This method is intended for the case where the subtype state is derived from the 130 * parent state (e.g. an inner class) and the path prefix at which the sub-hierarchy is 131 * rooted. 132 * 133 * <pre>{@code 134 * processor 135 * .addAction("//parent/path[@type=*]", ParentState::newCollectorOfType) 136 * .addValueAction("value/suffix", ValueCollector::addValue); 137 * }</pre> 138 * 139 * @param pattern the path pattern for the prefix where sub-processing starts. 140 * @param newStateFn a supplier of subtype state instances for each sub-processing step. 141 */ addAction( String pattern, BiFunction<T, CldrPath, S> newStateFn)142 public <S> SubProcessor<S> addAction( 143 String pattern, BiFunction<T, CldrPath, S> newStateFn) { 144 return addAction(pattern, newStateFn, (t, y) -> {}); 145 } 146 147 /** 148 * Binds a subtype action to a {@link PathMatcher} prefix pattern, returning a new builder 149 * for the sub-hierarchy. 150 * 151 * <p>This method is the most general purpose way to add a sub-hierarchy action and is 152 * intended for the most complex cases, where subtype state depends on parent state and 153 * path prefix, and post processing is required. All other implementations of {@code 154 * addAction} simply delegate to this one in one way or another. 155 * 156 * <pre>{@code 157 * processor 158 * .addAction("//parent/path[@type=*]", ParentState::newCollector, ParentState::done) 159 * .addValueAction("value/suffix", ValueCollector::addValue); 160 * }</pre> 161 * 162 * @param pattern the path pattern for the prefix where sub-processing starts. 163 * @param newStateFn a supplier of subtype state instances for each sub-processing step. 164 * @param doneFn called after each sub-processing step. 165 */ addAction( String pattern, BiFunction<T, CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)166 public <S> SubProcessor<S> addAction( 167 String pattern, 168 BiFunction<T, CldrPath, S> newStateFn, 169 BiConsumer<T, ? super S> doneFn) { 170 171 PrefixBuilder<S, T> action = 172 new PrefixBuilder<>(getMatcher(pattern), newStateFn, doneFn); 173 prefixActions.add(action); 174 return action; 175 } 176 177 /** 178 * Returns a new sub-processor for the specified sub-hierarchy rooted at the given 179 * {@link PathMatcher} prefix pattern. The new processor builder has the same state type as 180 * the parent. 181 * 182 * <p>This method is intended for the case where multiple sub-processors are needed below 183 * a certain point in the hierarchy, but they all operate on the same state instance. 184 * 185 * <pre>{@code 186 * SubBuilder<MyCollector> subprocessor = processor.addSubprocessor("//parent/path"); 187 * subprocessor.addValueAction("value/suffix", MyCollector::addValue); 188 * subprocessor.addValueAction("other/suffix", MyCollector::addOtherValue); 189 * }</pre> 190 * 191 * @param pattern the path pattern for the prefix where sub-processing starts. 192 */ addSubprocessor(String pattern)193 public SubProcessor<T> addSubprocessor(String pattern) { 194 return addAction(pattern, (t, p) -> t); 195 } 196 197 /** 198 * Returns a new sub-processor for the specified sub-hierarchy rooted at the given 199 * {@link PathMatcher} prefix pattern. The new processor builder has the same state type as 200 * the parent. 201 * 202 * <p>This method is intended for the case where a some setup is required before a 203 * sub-hierarchy is processed, but the sub-processor state is the same. 204 * 205 * <pre>{@code 206 * SubBuilder<MyCollector> subprocessor = processor 207 * .addSubprocessor("//parent/path", MyCollector::startFn) 208 * .addValueAction("value/suffix", MyCollector::addValue); 209 * }</pre> 210 * 211 * @param startFn a handler called when sub-processing begins 212 * @param pattern the path pattern for the prefix where sub-processing starts. 213 */ addSubprocessor(String pattern, BiConsumer<T, CldrPath> startFn)214 public SubProcessor<T> addSubprocessor(String pattern, BiConsumer<T, CldrPath> startFn) { 215 return addAction(pattern, (t, p) -> { 216 startFn.accept(t, p); 217 return t; 218 }); 219 } 220 221 /** 222 * Adds an action to handle {@link CldrValue}s found in the current sub-hierarchy 223 * visitation which match the given {@link PathMatcher} leaf-path pattern. 224 * 225 * <p>This method is expected to be called at least once for each sub-hierarchy processor 226 * in order to handle the actual CLDR values being processed, and the path pattern should 227 * match leaf-paths in the CLDR data hierarchy, rather than path prefixes. 228 * 229 * <p>Multiple value actions can be added to a sub-hierarchy processor, and paths are 230 * matched in the order the actions are added. It is also possible to mix sub-hierarchy 231 * actions and value actions on the same processor, but note that sub-hierarchy processors 232 * will take precedence, so you cannot try to match the same value in both a sub-hierarchy 233 * processor and a value action. 234 * 235 * For example: 236 * <pre>{@code 237 * processor 238 * .addAction("//parent/path", ...) 239 * .addValueAction("value/suffix", ...); 240 * // This will never match any values since the sub-hierarchy processor takes precedence! 241 * processor.addValueAction("//parent/path/value/suffix", ...); 242 * }</pre> 243 * 244 * @param pattern the CLDR path suffix idenifying the values to be processed. 245 * @param doFn the action to be carried out for each value. 246 */ addValueAction(String pattern, BiConsumer<T, CldrValue> doFn)247 public void addValueAction(String pattern, BiConsumer<T, CldrValue> doFn) { 248 valueActions.add(new ValueAction<>(getMatcher(pattern), doFn)); 249 } 250 getMatcher(String pattern)251 abstract PathMatcher getMatcher(String pattern); 252 } 253 254 /** 255 * A root builder of a CLDR data processor. 256 * 257 * @param <T> the processor state type. 258 */ 259 public static final class Builder<T> extends SubProcessor<T> { Builder()260 private Builder() { } 261 262 /** Returns the immutable CLDR data processor. */ build()263 public CldrDataProcessor<T> build() { 264 return new CldrDataProcessor<>( 265 Lists.transform(prefixActions, PrefixBuilder::build), valueActions); 266 } 267 268 @Override getMatcher(String pattern)269 PathMatcher getMatcher(String pattern) { 270 return PathMatcher.of(pattern); 271 } 272 } 273 274 /** 275 * A sub-hierarchy data processor rooted at some specified path prefix. 276 * 277 * @param <S> the subtype processor state. 278 * @param <T> the parent processor state. 279 */ 280 private static class PrefixBuilder<S, T> extends SubProcessor<S> { 281 private final PathMatcher matcher; 282 private final BiFunction<T, CldrPath, S> newStateFn; 283 private final BiConsumer<T, ? super S> doneFn; 284 PrefixBuilder( PathMatcher matcher, BiFunction<T, CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)285 PrefixBuilder( 286 PathMatcher matcher, 287 BiFunction<T, CldrPath, S> newStateFn, 288 BiConsumer<T, ? super S> doneFn) { 289 this.matcher = checkNotNull(matcher); 290 this.newStateFn = checkNotNull(newStateFn); 291 this.doneFn = checkNotNull(doneFn); 292 } 293 build()294 PrefixAction<S, T> build() { 295 List<PrefixAction<?, S>> actions = Lists.transform(prefixActions, PrefixBuilder::build); 296 return new PrefixAction<>(actions, valueActions, matcher, newStateFn, doneFn); 297 } 298 getMatcher(String pattern)299 @Override PathMatcher getMatcher(String pattern) { 300 return matcher.withSuffix(pattern); 301 } 302 } 303 304 private final ImmutableList<PrefixAction<?, T>> prefixActions; 305 private final ImmutableList<ValueAction<T>> valueActions; 306 CldrDataProcessor( List<PrefixAction<?, T>> prefixActions, List<ValueAction<T>> valueActions)307 private CldrDataProcessor( 308 List<PrefixAction<?, T>> prefixActions, 309 List<ValueAction<T>> valueActions) { 310 this.prefixActions = ImmutableList.copyOf(prefixActions); 311 this.valueActions = ImmutableList.copyOf(valueActions); 312 } 313 314 /** 315 * Processes a CLDR data instance according to the actions registered for this processor in DTD 316 * order. This method is preferred over {@link #process(CldrData, Object, PathOrder)} and 317 * eventually the ability to even specify a path order for processing will be removed. 318 * 319 * <p>This is the main method used to drive the processing of some CLDR data and is typically 320 * used like: 321 * 322 * <pre>{@code 323 * MyResult result = CLDR_PROCESSOR.process(data, new MyResult(), DTD); 324 * }</pre> 325 * <p>or:* 326 * <pre>{@code 327 * MyResult result = CLDR_PROCESSOR.process(data, MyResult.newBuilder(), DTD).build(); 328 * }</pre> 329 * 330 * @param data the CLDR data to be processed. 331 * @param state an instance of the "primary" state. 332 * @return the given primary state (after modification). 333 */ process(CldrData data, T state)334 public T process(CldrData data, T state) { 335 return process(data, state, PathOrder.DTD); 336 } 337 338 /** 339 * Processes a CLDR data instance according to the actions registered for this processor. 340 * Callers should prefer using {@link #process(CldrData, Object)} whenever possible and avoid 341 * relying on path ordering for processing. 342 * 343 * @param data the CLDR data to be processed. 344 * @param state an instance of the "primary" state. 345 * @param pathOrder the order in which CLDR paths should be visited. 346 * @return the given primary state (after modification). 347 */ process(CldrData data, T state, PathOrder pathOrder)348 public T process(CldrData data, T state, PathOrder pathOrder) { 349 data.accept(pathOrder, new DispatchingVisitor<>(this, state, s -> {})); 350 return state; 351 } 352 dispatchPrefixActions(T state, CldrPath prefix, Context context)353 private void dispatchPrefixActions(T state, CldrPath prefix, Context context) { 354 for (PrefixAction<?, T> a : prefixActions) { 355 if (a.matches(state, prefix, context)) { 356 break; 357 } 358 } 359 } 360 dispatchValueActions(T state, CldrValue value)361 private void dispatchValueActions(T state, CldrValue value) { 362 for (ValueAction<T> a : valueActions) { 363 if (a.matches(state, value)) { 364 break; 365 } 366 } 367 } 368 369 /* 370 * Implementation notes: 371 * 372 * "PrefixAction" is a critical part of the design of the path visitor. It acts as a bridge 373 * between the parent visitation (with state type 'T') and child visitation (state type 'S'). 374 * 375 * It is the only class to need to know about both types. Both types are known when the 376 * CldrDataProcessor is made, but during visitation the caller of the "matches" method doesn't 377 * need to know about the child type, which is why the parent can just have a list of 378 * "PrefixAction<?, T>" and don't need any magical recasting. 379 * 380 * It might only be a few lines of code, but it can only exist in a class which knows about 381 * both parent and child types (obtaining a new child state is a function of the parent state). 382 */ 383 static final class PrefixAction<S, T> extends CldrDataProcessor<S> { 384 private final PathMatcher matcher; 385 private final BiFunction<T, CldrPath, S> newStateFn; 386 private final BiConsumer<T, ? super S> doneFn; 387 PrefixAction( List<PrefixAction<?, S>> prefixActions, List<ValueAction<S>> valueActions, PathMatcher matcher, BiFunction<T, CldrPath, S> newStateFn, BiConsumer<T, ? super S> doneFn)388 PrefixAction( 389 List<PrefixAction<?, S>> prefixActions, 390 List<ValueAction<S>> valueActions, 391 PathMatcher matcher, 392 BiFunction<T, CldrPath, S> newStateFn, 393 BiConsumer<T, ? super S> doneFn) { 394 super(prefixActions, valueActions); 395 this.matcher = checkNotNull(matcher); 396 this.newStateFn = checkNotNull(newStateFn); 397 this.doneFn = checkNotNull(doneFn); 398 } 399 matches(T state, CldrPath prefix, Context context)400 public boolean matches(T state, CldrPath prefix, Context context) { 401 if (matcher.locallyMatches(prefix)) { 402 Consumer<S> doneFn = childState -> this.doneFn.accept(state, childState); 403 context.install( 404 new DispatchingVisitor<>(this, newStateFn.apply(state, prefix), doneFn), 405 DispatchingVisitor::done); 406 return true; 407 } 408 return false; 409 } 410 } 411 412 private static final class ValueAction<T> { 413 private final PathMatcher matcher; 414 private BiConsumer<T, CldrValue> doFn; 415 ValueAction(PathMatcher matcher, BiConsumer<T, CldrValue> doFn)416 ValueAction(PathMatcher matcher, BiConsumer<T, CldrValue> doFn) { 417 this.matcher = checkNotNull(matcher); 418 this.doFn = checkNotNull(doFn); 419 } 420 matches(T state, CldrValue value)421 boolean matches(T state, CldrValue value) { 422 if (matcher.locallyMatches(value.getPath())) { 423 doFn.accept(state, value); 424 return true; 425 } 426 return false; 427 } 428 } 429 430 private static final class DispatchingVisitor<T> implements PrefixVisitor { 431 CldrDataProcessor<T> processor; 432 private final T state; 433 private final Consumer<T> doneFn; 434 DispatchingVisitor(CldrDataProcessor<T> processor, T state, Consumer<T> doneFn)435 DispatchingVisitor(CldrDataProcessor<T> processor, T state, Consumer<T> doneFn) { 436 this.processor = checkNotNull(processor); 437 this.state = checkNotNull(state); 438 this.doneFn = checkNotNull(doneFn); 439 } 440 441 @Override visitPrefixStart(CldrPath prefix, Context context)442 public void visitPrefixStart(CldrPath prefix, Context context) { 443 processor.dispatchPrefixActions(state, prefix, context); 444 } 445 446 @Override visitValue(CldrValue value)447 public void visitValue(CldrValue value) { 448 processor.dispatchValueActions(state, value); 449 } 450 451 // Important: This is NOT visitPrefixEnd() since that happens multiple times and isn't 452 // going to be called for the prefix at which this visitor was started. done()453 void done() { 454 doneFn.accept(state); 455 } 456 } 457 } 458