// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu.regex;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;

import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import org.unicode.cldr.api.CldrDataType;
import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.DynamicVars;
import org.unicode.icu.tool.cldrtoicu.PathValueTransformer.Result;
import org.unicode.icu.tool.cldrtoicu.RbPath;

import com.google.common.collect.ImmutableList;

/*
 * A rule models one target xpath specification from the configuration file (a line starting
 * with "//") together with the one or more result specifications attached to it. For example:
 *
 *   //supplementalData/languageData/language[@type="(%W)"][@scripts="(%W)"][@territories="(%W)"]
 *     ; /languageData/$1/primary/scripts    ; values=$2
 *     ; /languageData/$1/primary/territories; values=$3
 *
 * is held as one rule carrying two result specifications.
 */
abstract class Rule {
    // The CLDR data type of paths this rule is able to match.
    private final CldrDataType dtdType;
    // Name of the first path element below the root. Lets non-matching paths be rejected
    // quickly and lets rules be "bucketed" by prefix to speed up matching.
    private final String pathPrefix;
    // The result specifications (at least one is expected) applied to matching paths/values.
    private final ImmutableList<ResultSpec> resultSpecs;
    // Debug-only information, useful for tracking down unused rules.
    private final String xpathSpec;
    private final int lineNumber;

    // Only the nested subclasses in this file can reach this constructor.
    private Rule(
        CldrDataType type,
        String prefix,
        Iterable<ResultSpec> specs,
        String spec,
        int line) {

        // Null checks run in declaration order (type, prefix, specs, spec).
        this.dtdType = checkNotNull(type);
        this.pathPrefix = checkNotNull(prefix);
        this.resultSpecs = ImmutableList.copyOf(specs);
        this.xpathSpec = checkNotNull(spec);
        this.lineNumber = line;
    }

    /** Creates a rule whose '%X' arguments are all already resolved (almost all cases). */
    static Rule staticRule(
        CldrDataType dtdType,
        String prefix,
        Iterable<ResultSpec> specs,
        String pathRegex,
        String xpathSpec,
        int lineNumber) {

        return new StaticRule(dtdType, prefix, specs, pathRegex, xpathSpec, lineNumber);
    }

    /**
     * Creates a rule in which some '%X' arguments stay unresolved until matching occurs.
     *
     * <p>NOTE(review): the {@code pathRegex} argument is handed to the rule as its path prefix
     * (it ends up as the value returned by {@link #getPathPrefix()}); the regular expression
     * itself is produced later from {@code varString}. Consider renaming the parameter.
     */
    static Rule dynamicRule(
        CldrDataType dtdType,
        String pathRegex,
        Iterable<ResultSpec> specs,
        VarString varString,
        Function<Character, CldrPath> varFn,
        String xpathSpec,
        int lineNumber) {

        return new DynamicRule(dtdType, pathRegex, specs, varString, varFn, xpathSpec, lineNumber);
    }

    /** Returns the CLDR DTD type of the paths this rule can match. */
    final CldrDataType getDataType() {
        return dtdType;
    }

    /** Returns the name of the first path element below the path root. */
    final String getPathPrefix() {
        return pathPrefix;
    }

    /** Returns the regular expression that CLDR path strings are matched against. */
    abstract Pattern getPathPattern(DynamicVars varLookupFn);

    /**
     * Tries to match the given xpath and, on success, feeds the captured arguments to every
     * result specification, collecting whatever results they produce. An empty list is
     * returned when the path does not match.
     */
    final ImmutableList<Result> transform(CldrValue v, String fullXPath, DynamicVars varFn) {
        Matcher matcher = getPathPattern(varFn).matcher(fullXPath);
        if (!matcher.matches()) {
            return ImmutableList.of();
        }
        return resultSpecs.stream()
            .flatMap(spec -> spec.transform(v, matcher, varFn))
            .collect(toImmutableList());
    }

    /**
     * Returns the fallback functions defined by this rule's result specifications (specs
     * without one are skipped). These are used to determine the set of possible fallback
     * values for a given resource bundle path.
     */
    final Stream<BiFunction<RbPath, DynamicVars, Optional<Result>>> getFallbackFunctions() {
        Stream<Optional<BiFunction<RbPath, DynamicVars, Optional<Result>>>> candidates =
            resultSpecs.stream().map(ResultSpec::getFallbackFunction);
        return candidates.filter(Optional::isPresent).map(Optional::get);
    }

    // Debugging only
    final String getXpathSpec() {
        return xpathSpec;
    }

    // Debugging only
    final int getLineNumber() {
        return lineNumber;
    }

    /** A rule whose path regular expression is fixed at construction time. */
    private static final class StaticRule extends Rule {
        // Regular expression obtained from the processed xpath specification. It is only
        // suitable for matching incoming xpaths and cannot be processed in any other way.
        private final Pattern xpathPattern;

        StaticRule(
            CldrDataType type,
            String pathPrefix,
            Iterable<ResultSpec> resultSpecs,
            String regex,
            String spec,
            int line) {

            super(type, pathPrefix, resultSpecs, spec, line);
            this.xpathPattern = Pattern.compile(regex);
        }

        @Override
        Pattern getPathPattern(DynamicVars varLookupFn) {
            // The lookup function is irrelevant here; the pattern never changes.
            return xpathPattern;
        }
    }

    /** A rule whose path regular expression is rebuilt from dynamic variables on each request. */
    private static final class DynamicRule extends Rule {
        // Template from which the xpath matching regular expression is generated once the
        // dynamic variables have been looked up.
        private final VarString varString;
        // Maps a variable character to the CLDR path used to look up its value.
        private final Function<Character, CldrPath> dynamicVarFn;

        DynamicRule(
            CldrDataType type,
            String pathPrefix,
            Iterable<ResultSpec> resultSpecs,
            VarString template,
            Function<Character, CldrPath> pathFn,
            String spec,
            int line) {

            super(type, pathPrefix, resultSpecs, spec, line);
            this.varString = checkNotNull(template);
            this.dynamicVarFn = checkNotNull(pathFn);
        }

        @Override
        Pattern getPathPattern(DynamicVars varLookupFn) {
            // Variable character -> CLDR path -> looked-up value; the resulting string is
            // compiled afresh on every call.
            return Pattern.compile(varString.apply(dynamicVarFn.andThen(varLookupFn)).get());
        }
    }
}