1 package org.unicode.cldr.util; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Splitter; 5 import com.ibm.icu.util.Output; 6 import java.util.ArrayList; 7 import java.util.Iterator; 8 import java.util.List; 9 import java.util.function.Function; 10 import org.unicode.cldr.util.SupplementalDataInfo.UnitIdComponentType; 11 import org.unicode.cldr.util.With.SimpleIterator; 12 13 public class UnitParser implements SimpleIterator<String> { 14 public static final Splitter DASH_SPLITTER = Splitter.on('-'); 15 public static final Joiner DASH_JOIN = Joiner.on('-'); 16 17 private String bufferedItem = null; 18 private UnitIdComponentType bufferedType = null; 19 private Iterator<String> source; 20 private UnitIdComponentType type; 21 Function<String, UnitIdComponentType> componentTypeSupplier; 22 private String original; 23 24 // Provide this api for use inside of SupplementalDataInfo, to avoid circularity UnitParser(Function<String, UnitIdComponentType> componentTypeSupplier)25 public UnitParser(Function<String, UnitIdComponentType> componentTypeSupplier) { 26 this.componentTypeSupplier = componentTypeSupplier; 27 } 28 UnitParser()29 public UnitParser() { 30 this(CLDRConfig.getInstance().getSupplementalDataInfo()::getUnitIdComponentType); 31 } 32 33 // public UnitParser set(Iterator<String> source) { 34 // bufferedItem = null; 35 // this.source = source; 36 // return this; 37 // } 38 // 39 // public UnitParser set(Iterable<String> source) { 40 // return set(source.iterator()); 41 // } 42 // set(String source)43 public UnitParser set(String source) { 44 if (source == null) { 45 throw new IllegalArgumentException("Unit Parser doesn't handle null"); 46 } 47 bufferedItem = null; 48 this.original = source; 49 this.source = UnitParser.DASH_SPLITTER.split(source).iterator(); 50 return this; 51 } 52 53 private enum State { 54 start, 55 havePrefix, 56 haveBaseOrSuffix 57 } 58 getRemaining()59 public List<Pair<UnitIdComponentType, String>> getRemaining() { 60 List<Pair<UnitIdComponentType, String>> result = new ArrayList<>(); 61 Output<UnitIdComponentType> type = new Output<>(); 62 while (true) { 63 String item = nextParse(type); 64 if (item == null) { 65 return result; 66 } 67 result.add(Pair.of(type.value, item)); 68 } 69 } 70 71 /** 72 * Parses the next segment in the source from set. 73 * 74 * @param output returns type type of the item 75 * @return a unit segment of the form: prefix* base suffix*, and, per, or power; or null if no 76 * more remaining 77 */ nextParse(Output<UnitIdComponentType> unitIdComponentType)78 public String nextParse(Output<UnitIdComponentType> unitIdComponentType) { 79 String result = next(); 80 unitIdComponentType.value = type; 81 return result; 82 } 83 84 /** 85 * Return the last UnitIdComponentType from a next() call. 86 * 87 * @return 88 */ getLastUnitIdComponentType()89 public UnitIdComponentType getLastUnitIdComponentType() { 90 return type; 91 } 92 93 /** 94 * Parses the next segment in the source from set. The UnitIdComponentType can be retrieved 95 * after calling, from getLastUnitIdComponentType() 96 * 97 * @return a unit segment of the form: prefix* base suffix*, and, per, or power; or null if no 98 * more remaining 99 */ 100 @Override next()101 public String next() { 102 String output = null; 103 State state = State.start; 104 UnitIdComponentType outputType = null; 105 106 while (true) { 107 if (bufferedItem == null) { 108 if (!source.hasNext()) { 109 break; 110 } 111 bufferedItem = source.next(); 112 bufferedType = componentTypeSupplier.apply(bufferedItem); 113 } 114 switch (bufferedType) { 115 case prefix: 116 switch (state) { 117 case start: 118 state = State.havePrefix; 119 break; 120 case havePrefix: // ok, continue 121 break; 122 case haveBaseOrSuffix: 123 type = 124 outputType == UnitIdComponentType.suffix 125 ? UnitIdComponentType.base 126 : outputType; 127 return output; 128 } 129 break; 130 case base: 131 switch (state) { 132 case start: 133 case havePrefix: 134 state = State.haveBaseOrSuffix; 135 break; 136 case haveBaseOrSuffix: // have stuff to return 137 type = 138 outputType == UnitIdComponentType.suffix 139 ? UnitIdComponentType.base 140 : outputType; 141 return output; 142 } 143 break; 144 case suffix: 145 switch (state) { 146 case start: 147 case havePrefix: 148 throw new IllegalArgumentException( 149 "Unit suffix must follow base: " 150 + original 151 + " → " 152 + output 153 + " ❌ " 154 + bufferedItem); 155 case haveBaseOrSuffix: // ok, continue 156 break; 157 } 158 break; 159 case and: 160 case per: 161 case power: 162 switch (state) { 163 case start: // return this item 164 output = bufferedItem; 165 bufferedItem = null; 166 type = bufferedType; 167 return output; 168 case havePrefix: 169 throw new IllegalArgumentException( 170 "Unit prefix must be followed with base: " 171 + original 172 + " → " 173 + output 174 + " ❌ " 175 + bufferedItem); 176 case haveBaseOrSuffix: // have stuff to return 177 type = 178 outputType == UnitIdComponentType.suffix 179 ? UnitIdComponentType.base 180 : outputType; 181 return output; 182 } 183 break; 184 } 185 output = output == null ? bufferedItem : output + "-" + bufferedItem; 186 bufferedItem = null; 187 outputType = bufferedType; 188 } 189 switch (state) { 190 default: 191 case start: 192 return null; 193 case havePrefix: 194 throw new IllegalArgumentException( 195 "Unit prefix must be followed with base: " 196 + original 197 + " → " 198 + output 199 + " ❌ " 200 + bufferedItem); 201 case haveBaseOrSuffix: // have stuff to return 202 type = 203 outputType == UnitIdComponentType.suffix 204 ? UnitIdComponentType.base 205 : outputType; 206 return output; 207 } 208 } 209 210 // TODO create from custom map getUnitIdComponentType(String part)211 public UnitIdComponentType getUnitIdComponentType(String part) { 212 return componentTypeSupplier.apply(part); 213 } 214 } 215