• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.LinkedHashSet;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Set;
11 import java.util.TreeMap;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14 import java.util.stream.Collectors;
15 import java.util.stream.Stream;
16 
17 import org.unicode.cldr.util.CldrUtility;
18 import org.unicode.cldr.util.Rational;
19 import org.unicode.cldr.util.UnitConverter.ConversionInfo;
20 import org.unicode.cldr.util.UnitConverter.TargetInfo;
21 
22 import com.google.common.base.Joiner;
23 import com.google.common.base.Splitter;
24 import com.google.common.collect.ImmutableList;
25 import com.google.common.collect.ImmutableMap;
26 import com.google.common.collect.ImmutableMultimap;
27 import com.google.common.collect.ImmutableSet;
28 import com.google.common.collect.LinkedHashMultimap;
29 import com.google.common.collect.Multimap;
30 import com.google.common.collect.TreeMultimap;
31 import com.ibm.icu.util.ICUUncheckedIOException;
32 
33 final class NistUnits {
34     private static final boolean DEBUG = false;
35 
36     final static Multimap<String,String> unitToQuantity;
37     final static Map<String, TargetInfo> derivedUnitToConversion;
38     final static List<ExternalUnitConversionData> externalConversionData;
39     final static Multimap<String, String> idChanges;
40     final static Set<String> skipping;
41 
42     static final Splitter SPLIT_MIDDOT = Splitter.on('·').trimResults();
43     static final Pattern flatExponent = Pattern.compile("([a-zA-Z]+)(-?[0-9]+)?");
44     static final Splitter SPLIT_TABS = Splitter.on('\t').trimResults();
45     static final Splitter SPLIT_COMMAS = Splitter.on(',').trimResults();
46     static final Splitter SPLIT_PARENS = Splitter.on('(').trimResults();
47 
48 
49     static {
50         try {
51             Multimap<String, String> _idChanges = LinkedHashMultimap.create();
52             Set<String> _skipping = new LinkedHashSet<>();
53 
54             List<ExternalUnitConversionData> _externalConversionData = new ArrayList<>();
try(BufferedReader in = CldrUtility.getUTF8Data("external/nistConversions.txt"))55             try (BufferedReader in = CldrUtility.getUTF8Data("external/nistConversions.txt")) {
56                 String quantity = null;
57                 try (Stream<String> s = in.lines()) {
58                     for (String line : (Iterable<String>) s::iterator) {
59                         if (line.startsWith("#")
60                             || line.equals("To convert from\tto\tMultiply by")
61                             || line.startsWith("degree Fahrenheit hour square foot per British thermal unitth inch") // bad NIST data
62                             ) {
63                             continue;
64                         }
65                         List<String> parts = SPLIT_TABS.splitToList(line);
66                         switch(parts.size()) {
67                         case 1:
68                             quantity = parts.get(0);
69                             break;
70                         case 4:
71                             Rational factor = Rational.of((parts.get(2) + parts.get(3)).replace(" ", ""));
72                             ExternalUnitConversionData data = new ExternalUnitConversionData(quantity, parts.get(0), parts.get(1), factor, line, _idChanges);
73                             _externalConversionData.add(data);
74                             break;
75                         default:
76                             _skipping.add(line);
77                         }
78                     }
79                 }
80             }
81 
82             Map<String, TargetInfo> unitToTargetInfo = new TreeMap<>();
83             Map<String,String> _symbolToUnit = new TreeMap<>();
84             Multimap<String,String> _unitToQuantity = TreeMultimap.create();
try(BufferedReader in = CldrUtility.getUTF8Data("external/nistBaseUnits.txt"))85             try (BufferedReader in = CldrUtility.getUTF8Data("external/nistBaseUnits.txt")) {
86                 try (Stream<String> s = in.lines()) {
87                     for (String line : (Iterable<String>) s::iterator) {
88                         if (line.startsWith("#")) {
89                             continue;
90                         }
91                         List<String> parts = SPLIT_TABS.splitToList(line);
92                         //#Base quantity  Name    Symbol
93                         String quantity2 = parts.get(0);
94                         String name = parts.get(1);
95                         String symbol = parts.get(2);
96                         switch(parts.size()) {
97                         case 3:
98                             _symbolToUnit.put(symbol, name);
99                             _unitToQuantity.put(name, quantity2);
100                             break;
101                         }
102                     }
103                 }
104             }
105 
try(BufferedReader in = CldrUtility.getUTF8Data("external/nistDerivedUnits.txt"))106             try (BufferedReader in = CldrUtility.getUTF8Data("external/nistDerivedUnits.txt")) {
107                 try (Stream<String> s = in.lines()) {
108                     for (String line : (Iterable<String>) s::iterator) {
109                         if (line.startsWith("#")) {
110                             continue;
111                         }
112                         List<String> parts = SPLIT_TABS.splitToList(line);
113                         // #Quantity   Special Name    Special symbol  Expression in terms of other SI units   Expression in terms of SI base units
114 
115                         String quantity = parts.get(0);
116                         List<String> quantities = SPLIT_COMMAS.splitToList(quantity).stream()
117                             .map(x ->  SPLIT_PARENS.split(parts.get(0)).iterator().next())
118                             .collect(Collectors.toList());
119                         quantity = Joiner.on(", ").join(quantities);
120 
121                         String name = SPLIT_PARENS.split(parts.get(1)).iterator().next();
122                         if (name.equals("degree Celsius")) {
123                             name = "celsius";
124                         }
125 
126                         String symbol = parts.get(2);
127                         String expressionInOtherSymbols = parts.get(4);
128                         String expressionInBaseSymbols = parts.get(4);
129                         _symbolToUnit.put(symbol, name);
130                         _unitToQuantity.putAll(name, quantities);
131 
132                         final String targetUnit = getUnitFromSymbols(expressionInBaseSymbols, _symbolToUnit);
133                         unitToTargetInfo.put(name, new TargetInfo(targetUnit, new ConversionInfo(Rational.ONE, Rational.ZERO), Collections.emptyMap()));
134 
135                         ExternalUnitConversionData data = new ExternalUnitConversionData(quantity, name, targetUnit, Rational.ONE, line, _idChanges);
136                         _externalConversionData.add(data);
137 
138                     }
139                 }
140             }
141 
142             // Protect everything
143 
144             skipping = ImmutableSet.copyOf(_skipping);
145             idChanges = ImmutableMultimap.copyOf(_idChanges);
146             externalConversionData = ImmutableList.copyOf(_externalConversionData);
147             unitToQuantity = ImmutableMultimap.copyOf(_unitToQuantity);
148             derivedUnitToConversion = ImmutableMap.copyOf(unitToTargetInfo);
149         } catch (IOException e) {
150             throw new ICUUncheckedIOException(e);
151         }
152     }
153 
getUnitFromSymbols(String expressionInBaseSymbols, Map<String, String> symbolToUnit)154     public static String getUnitFromSymbols(String expressionInBaseSymbols, Map<String, String> symbolToUnit) {
155         String result;
156         // handle the irregular formats
157         if (expressionInBaseSymbols.equals("m/m")) {
158             result = "meter-per-meter";
159         } else if (expressionInBaseSymbols.equals("m2/m2")) {
160             result = "square-meter-per-square-meter";
161         } else {
162             // m2 · kg · s-3 · A-1
163             StringBuilder numerator = new StringBuilder();
164             StringBuilder denominator = new StringBuilder();
165             for (String part : SPLIT_MIDDOT.split(expressionInBaseSymbols)) {
166                 final Matcher parts = flatExponent.matcher(part);
167                 if (!parts.matches()) {
168                     throw new IllegalArgumentException("bad symbol: " + part);
169                 }
170                 String unit = symbolToUnit.get(parts.group(1));
171                 String pow = null;
172                 int power = 0;
173                 final String exponent = parts.group(2);
174                 if (exponent != null) {
175                     power = Integer.parseInt(exponent);
176                     switch(Math.abs(power)) {
177                     case 0: case 1: break;// skip
178                     case 2: pow = "square-"; break;
179                     case 3: pow = "cubic-"; break;
180                     default: pow = "pow" + Math.abs(power) + "-"; break;
181                     }
182                 }
183                 StringBuilder target = power >= 0 ? numerator : denominator;
184                 if (target.length() != 0) {
185                     target.append('-');
186                 }
187                 if (pow != null) {
188                     target.append(pow);
189                 }
190                 target.append(unit);
191             }
192             result = (numerator.length() == 0 ? "" : numerator)
193                 + (denominator.length() == 0 ? "" :
194                     (numerator.length() == 0 ? "per-" : "-per-") + denominator);
195         }
196         if (DEBUG) System.out.println(expressionInBaseSymbols + " => " + result);
197         return result;
198     }
199 
200 }