• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static java.lang.Integer.parseInt;
7 
8 import java.time.LocalDate;
9 import java.time.LocalDateTime;
10 import java.time.ZoneId;
11 import java.util.function.Function;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14 
15 import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;
16 
17 import com.google.common.base.Ascii;
18 import com.google.common.base.CharMatcher;
19 import com.google.common.collect.ImmutableMap;
20 
21 /**
22  * The named functions used by the {@code RegexTransformer} for {@code ldml2icu_supplemental.txt}.
23  */
24 final class IcuFunctions {
25     /**
26      * Converts an ISO date string to a space-separated pair of integer values representing the top
27      * and bottom parts of a deconstructed millisecond epoch value (i.e. {@code
28      * "<hi32bits> <low32bits>"}).
29      *
30      * <p>Note that the values are formatted as <em>signed</em> decimal values, so it's entirely
31      * possible that the low bits value will be appear as a negative number (the high bits won't
32      * appear negative for many thousands of years).
33      *
34      * <ul>
35      *   <li>args[0] = ISO date string (e.g. "2019-05-23")
36      *   <li>args[1] = Date field type name (e.g. "from")
37      *   <li>args[2] = Timezone for ISO date string, as CLDR canonical “long” time zone ID; Etc/UTC for most
38      * </ul>
39      */
40     static final NamedFunction DATE_FN =
41         NamedFunction.create("date", 3, args -> {
42             long millis =
43                 DateFieldType.toEnum(args.get(1)).toEpochMillis(LocalDate.parse(args.get(0)), args.get(2));
44             // Strictly speaking the masking is redundant and could be removed.
45             int hiBits = (int) ((millis >>> 32) & 0xFFFFFFFFL);
46             int loBits = (int) (millis & 0xFFFFFFFFL);
47             return hiBits + " " + loBits;
48         });
49 
50     // TODO: Improve this documentation (e.g. why is this being done, give examples?).
51     /**
52      * Inserts '%' into numberingSystems descriptions.
53      *
54      * <ul>
55      *   <li>args[0] = numbering system description (string)
56      * </ul>
57      */
58     static final NamedFunction ALGORITHM_FN =
59         NamedFunction.create("algorithm", 1, args -> {
60             String value = args.get(0);
61             int percentPos = value.lastIndexOf('/') + 1;
62             return value.substring(0, percentPos) + '%' + value.substring(percentPos);
63         });
64 
65     /**
66      * Converts a number into a special integer that represents the number in normalized scientific
67      * notation for ICU's RB parser.
68      *
69      * <p>Resultant integers are in the form "xxyyyyyy", where "xx" is the exponent offset by 50
70      * and "yyyyyy" is the coefficient to 5 decimal places. Results may also have a leading '-' to
71      * denote negative values.
72      *
73      * <p>For example:
74      * <pre>{@code
75      * 14660000000000 -> 1.466E13    -> 63146600
76      * 0.0001         -> 1E-4        -> 46100000
77      * -123.456       -> -1.23456E-2 -> -48123456
78      * }</pre>
79      *
80      * <p>The additional exponent offset is applied directly to the calculated exponent and is used
81      * to do things like converting percentages into their decimal representation (i.e. by passing
82      * a value of "-2").
83      *
84      * <ul>
85      *   <li>args[0] = number to be converted (double)
86      *   <li>args[1] = additional exponent offset (integer)
87      * </ul>
88      */
89     static final NamedFunction EXP_FN =
90         NamedFunction.create("exp", 2, args -> {
91             double value = Double.parseDouble(args.get(0));
92             if (value == 0) {
93                 return "0";
94             }
95             int exponent = 50;
96             if (args.size() == 2) {
97                 exponent += Integer.parseInt(args.get(1));
98             }
99             String sign = value >= 0 ? "" : "-";
100             value = Math.abs(value);
101             while (value >= 10) {
102                 value /= 10;
103                 exponent++;
104             }
105             while (value < 1) {
106                 value *= 10;
107                 exponent--;
108             }
109             if (exponent < 0 || exponent > 99) {
110                 throw new IllegalArgumentException("Exponent out of bounds: " + exponent);
111             }
112             return sign + exponent + Math.round(value * 100000);
113         });
114 
115     // Allow for single digit values in any part and negative year values.
116     private static final Pattern YMD = Pattern.compile("(-?[0-9]+)-([0-9]{1,2})-([0-9]{1,2})");
117 
118     /**
119      * Converts an ISO date string (i.e. "YYYY-MM-DD") into an ICU date string, which is
120      * the same but with spaces instead of hyphens. Since functions are expanded before the
121      * resulting value is split, this function will result in 3 separate values being created,
122      * unless the function call is enclosed in quotes.
123      *
124      * <p>Note that for some cases (e.g. "eras") the year part can be negative (e.g. "-2165-1-1")
125      * so this is not as simple as "split by hyphen".
126      *
127      * <ul>
128      *   <li>args[0] = ISO date string (e.g. "2019-05-23" or "-2165-1-1")
129      * </ul>
130      */
131     static final NamedFunction YMD_FN =
132         NamedFunction.create("ymd", 1, args -> {
133             Matcher m = YMD.matcher(args.get(0));
134             checkArgument(m.matches(), "invalid year-month-day string: %s", args.get(0));
135             // NOTE: Re-parsing is not optional since it removes leading zeros (needed for ICU).
136             return String.format("%s %s %s",
137                 parseInt(m.group(1)), parseInt(m.group(2)), parseInt(m.group(3)));
138         });
139 
140     // For transforming day-of-week identifiers.
141     private static final ImmutableMap<String, String> WEEKDAY_MAP_ID =
142         ImmutableMap.<String, String>builder()
143             .put("sun", "1")
144             .put("mon", "2")
145             .put("tues", "3")
146             .put("wed", "4")
147             .put("thu", "5")
148             .put("fri", "6")
149             .put("sat", "7")
150             .build();
151 
152     /**
153      * Converts a day-of-week identifier into its ordinal value (e.g. "sun" --> 1, "mon" --> 2 ...).
154      */
155     static final NamedFunction DAY_NUMBER_FN =
156         NamedFunction.create("day_number", 1,
157             args -> {
158                 String id = WEEKDAY_MAP_ID.get(args.get(0));
159                 checkArgument(id != null, "unknown weekday: %s", args.get(0));
160                 return id;
161             });
162 
163     // For transform IDs in <contextTransform> elements.
164     private static final ImmutableMap<String, String> TRANSFORM_ID_MAP =
165         ImmutableMap.of("no-change", "0", "titlecase-firstword", "1");
166 
167     /**
168      * Converts the transform type in the {@code <contextTransform>} element into its ICU index
169      * (e.g. "titlecase-firstword" --> 1).
170      */
171     static final NamedFunction CONTEXT_TRANSFORM_INDEX_FN =
172         NamedFunction.create("context_transform_index", 1,
173             args -> {
174                 String id = TRANSFORM_ID_MAP.get(args.get(0));
175                 checkArgument(id != null, "unknown contextTransform: %s", args.get(0));
176                 return id;
177             });
178 
179     // For DATE_FN only.
180     private enum DateFieldType {
181         from(LocalDate::atStartOfDay),
182         // Remember that atTime() takes nanoseconds, not micro or milli.
183         to(d -> d.atTime(23, 59, 59, 999_000_000));
184 
185         private final Function<LocalDate, LocalDateTime> adjustFn;
186 
DateFieldType(Function<LocalDate, LocalDateTime> adjustFn)187         DateFieldType(Function<LocalDate, LocalDateTime> adjustFn) {
188             this.adjustFn = adjustFn;
189         }
190 
toEpochMillis(LocalDate date, String tzid)191         long toEpochMillis(LocalDate date, String tzid) {
192             // Need to check whether Java ZoneId handles all CLDR canonical “long” time zone IDs
193             return adjustFn.apply(date).atZone(ZoneId.of(tzid)).toInstant().toEpochMilli();
194         }
195 
toEnum(String value)196         static DateFieldType toEnum(String value) {
197             switch (Ascii.toLowerCase(CharMatcher.whitespace().trimFrom(value))) {
198             case "from":
199             case "start":
200                 return from;
201             case "to":
202             case "end":
203                 return to;
204             default:
205                 throw new IllegalArgumentException(value + " is not a valid date field type");
206             }
207         }
208     }
209 
IcuFunctions()210     private IcuFunctions() {}
211 }
212