• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static java.lang.Integer.parseInt;
7 
8 import java.time.LocalDate;
9 import java.time.LocalDateTime;
10 import java.time.ZoneOffset;
11 import java.util.function.Function;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14 
15 import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;
16 
17 import com.google.common.base.Ascii;
18 import com.google.common.base.CharMatcher;
19 import com.google.common.collect.ImmutableMap;
20 
21 /**
22  * The named functions used by the {@code RegexTransformer} for {@code ldml2icu_supplemental.txt}.
23  */
24 final class IcuFunctions {
25     /**
26      * Converts an ISO date string to a space-separated pair of integer values representing the top
27      * and bottom parts of a deconstructed millisecond epoch value (i.e. {@code
28      * "<hi32bits> <low32bits>"}).
29      *
30      * <p>Note that the values are formatted as <em>signed</em> decimal values, so it's entirely
31      * possible that the low bits value will be appear as a negative number (the high bits won't
32      * appear negative for many thousands of years).
33      *
34      * <ul>
35      *   <li>args[0] = ISO date string (e.g. "2019-05-23")
36      *   <li>args[1] = Date field type name (e.g. "from")
37      * </ul>
38      */
39     static final NamedFunction DATE_FN =
40         NamedFunction.create("date", 2, args -> {
41             long millis =
42                 DateFieldType.toEnum(args.get(1)).toEpochMillis(LocalDate.parse(args.get(0)));
43             // Strictly speaking the masking is redundant and could be removed.
44             int hiBits = (int) ((millis >>> 32) & 0xFFFFFFFFL);
45             int loBits = (int) (millis & 0xFFFFFFFFL);
46             return hiBits + " " + loBits;
47         });
48 
49     // TODO: Improve this documentation (e.g. why is this being done, give examples?).
50     /**
51      * Inserts '%' into numberingSystems descriptions.
52      *
53      * <ul>
54      *   <li>args[0] = numbering system description (string)
55      * </ul>
56      */
57     static final NamedFunction ALGORITHM_FN =
58         NamedFunction.create("algorithm", 1, args -> {
59             String value = args.get(0);
60             int percentPos = value.lastIndexOf('/') + 1;
61             return value.substring(0, percentPos) + '%' + value.substring(percentPos);
62         });
63 
64     /**
65      * Converts a number into a special integer that represents the number in normalized scientific
66      * notation for ICU's RB parser.
67      *
68      * <p>Resultant integers are in the form "xxyyyyyy", where "xx" is the exponent offset by 50
69      * and "yyyyyy" is the coefficient to 5 decimal places. Results may also have a leading '-' to
70      * denote negative values.
71      *
72      * <p>For example:
73      * <pre>{@code
74      * 14660000000000 -> 1.466E13    -> 63146600
75      * 0.0001         -> 1E-4        -> 46100000
76      * -123.456       -> -1.23456E-2 -> -48123456
77      * }</pre>
78      *
79      * <p>The additional exponent offset is applied directly to the calculated exponent and is used
80      * to do things like converting percentages into their decimal representation (i.e. by passing
81      * a value of "-2").
82      *
83      * <ul>
84      *   <li>args[0] = number to be converted (double)
85      *   <li>args[1] = additional exponent offset (integer)
86      * </ul>
87      */
88     static final NamedFunction EXP_FN =
89         NamedFunction.create("exp", 2, args -> {
90             double value = Double.parseDouble(args.get(0));
91             if (value == 0) {
92                 return "0";
93             }
94             int exponent = 50;
95             if (args.size() == 2) {
96                 exponent += Integer.parseInt(args.get(1));
97             }
98             String sign = value >= 0 ? "" : "-";
99             value = Math.abs(value);
100             while (value >= 10) {
101                 value /= 10;
102                 exponent++;
103             }
104             while (value < 1) {
105                 value *= 10;
106                 exponent--;
107             }
108             if (exponent < 0 || exponent > 99) {
109                 throw new IllegalArgumentException("Exponent out of bounds: " + exponent);
110             }
111             return sign + exponent + Math.round(value * 100000);
112         });
113 
114     // Allow for single digit values in any part and negative year values.
115     private static final Pattern YMD = Pattern.compile("(-?[0-9]+)-([0-9]{1,2})-([0-9]{1,2})");
116 
117     /**
118      * Converts an ISO date string (i.e. "YYYY-MM-DD") into an ICU date string, which is
119      * the same but with spaces instead of hyphens. Since functions are expanded before the
120      * resulting value is split, this function will result in 3 separate values being created,
121      * unless the function call is enclosed in quotes.
122      *
123      * <p>Note that for some cases (e.g. "eras") the year part can be negative (e.g. "-2165-1-1")
124      * so this is not as simple as "split by hyphen".
125      *
126      * <ul>
127      *   <li>args[0] = ISO date string (e.g. "2019-05-23" or "-2165-1-1")
128      * </ul>
129      */
130     static final NamedFunction YMD_FN =
131         NamedFunction.create("ymd", 1, args -> {
132             Matcher m = YMD.matcher(args.get(0));
133             checkArgument(m.matches(), "invalid year-month-day string: %s", args.get(0));
134             // NOTE: Re-parsing is not optional since it removes leading zeros (needed for ICU).
135             return String.format("%s %s %s",
136                 parseInt(m.group(1)), parseInt(m.group(2)), parseInt(m.group(3)));
137         });
138 
139     // For transforming day-of-week identifiers.
140     private static final ImmutableMap<String, String> WEEKDAY_MAP_ID =
141         ImmutableMap.<String, String>builder()
142             .put("sun", "1")
143             .put("mon", "2")
144             .put("tues", "3")
145             .put("wed", "4")
146             .put("thu", "5")
147             .put("fri", "6")
148             .put("sat", "7")
149             .build();
150 
151     /**
152      * Converts a day-of-week identifier into its ordinal value (e.g. "sun" --> 1, "mon" --> 2 ...).
153      */
154     static final NamedFunction DAY_NUMBER_FN =
155         NamedFunction.create("day_number", 1,
156             args -> {
157                 String id = WEEKDAY_MAP_ID.get(args.get(0));
158                 checkArgument(id != null, "unknown weekday: %s", args.get(0));
159                 return id;
160             });
161 
162     // For transform IDs in <contextTransform> elements.
163     private static final ImmutableMap<String, String> TRANSFORM_ID_MAP =
164         ImmutableMap.of("no-change", "0", "titlecase-firstword", "1");
165 
166     /**
167      * Converts the transform type in the {@code <contextTransform>} element into its ICU index
168      * (e.g. "titlecase-firstword" --> 1).
169      */
170     static final NamedFunction CONTEXT_TRANSFORM_INDEX_FN =
171         NamedFunction.create("context_transform_index", 1,
172             args -> {
173                 String id = TRANSFORM_ID_MAP.get(args.get(0));
174                 checkArgument(id != null, "unknown contextTransform: %s", args.get(0));
175                 return id;
176             });
177 
178     // For DATE_FN only.
179     private enum DateFieldType {
180         from(LocalDate::atStartOfDay),
181         // Remember that atTime() takes nanoseconds, not micro or milli.
182         to(d -> d.atTime(23, 59, 59, 999_000_000));
183 
184         private final Function<LocalDate, LocalDateTime> adjustFn;
185 
DateFieldType(Function<LocalDate, LocalDateTime> adjustFn)186         DateFieldType(Function<LocalDate, LocalDateTime> adjustFn) {
187             this.adjustFn = adjustFn;
188         }
189 
toEpochMillis(LocalDate date)190         long toEpochMillis(LocalDate date) {
191             return adjustFn.apply(date).toInstant(ZoneOffset.UTC).toEpochMilli();
192         }
193 
toEnum(String value)194         static DateFieldType toEnum(String value) {
195             switch (Ascii.toLowerCase(CharMatcher.whitespace().trimFrom(value))) {
196             case "from":
197             case "start":
198                 return from;
199             case "to":
200             case "end":
201                 return to;
202             default:
203                 throw new IllegalArgumentException(value + " is not a valid date field type");
204             }
205         }
206     }
207 
IcuFunctions()208     private IcuFunctions() {}
209 }
210