// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;

import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Integer.parseInt;

import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;

import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableMap;

/**
 * The named functions used by the {@code RegexTransformer} for {@code ldml2icu_supplemental.txt}.
 *
 * <p>Every function takes its arguments as strings and returns a string. Invalid arguments are
 * reported by throwing {@link IllegalArgumentException} (or a subclass of it).
 */
final class IcuFunctions {
    /**
     * Converts an ISO date string to a space-separated pair of integer values representing the top
     * and bottom parts of a deconstructed millisecond epoch value (i.e. {@code
     * "<hi32bits> <low32bits>"}).
     *
     * <p>Note that the values are formatted as <em>signed</em> decimal values, so it's entirely
     * possible that the low bits value will appear as a negative number (the high bits won't
     * appear negative for many thousands of years).
     *
     * <p>The date is interpreted in UTC. A "from" (or "start") date maps to the first millisecond
     * of that day and a "to" (or "end") date maps to the last millisecond of that day.
     *
     * <ul>
     *   <li>args[0] = ISO date string (e.g. "2019-05-23")
     *   <li>args[1] = Date field type name (e.g. "from")
     * </ul>
     */
    static final NamedFunction DATE_FN =
        NamedFunction.create("date", 2, args -> {
            long millis =
                DateFieldType.toEnum(args.get(1)).toEpochMillis(LocalDate.parse(args.get(0)));
            // Strictly speaking the masking is redundant and could be removed.
            int hiBits = (int) ((millis >>> 32) & 0xFFFFFFFFL);
            int loBits = (int) (millis & 0xFFFFFFFFL);
            return hiBits + " " + loBits;
        });

    // TODO: Improve this documentation (e.g. why is this being done, give examples?).
    /**
     * Inserts '%' into numberingSystems descriptions.
     *
     * <p>The '%' is placed immediately after the last '/' in the description, or at the very
     * start of the description if it contains no '/'. For example:
     * <pre>{@code
     *   foo/bar/baz  ->  foo/bar/%baz
     *   baz          ->  %baz
     * }</pre>
     *
     * <ul>
     *   <li>args[0] = numbering system description (string)
     * </ul>
     */
    static final NamedFunction ALGORITHM_FN =
        NamedFunction.create("algorithm", 1, args -> {
            String value = args.get(0);
            // lastIndexOf() returns -1 when there is no '/', which puts the '%' at index 0.
            int percentPos = value.lastIndexOf('/') + 1;
            return value.substring(0, percentPos) + '%' + value.substring(percentPos);
        });

    /**
     * Converts a number into a special integer that represents the number in normalized scientific
     * notation for ICU's RB parser.
     *
     * <p>Resultant integers are in the form "xxyyyyyy", where "xx" is the exponent offset by 50
     * and "yyyyyy" is the coefficient to 5 decimal places. Results may also have a leading '-' to
     * denote negative values. A value of zero is always returned as "0".
     *
     * <p>For example:
     * <pre>{@code
     *   14660000000000 -> 1.466E13    -> 63146600
     *   0.0001         -> 1E-4        -> 46100000
     *   -123.456       -> -1.23456E2  -> -52123456
     * }</pre>
     *
     * <p>The additional exponent offset is applied directly to the calculated exponent and is used
     * to do things like converting percentages into their decimal representation (i.e. by passing
     * a value of "-2").
     *
     * <ul>
     *   <li>args[0] = number to be converted (double)
     *   <li>args[1] = additional exponent offset (integer)
     * </ul>
     *
     * <p>An {@link IllegalArgumentException} is thrown if the number is not finite (NaN or
     * infinite) or if the resulting offset exponent does not fit in the range 0 to 99.
     */
    static final NamedFunction EXP_FN =
        NamedFunction.create("exp", 2, args -> {
            double value = Double.parseDouble(args.get(0));
            // Infinity would never terminate the normalization loop below and NaN would silently
            // produce a meaningless result, so reject both up front.
            checkArgument(Double.isFinite(value), "cannot convert non-finite number: %s", args.get(0));
            if (value == 0) {
                return "0";
            }
            int exponent = 50;
            if (args.size() == 2) {
                exponent += Integer.parseInt(args.get(1));
            }
            String sign = value >= 0 ? "" : "-";
            value = Math.abs(value);
            // Normalize so that 1 <= value < 10, tracking the power of ten in the exponent.
            while (value >= 10) {
                value /= 10;
                exponent++;
            }
            while (value < 1) {
                value *= 10;
                exponent--;
            }
            long coefficient = Math.round(value * 100000);
            // Rounding a mantissa just below 10 (e.g. 9.999999) yields 1000000, which has seven
            // digits and would corrupt the "xxyyyyyy" layout. Carry the overflow into the exponent.
            if (coefficient >= 1000000) {
                coefficient /= 10;
                exponent++;
            }
            if (exponent < 0 || exponent > 99) {
                throw new IllegalArgumentException("Exponent out of bounds: " + exponent);
            }
            return sign + exponent + coefficient;
        });

    // Allow for single digit values in any part and negative year values.
    private static final Pattern YMD = Pattern.compile("(-?[0-9]+)-([0-9]{1,2})-([0-9]{1,2})");

    /**
     * Converts an ISO date string (i.e. "YYYY-MM-DD") into an ICU date string, which is
     * the same but with spaces instead of hyphens. Since functions are expanded before the
     * resulting value is split, this function will result in 3 separate values being created,
     * unless the function call is enclosed in quotes.
     *
     * <p>Note that for some cases (e.g. "eras") the year part can be negative (e.g. "-2165-1-1")
     * so this is not as simple as "split by hyphen".
     *
     * <ul>
     *   <li>args[0] = ISO date string (e.g. "2019-05-23" or "-2165-1-1")
     * </ul>
     */
    static final NamedFunction YMD_FN =
        NamedFunction.create("ymd", 1, args -> {
            Matcher m = YMD.matcher(args.get(0));
            checkArgument(m.matches(), "invalid year-month-day string: %s", args.get(0));
            // NOTE: Re-parsing is not optional since it removes leading zeros (needed for ICU).
            return String.format("%s %s %s",
                parseInt(m.group(1)), parseInt(m.group(2)), parseInt(m.group(3)));
        });

    // For transforming day-of-week identifiers.
    private static final ImmutableMap<String, String> WEEKDAY_MAP_ID =
        ImmutableMap.<String, String>builder()
            .put("sun", "1")
            .put("mon", "2")
            // "tue" matches the three-letter form used by every other key in this map. The
            // historical "tues" spelling is retained so existing callers keep working.
            .put("tue", "3")
            .put("tues", "3")
            .put("wed", "4")
            .put("thu", "5")
            .put("fri", "6")
            .put("sat", "7")
            .build();

    /**
     * Converts a day-of-week identifier into its ordinal value (e.g. "sun" --> 1, "mon" --> 2 ...).
     *
     * <p>Identifiers are matched exactly (lower case, no surrounding whitespace) and an unknown
     * identifier results in an {@link IllegalArgumentException}.
     *
     * <ul>
     *   <li>args[0] = day-of-week identifier (e.g. "sun")
     * </ul>
     */
    static final NamedFunction DAY_NUMBER_FN =
        NamedFunction.create("day_number", 1,
            args -> {
                String id = WEEKDAY_MAP_ID.get(args.get(0));
                checkArgument(id != null, "unknown weekday: %s", args.get(0));
                return id;
            });

    // For transform IDs in <contextTransform> elements.
    private static final ImmutableMap<String, String> TRANSFORM_ID_MAP =
        ImmutableMap.of("no-change", "0", "titlecase-firstword", "1");

    /**
     * Converts the transform type in the {@code <contextTransform>} element into its ICU index
     * (e.g. "titlecase-firstword" --> 1).
     *
     * <p>An unknown transform type results in an {@link IllegalArgumentException}.
     *
     * <ul>
     *   <li>args[0] = transform type (either "no-change" or "titlecase-firstword")
     * </ul>
     */
    static final NamedFunction CONTEXT_TRANSFORM_INDEX_FN =
        NamedFunction.create("context_transform_index", 1,
            args -> {
                String id = TRANSFORM_ID_MAP.get(args.get(0));
                checkArgument(id != null, "unknown contextTransform: %s", args.get(0));
                return id;
            });

    // For DATE_FN only.
    private enum DateFieldType {
        // The start of a range is the first instant of the given day.
        from(LocalDate::atStartOfDay),
        // The end of a range is the last millisecond of the given day.
        // Remember that atTime() takes nanoseconds, not micro or milli.
        to(d -> d.atTime(23, 59, 59, 999_000_000));

        private final Function<LocalDate, LocalDateTime> adjustFn;

        DateFieldType(Function<LocalDate, LocalDateTime> adjustFn) {
            this.adjustFn = adjustFn;
        }

        /** Returns the UTC epoch milliseconds of the given date after adjusting it for this type. */
        long toEpochMillis(LocalDate date) {
            return adjustFn.apply(date).toInstant(ZoneOffset.UTC).toEpochMilli();
        }

        /**
         * Returns the field type for the given name, ignoring case and surrounding whitespace.
         * Both "from"/"start" and "to"/"end" are accepted; anything else results in an
         * {@link IllegalArgumentException}.
         */
        static DateFieldType toEnum(String value) {
            switch (Ascii.toLowerCase(CharMatcher.whitespace().trimFrom(value))) {
            case "from":
            case "start":
                return from;
            case "to":
            case "end":
                return to;
            default:
                throw new IllegalArgumentException(value + " is not a valid date field type");
            }
        }
    }

    private IcuFunctions() {}
}