• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 #ifndef __SOURCE_NUMBER_SKELETONS_H__
8 #define __SOURCE_NUMBER_SKELETONS_H__
9 
10 #include "number_types.h"
11 #include "numparse_types.h"
12 #include "unicode/ucharstrie.h"
13 #include "string_segment.h"
14 
15 U_NAMESPACE_BEGIN
16 namespace number {
17 namespace impl {
18 
19 // Forward-declaration
20 struct SeenMacroProps;
21 
22 // namespace for enums and entrypoint functions
23 namespace skeleton {
24 
25 ////////////////////////////////////////////////////////////////////////////////////////
26 // NOTE: For examples of how to add a new stem to the number skeleton parser, see:    //
27 // https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 //
28 // and                                                                                //
29 // https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 //
30 ////////////////////////////////////////////////////////////////////////////////////////
31 
/**
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * The enumerators are grouped into sections according to whether an option may or must
 * follow. The enumerators carry implicit values, so their numeric values are determined by
 * declaration order.
 * NOTE(review): the parser may depend on this ordering -- confirm before reordering.
 */
enum ParseState {

    // Section 0: We expect whitespace or a stem, but not an option:

    STATE_NULL,

    // Section 1: We might accept an option, but it is not required:

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,
    STATE_PRECISION,

    // Section 2: An option is required:

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_UNIT_USAGE,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
59 
/**
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
 * string literal written in upper snake case.
 *
 * The enumerators carry implicit values, so their numeric values are determined by declaration
 * order; "Section 1" enumerators all precede "Section 2" enumerators.
 *
 * @see stem_to_object
 * @see SERIALIZED_STEM_TRIE (NOTE(review): this name is not declared in this header -- confirm
 *      where the serialized trie lives)
 */
enum StemEnum {

    // Section 1: Stems that do not require an option:

    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_ODD,
    STEM_ROUNDING_MODE_HALF_CEILING,
    STEM_ROUNDING_MODE_HALF_FLOOR,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_INTEGER_WIDTH_TRUNC,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_FORMAL,
    STEM_UNIT_WIDTH_VARIANT,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_SIGN_NEGATIVE,
    STEM_SIGN_ACCOUNTING_NEGATIVE,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: Stems that DO require an option:

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_UNIT_USAGE,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
133 
/** Default wildcard char, accepted on input and printed in output */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard char, accepted on input but not printed in output */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Checks whether the char is a wildcard on input.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar, false otherwise.
 */
inline bool isWildcardChar(char16_t c) {
    return c == kWildcardChar || c == kAltWildcardChar;
}
144 
145 /**
146  * Creates a NumberFormatter corresponding to the given skeleton string.
147  *
148  * @param skeletonString
149  *            A number skeleton string, possibly not in its shortest form.
150  * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
151  */
152 UnlocalizedNumberFormatter create(
153     const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
154 
155 /**
156  * Create a skeleton string corresponding to the given NumberFormatter.
157  *
158  * @param macros
159  *            The NumberFormatter options object.
160  * @return A skeleton string in normalized form.
161  */
162 UnicodeString generate(const MacroProps& macros, UErrorCode& status);
163 
164 /**
165  * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
166  *
167  * Internal: use the create() endpoint instead of this function.
168  */
169 MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
170 
171 /**
172  * Given that the current segment represents a stem, parse it and save the result.
173  *
174  * @return The next state after parsing this stem, corresponding to what subset of options to expect.
175  */
176 ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
177                      MacroProps& macros, UErrorCode& status);
178 
179 /**
180  * Given that the current segment represents an option, parse it and save the result.
181  *
182  * @return The next state after parsing this option, corresponding to what subset of options to
183  *         expect next.
184  */
185 ParseState
186 parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
187 
188 } // namespace skeleton
189 
190 
191 /**
192  * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
193  * applies to only the "Section 1" stems, those that are well-defined without an option.
194  */
195 namespace stem_to_object {
196 
197 Notation notation(skeleton::StemEnum stem);
198 
199 MeasureUnit unit(skeleton::StemEnum stem);
200 
201 Precision precision(skeleton::StemEnum stem);
202 
203 UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
204 
205 UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
206 
207 UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
208 
209 UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
210 
211 UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
212 
213 } // namespace stem_to_object
214 
215 /**
216  * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
217  * take place in the object_to_stem_string namespace.
218  */
219 namespace enum_to_stem_string {
220 
221 void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
222 
223 void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
224 
225 void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
226 
227 void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
228 
229 void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
230 
231 } // namespace enum_to_stem_string
232 
233 /**
234  * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
235  */
236 namespace blueprint_helpers {
237 
238 /** @return Whether we successfully found and parsed an exponent width option. */
239 bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
240 
241 void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
242 
243 /** @return Whether we successfully found and parsed an exponent sign option. */
244 bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
245 
246 void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
247 
248 void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
249 
250 // "measure-unit/" is deprecated in favour of "unit/".
251 void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
252 
253 // "per-measure-unit/" is deprecated in favour of "unit/".
254 void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
255 
256 /**
257  * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as
258  * specified via a "unit/" concise skeleton.
259  */
260 void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
261 
262 void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
263 
264 void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
265 
266 void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
267 
268 void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
269 
270 void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
271 
272 void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
273 
274 // Note: no generateScientificStem since this syntax was added later in ICU 67
275 
276 void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
277 
278 // Note: no generateIntegerStem since this syntax was added later in ICU 67
279 
280 /** @return Whether we successfully found and parsed a frac-sig option. */
281 bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
282 
283 /** @return Whether we successfully found and parsed a trailing zero option. */
284 bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
285 
286 void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
287 
288 void
289 generateIncrementOption(uint32_t increment, digits_t incrementMagnitude, int32_t minFrac, UnicodeString& sb, UErrorCode& status);
290 
291 void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
292 
293 void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
294 
295 void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
296 
297 void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
298 
299 void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
300 
301 void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
302                               UErrorCode& status);
303 
304 } // namespace blueprint_helpers
305 
306 /**
307  * Class for utility methods for generating a token corresponding to each macro-prop. Each method
308  * returns whether or not a token was written to the string builder.
309  *
310  * This needs to be a class, not a namespace, so it can be friended.
311  */
312 class GeneratorHelpers {
313   public:
314     /**
315      * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
316      * StringBuilder.
317      *
318      * Internal: use the create() endpoint instead of this function.
319      */
320     static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
321 
322   private:
323     static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
324 
325     static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
326 
327     static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
328 
329     static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
330 
331     static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
332 
333     static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
334 
335     static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
336 
337     static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
338 
339     static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
340 
341     static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
342 
343     static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
344 
345     static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
346 
347 };
348 
/**
 * Struct for null-checking.
 * In Java, we can just check the object reference. In C++, we need a different method.
 *
 * Holds one boolean flag per macro-prop; every flag is initialized to false.
 * NOTE(review): presumably the parser sets a flag once the corresponding macro-prop has been
 * encountered in the skeleton -- confirm against parseStem().
 */
struct SeenMacroProps {
    bool notation = false;
    bool unit = false;
    bool perUnit = false;
    bool usage = false;
    bool precision = false;
    bool roundingMode = false;
    bool grouper = false;
    bool padder = false;
    bool integerWidth = false;
    bool symbols = false;
    bool unitWidth = false;
    bool sign = false;
    bool decimal = false;
    bool scale = false;
};
369 
namespace {

/**
 * Appends the code units of `src` in the range [start, end) to `dest` via
 * appendInvariantChars(), translating conversion failures into the caller's `status`.
 *
 * - On U_INVARIANT_CONVERSION_ERROR, `status` is set to U_NUMBER_SKELETON_SYNTAX_ERROR.
 * - On any other failure, `status` is set to the conversion error code.
 * - In both failure cases the macro executes a bare `return;`, so it can only be used inside
 *   functions returning void.
 *
 * Notes:
 * - `start` is expanded twice, so pass an expression without side effects.
 * - The void-cast of `dest` is kept inside the block so that the expansion does not begin with
 *   a separate statement ahead of the block; with a leading standalone statement, using the macro
 *   under an unbraced if/else would guard only that first statement.
 * - Macros are not scoped by namespaces; the enclosing anonymous namespace has no effect on the
 *   visibility of this macro.
 */
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) \
UPRV_BLOCK_MACRO_BEGIN { \
    (void)(dest); \
    UErrorCode conversionStatus = U_ZERO_ERROR; \
    (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return; \
    } else if (U_FAILURE(conversionStatus)) { \
        (status) = conversionStatus; \
        return; \
    } \
} UPRV_BLOCK_MACRO_END

} // namespace
387 
388 } // namespace impl
389 } // namespace number
390 U_NAMESPACE_END
391 
392 #endif //__SOURCE_NUMBER_SKELETONS_H__
393 #endif /* #if !UCONFIG_NO_FORMATTING */
394