• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 #ifndef __SOURCE_NUMBER_SKELETONS_H__
8 #define __SOURCE_NUMBER_SKELETONS_H__
9 
10 #include "number_types.h"
11 #include "numparse_types.h"
12 #include "unicode/ucharstrie.h"
13 #include "string_segment.h"
14 
15 U_NAMESPACE_BEGIN
16 namespace number {
17 namespace impl {
18 
19 // Forward-declaration
20 struct SeenMacroProps;
21 
22 // namespace for enums and entrypoint functions
23 namespace skeleton {
24 
25 ///////////////////////////////////////////////////////////////////////////////////////
26 // NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
27 // http://bugs.icu-project.org/trac/changeset/41193                                  //
28 ///////////////////////////////////////////////////////////////////////////////////////
29 
/**
 * Parser state used while walking a skeleton string: it records which kind of option
 * (if any) is expected to come next.
 *
 * The enumerators are laid out in three consecutive sections. Add new states to the
 * section that matches their option requirement.
 * NOTE(review): the implementation may depend on this sectioned ordering -- confirm
 * before reordering enumerators.
 */
enum ParseState {
    // ---- Section 0: whitespace or a stem is expected; an option is not ----
    STATE_NULL,

    // ---- Section 1: an option may follow, but it is optional ----
    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    // ---- Section 2: an option must follow ----
    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
55 
/**
 * One enumerator per stem literal. Each enumerator name is the stem's kebab-case string
 * literal rewritten in upper snake case (with a STEM_ prefix).
 *
 * Section 1 holds stems that are complete on their own; Section 2 holds stems that must be
 * followed by an option.
 *
 * @see StemToObject
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {
    // ---- Section 1: stems that do not require an option ----

    // notation stems
    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,

    // unit stems
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only

    // precision-* stems
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,

    // rounding-mode-* stems
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,

    // group-* stems
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,

    // latin stem
    STEM_LATIN,

    // unit-width-* stems
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_HIDDEN,

    // sign-* stems
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,

    // decimal-* stems
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // ---- Section 2: stems that DO require an option ----

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
120 
/** Default wildcard char, accepted on input and printed in output */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard char, accepted on input but not printed in output */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Checks whether the char is a wildcard on input.
 *
 * Declared constexpr so the check can also be evaluated in constant expressions.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar, false otherwise.
 */
inline constexpr bool isWildcardChar(char16_t c) {
    return c == kWildcardChar || c == kAltWildcardChar;
}
131 
132 /**
133  * Creates a NumberFormatter corresponding to the given skeleton string.
134  *
135  * @param skeletonString
136  *            A number skeleton string, possibly not in its shortest form.
137  * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
138  */
139 UnlocalizedNumberFormatter create(
140     const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
141 
142 /**
143  * Create a skeleton string corresponding to the given NumberFormatter.
144  *
145  * @param macros
146  *            The NumberFormatter options object.
147  * @return A skeleton string in normalized form.
148  */
149 UnicodeString generate(const MacroProps& macros, UErrorCode& status);
150 
151 /**
152  * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
153  *
154  * Internal: use the create() endpoint instead of this function.
155  */
156 MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
157 
158 /**
159  * Given that the current segment represents a stem, parse it and save the result.
160  *
161  * @return The next state after parsing this stem, corresponding to what subset of options to expect.
162  */
163 ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
164                      MacroProps& macros, UErrorCode& status);
165 
166 /**
167  * Given that the current segment represents an option, parse it and save the result.
168  *
169  * @return The next state after parsing this option, corresponding to what subset of options to
170  *         expect next.
171  */
172 ParseState
173 parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
174 
175 } // namespace skeleton
176 
177 
178 /**
179  * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
180  * applies to only the "Section 1" stems, those that are well-defined without an option.
181  */
182 namespace stem_to_object {
183 
184 Notation notation(skeleton::StemEnum stem);
185 
186 MeasureUnit unit(skeleton::StemEnum stem);
187 
188 Precision precision(skeleton::StemEnum stem);
189 
190 UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
191 
192 UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
193 
194 UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
195 
196 UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
197 
198 UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
199 
200 } // namespace stem_to_object
201 
202 /**
203  * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
204  * take place in the object_to_stem_string namespace.
205  */
206 namespace enum_to_stem_string {
207 
208 void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
209 
210 void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
211 
212 void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
213 
214 void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
215 
216 void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
217 
218 } // namespace enum_to_stem_string
219 
220 /**
221  * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
222  */
223 namespace blueprint_helpers {
224 
225 /** @return Whether we successfully found and parsed an exponent width option. */
226 bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
227 
228 void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
229 
230 /** @return Whether we successfully found and parsed an exponent sign option. */
231 bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
232 
233 void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
234 
235 void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
236 
237 void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
238 
239 void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status);
240 
241 void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
242 
243 void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
244 
245 void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
246 
247 void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
248 
249 void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
250 
251 void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
252 
253 void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
254 
255 // Note: no generateScientificStem since this syntax was added later in ICU 67
256 
257 void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
258 
259 // Note: no generateIntegerStem since this syntax was added later in ICU 67
260 
261 /** @return Whether we successfully found and parsed a frac-sig option. */
262 bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
263 
264 void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
265 
266 void
267 generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
268 
269 void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
270 
271 void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
272 
273 void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
274 
275 void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
276 
277 void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
278 
279 void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
280                               UErrorCode& status);
281 
282 } // namespace blueprint_helpers
283 
284 /**
285  * Class for utility methods for generating a token corresponding to each macro-prop. Each method
286  * returns whether or not a token was written to the string builder.
287  *
288  * This needs to be a class, not a namespace, so it can be friended.
289  */
290 class GeneratorHelpers {
291   public:
292     /**
293      * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
294      * StringBuilder.
295      *
296      * Internal: use the create() endpoint instead of this function.
297      */
298     static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
299 
300   private:
301     static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
302 
303     static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
304 
305     static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
306 
307     static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
308 
309     static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
310 
311     static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
312 
313     static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
314 
315     static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
316 
317     static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
318 
319     static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
320 
321     static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
322 
323     static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
324 
325 };
326 
/**
 * Set of flags used for null-checking, one flag per macro-prop.
 *
 * In Java the object reference itself can be checked; C++ needs a different method, so a
 * separate boolean per property is kept here. Every flag starts out false.
 */
struct SeenMacroProps {
    bool notation{false};
    bool unit{false};
    bool perUnit{false};
    bool precision{false};
    bool roundingMode{false};
    bool grouper{false};
    bool padder{false};
    bool integerWidth{false};
    bool symbols{false};
    bool unitWidth{false};
    bool sign{false};
    bool decimal{false};
    bool scale{false};
};
346 
347 } // namespace impl
348 } // namespace number
349 U_NAMESPACE_END
350 
351 #endif //__SOURCE_NUMBER_SKELETONS_H__
352 #endif /* #if !UCONFIG_NO_FORMATTING */
353