1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 // This file contains utilities to deal with static-allocated UnicodeSets. 5 // 6 // Common use case: you write a "private static final" UnicodeSet in Java, and 7 // want something similarly easy in C++. Originally written for number 8 // parsing, but this header can be used for other applications. 9 // 10 // Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)` 11 // 12 // This file is in common instead of i18n because it is needed by ucurr.cpp. 13 // 14 // Author: sffc 15 16 #include "unicode/utypes.h" 17 18 #if !UCONFIG_NO_FORMATTING 19 #ifndef __STATIC_UNICODE_SETS_H__ 20 #define __STATIC_UNICODE_SETS_H__ 21 22 #include "unicode/uniset.h" 23 #include "unicode/unistr.h" 24 25 U_NAMESPACE_BEGIN 26 namespace unisets { 27 28 enum Key { 29 // NONE is used to indicate null in chooseFrom(). 30 // EMPTY is used to get an empty UnicodeSet. 31 NONE = -1, 32 EMPTY = 0, 33 34 // Ignorables 35 DEFAULT_IGNORABLES, 36 STRICT_IGNORABLES, 37 38 // Separators 39 // Notes: 40 // - COMMA is a superset of STRICT_COMMA 41 // - PERIOD is a superset of SCRICT_PERIOD 42 // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS 43 // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS 44 COMMA, 45 PERIOD, 46 STRICT_COMMA, 47 STRICT_PERIOD, 48 APOSTROPHE_SIGN, 49 OTHER_GROUPING_SEPARATORS, 50 ALL_SEPARATORS, 51 STRICT_ALL_SEPARATORS, 52 53 // Symbols 54 MINUS_SIGN, 55 PLUS_SIGN, 56 PERCENT_SIGN, 57 PERMILLE_SIGN, 58 INFINITY_SIGN, 59 60 // Currency Symbols 61 DOLLAR_SIGN, 62 POUND_SIGN, 63 RUPEE_SIGN, 64 YEN_SIGN, 65 WON_SIGN, 66 67 // Other 68 DIGITS, 69 70 // Combined Separators with Digits (for lead code points) 71 DIGITS_OR_ALL_SEPARATORS, 72 DIGITS_OR_STRICT_ALL_SEPARATORS, 73 74 // The number of elements in the enum. 75 UNISETS_KEY_COUNT 76 }; 77 78 /** 79 * Gets the static-allocated UnicodeSet according to the provided key. The 80 * pointer will be deleted during u_cleanup(); the caller should NOT delete it. 81 * 82 * Exported as U_COMMON_API for ucurr.cpp 83 * 84 * This method is always safe and OK to chain: in the case of a memory or other 85 * error, it returns an empty set from static memory. 86 * 87 * Example: 88 * 89 * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...); 90 * 91 * @param key The desired UnicodeSet according to the enum in this file. 92 * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but 93 * may be empty if an error occurred during data loading. 94 */ 95 U_COMMON_API const UnicodeSet* get(Key key); 96 97 /** 98 * Checks if the UnicodeSet given by key1 contains the given string. 99 * 100 * Exported as U_COMMON_API for numparse_decimal.cpp 101 * 102 * @param str The string to check. 103 * @param key1 The set to check. 104 * @return key1 if the set contains str, or NONE if not. 105 */ 106 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1); 107 108 /** 109 * Checks if the UnicodeSet given by either key1 or key2 contains the string. 110 * 111 * Exported as U_COMMON_API for numparse_decimal.cpp 112 * 113 * @param str The string to check. 114 * @param key1 The first set to check. 115 * @param key2 The second set to check. 116 * @return key1 if that set contains str; key2 if that set contains str; or 117 * NONE if neither set contains str. 118 */ 119 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); 120 121 // TODO: Load these from data: ICU-20108 122 // Unused in C++: 123 // Key chooseCurrency(UnicodeString str); 124 // Used instead: 125 static const struct { 126 Key key; 127 UChar32 exemplar; 128 } kCurrencyEntries[] = { 129 {DOLLAR_SIGN, u'$'}, 130 {POUND_SIGN, u'£'}, 131 {RUPEE_SIGN, u'₹'}, 132 {YEN_SIGN, u'¥'}, 133 {WON_SIGN, u'₩'}, 134 }; 135 136 } // namespace unisets 137 U_NAMESPACE_END 138 139 #endif //__STATIC_UNICODE_SETS_H__ 140 #endif /* #if !UCONFIG_NO_FORMATTING */ 141