1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 // stringoptions.h 5 // created: 2017jun08 Markus W. Scherer 6 7 #ifndef __STRINGOPTIONS_H__ 8 #define __STRINGOPTIONS_H__ 9 10 #include "unicode/utypes.h" 11 12 /** 13 * \file 14 * \brief C API: Bit set option bit constants for various string and character processing functions. 15 */ 16 17 /** 18 * Option value for case folding: Use default mappings defined in CaseFolding.txt. 19 * 20 * @stable ICU 2.0 21 */ 22 #define U_FOLD_CASE_DEFAULT 0 23 24 /** 25 * Option value for case folding: 26 * 27 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 28 * and dotless i appropriately for Turkic languages (tr, az). 29 * 30 * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 31 * are to be included for default mappings and 32 * excluded for the Turkic-specific mappings. 33 * 34 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 35 * are to be excluded for default mappings and 36 * included for the Turkic-specific mappings. 37 * 38 * @stable ICU 2.0 39 */ 40 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 41 42 /** 43 * Titlecase the string as a whole rather than each word. 44 * (Titlecase only the character at index 0, possibly adjusted.) 45 * Option bits value for titlecasing APIs that take an options bit set. 46 * 47 * It is an error to specify multiple titlecasing iterator options together, 48 * including both an options bit and an explicit BreakIterator. 49 * 50 * @see U_TITLECASE_ADJUST_TO_CASED 51 * @stable ICU 60 52 */ 53 #define U_TITLECASE_WHOLE_STRING 0x20 54 55 /** 56 * Titlecase sentences rather than words. 57 * (Titlecase only the first character of each sentence, possibly adjusted.) 58 * Option bits value for titlecasing APIs that take an options bit set. 59 * 60 * It is an error to specify multiple titlecasing iterator options together, 61 * including both an options bit and an explicit BreakIterator. 62 * 63 * @see U_TITLECASE_ADJUST_TO_CASED 64 * @stable ICU 60 65 */ 66 #define U_TITLECASE_SENTENCES 0x40 67 68 /** 69 * Do not lowercase non-initial parts of words when titlecasing. 70 * Option bit for titlecasing APIs that take an options bit set. 71 * 72 * By default, titlecasing will titlecase the character at each 73 * (possibly adjusted) BreakIterator index and 74 * lowercase all other characters up to the next iterator index. 75 * With this option, the other characters will not be modified. 76 * 77 * @see U_TITLECASE_ADJUST_TO_CASED 78 * @see UnicodeString::toTitle 79 * @see CaseMap::toTitle 80 * @see ucasemap_setOptions 81 * @see ucasemap_toTitle 82 * @see ucasemap_utf8ToTitle 83 * @stable ICU 3.8 84 */ 85 #define U_TITLECASE_NO_LOWERCASE 0x100 86 87 /** 88 * Do not adjust the titlecasing BreakIterator indexes; 89 * titlecase exactly the characters at breaks from the iterator. 90 * Option bit for titlecasing APIs that take an options bit set. 91 * 92 * By default, titlecasing will take each break iterator index, 93 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), 94 * and titlecase that one. 95 * 96 * Other characters are lowercased. 97 * 98 * It is an error to specify multiple titlecasing adjustment options together. 99 * 100 * @see U_TITLECASE_ADJUST_TO_CASED 101 * @see U_TITLECASE_NO_LOWERCASE 102 * @see UnicodeString::toTitle 103 * @see CaseMap::toTitle 104 * @see ucasemap_setOptions 105 * @see ucasemap_toTitle 106 * @see ucasemap_utf8ToTitle 107 * @stable ICU 3.8 108 */ 109 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 110 111 /** 112 * Adjust each titlecasing BreakIterator index to the next cased character. 113 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) 114 * Option bit for titlecasing APIs that take an options bit set. 115 * 116 * This used to be the default index adjustment in ICU. 117 * Since ICU 60, the default index adjustment is to the next character that is 118 * a letter, number, symbol, or private use code point. 119 * (Uncased modifier letters are skipped.) 120 * The difference in behavior is small for word titlecasing, 121 * but the new adjustment is much better for whole-string and sentence titlecasing: 122 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". 123 * 124 * It is an error to specify multiple titlecasing adjustment options together. 125 * 126 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 127 * @stable ICU 60 128 */ 129 #define U_TITLECASE_ADJUST_TO_CASED 0x400 130 131 /** 132 * Option for string transformation functions to not first reset the Edits object. 133 * Used for example in some case-mapping and normalization functions. 134 * 135 * @see CaseMap 136 * @see Edits 137 * @see Normalizer2 138 * @stable ICU 60 139 */ 140 #define U_EDITS_NO_RESET 0x2000 141 142 /** 143 * Omit unchanged text when recording how source substrings 144 * relate to changed and unchanged result substrings. 145 * Used for example in some case-mapping and normalization functions. 146 * 147 * @see CaseMap 148 * @see Edits 149 * @see Normalizer2 150 * @stable ICU 60 151 */ 152 #define U_OMIT_UNCHANGED_TEXT 0x4000 153 154 /** 155 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 156 * Compare strings in code point order instead of code unit order. 157 * @stable ICU 2.2 158 */ 159 #define U_COMPARE_CODE_POINT_ORDER 0x8000 160 161 /** 162 * Option bit for unorm_compare: 163 * Perform case-insensitive comparison. 164 * @stable ICU 2.2 165 */ 166 #define U_COMPARE_IGNORE_CASE 0x10000 167 168 /** 169 * Option bit for unorm_compare: 170 * Both input strings are assumed to fulfill FCD conditions. 171 * @stable ICU 2.2 172 */ 173 #define UNORM_INPUT_IS_FCD 0x20000 174 175 // Related definitions elsewhere. 176 // Options that are not meaningful in the same functions 177 // can share the same bits. 178 // 179 // Public: 180 // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 181 // 182 // Internal: (may change or be removed) 183 // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff 184 // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 185 // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 186 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 187 // ustr_imp.h #define _STRNCMP_STYLE 0x1000 188 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000 189 190 #endif // __STRINGOPTIONS_H__ 191