// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// stringoptions.h
// created: 2017jun08 Markus W. Scherer

#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__

#include "unicode/utypes.h"

/**
 * \file
 * \brief C API: Bit set option bit constants for various string and character processing functions.
 */

/**
 * Option value for case folding: Use default mappings defined in CaseFolding.txt.
 * This is the value 0, that is, no option bit is set.
 *
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_DEFAULT 0

/**
 * Option value for case folding:
 *
 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
 * and dotless i appropriately for Turkic languages (tr, az).
 *
 * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
 * are to be included for default mappings and
 * excluded for the Turkic-specific mappings.
 *
 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
 * are to be excluded for default mappings and
 * included for the Turkic-specific mappings.
 *
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1

// Draft API: the definitions up to the matching #endif are not visible
// when U_HIDE_DRAFT_API is defined.
#ifndef U_HIDE_DRAFT_API

/**
 * Titlecase the string as a whole rather than each word.
 * (Titlecase only the character at index 0, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @draft ICU 60
 */
#define U_TITLECASE_WHOLE_STRING 0x20

/**
 * Titlecase sentences rather than words.
 * (Titlecase only the first character of each sentence, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @draft ICU 60
 */
#define U_TITLECASE_SENTENCES 0x40

#endif  // U_HIDE_DRAFT_API

/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will titlecase the character at each
 * (possibly adjusted) BreakIterator index and
 * lowercase all other characters up to the next iterator index.
 * With this option, the other characters will not be modified.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_LOWERCASE 0x100

/**
 * Do not adjust the titlecasing BreakIterator indexes;
 * titlecase exactly the characters at breaks from the iterator.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will take each break iterator index,
 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
 * and titlecase that one.
 *
 * Other characters are lowercased (see U_TITLECASE_NO_LOWERCASE).
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see U_TITLECASE_NO_LOWERCASE
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200

// Draft API: the definitions up to the matching #endif are not visible
// when U_HIDE_DRAFT_API is defined.
#ifndef U_HIDE_DRAFT_API

/**
 * Adjust each titlecasing BreakIterator index to the next cased character.
 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * This used to be the default index adjustment in ICU.
 * Since ICU 60, the default index adjustment is to the next character that is
 * a letter, number, symbol, or private use code point.
 * (Uncased modifier letters are skipped.)
 * The difference in behavior is small for word titlecasing,
 * but the new adjustment is much better for whole-string and sentence titlecasing:
 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 *
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @draft ICU 60
 */
#define U_TITLECASE_ADJUST_TO_CASED 0x400

/**
 * Option for string transformation functions to not first reset the Edits object.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @draft ICU 60
 */
#define U_EDITS_NO_RESET 0x2000

/**
 * Omit unchanged text when recording how source substrings
 * relate to changed and unchanged result substrings.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @draft ICU 60
 */
#define U_OMIT_UNCHANGED_TEXT 0x4000

#endif  // U_HIDE_DRAFT_API

/**
 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
 * Compare strings in code point order instead of code unit order.
 * @stable ICU 2.2
 */
#define U_COMPARE_CODE_POINT_ORDER 0x8000

/**
 * Option bit for unorm_compare:
 * Perform case-insensitive comparison.
 * @stable ICU 2.2
 */
#define U_COMPARE_IGNORE_CASE 0x10000

/**
 * Option bit for unorm_compare:
 * Both input strings are assumed to fulfill FCD conditions.
 * @stable ICU 2.2
 */
#define UNORM_INPUT_IS_FCD 0x20000

// Summary of the values defined in this file, in ascending order
// (keep in sync with the #define lines above):
//
//   0        U_FOLD_CASE_DEFAULT
//   1        U_FOLD_CASE_EXCLUDE_SPECIAL_I
//   0x20     U_TITLECASE_WHOLE_STRING          (draft)
//   0x40     U_TITLECASE_SENTENCES             (draft)
//   0x100    U_TITLECASE_NO_LOWERCASE
//   0x200    U_TITLECASE_NO_BREAK_ADJUSTMENT
//   0x400    U_TITLECASE_ADJUST_TO_CASED       (draft)
//   0x2000   U_EDITS_NO_RESET                  (draft)
//   0x4000   U_OMIT_UNCHANGED_TEXT             (draft)
//   0x8000   U_COMPARE_CODE_POINT_ORDER
//   0x10000  U_COMPARE_IGNORE_CASE
//   0x20000  UNORM_INPUT_IS_FCD

// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h      #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h              #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h              #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h       #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h       #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h           #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp         #define _COMPARE_EQUIV 0x80000

#endif  // __STRINGOPTIONS_H__