• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // stringoptions.h
5 // created: 2017jun08 Markus W. Scherer
6 
7 #ifndef __STRINGOPTIONS_H__
8 #define __STRINGOPTIONS_H__
9 
10 #include "unicode/utypes.h"
11 
12 /**
13  * \file
14  * \brief C API: Bit set option bit constants for various string and character processing functions.
15  */
16 
17 /**
18  * Option value for case folding: Use the default mappings defined in CaseFolding.txt.
19  * (The value is 0: no case-folding option bit is set.)
20  * @stable ICU 2.0
21  */
22 #define U_FOLD_CASE_DEFAULT 0
23 
24 /**
25  * Option value for case folding:
26  *
27  * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
28  * and dotless i appropriately for Turkic languages (tr, az).
29  *
30  * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
31  * were to be included for default mappings and
32  * excluded for the Turkic-specific mappings.
33  *
34  * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
35  * are to be excluded for default mappings and
36  * included for the Turkic-specific mappings.
37  *
38  * @stable ICU 2.0
39  */
40 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
41 
42 #ifndef U_HIDE_DRAFT_API
43 
44 /**
45  * Titlecase the string as a whole rather than each word.
46  * (Titlecase only the character at index 0, possibly adjusted.)
47  * Option bits value for titlecasing APIs that take an options bit set.
48  *
49  * It is an error to specify multiple titlecasing iterator options together;
50  * this includes combining an options bit with an explicit BreakIterator.
51  *
52  * @see U_TITLECASE_ADJUST_TO_CASED
53  * @draft ICU 60
54  */
55 #define U_TITLECASE_WHOLE_STRING 0x20
56 
57 /**
58  * Titlecase sentences rather than words.
59  * (Titlecase only the first character of each sentence, possibly adjusted.)
60  * Option bits value for titlecasing APIs that take an options bit set.
61  *
62  * It is an error to specify multiple titlecasing iterator options together;
63  * this includes combining an options bit with an explicit BreakIterator.
64  *
65  * @see U_TITLECASE_ADJUST_TO_CASED
66  * @draft ICU 60
67  */
68 #define U_TITLECASE_SENTENCES 0x40
69 
70 #endif  // U_HIDE_DRAFT_API
71 
72 /**
73  * Do not lowercase non-initial parts of words when titlecasing.
74  * Option bit for titlecasing APIs that take an options bit set.
75  *
76  * By default, titlecasing titlecases the character at each
77  * (possibly adjusted) BreakIterator index and
78  * lowercases all other characters up to the next iterator index.
79  * With this option, those other characters are left unmodified.
80  *
81  * @see U_TITLECASE_ADJUST_TO_CASED
82  * @see UnicodeString::toTitle
83  * @see CaseMap::toTitle
84  * @see ucasemap_setOptions
85  * @see ucasemap_toTitle
86  * @see ucasemap_utf8ToTitle
87  * @stable ICU 3.8
88  */
89 #define U_TITLECASE_NO_LOWERCASE 0x100
90 
91 /**
92  * Do not adjust the titlecasing BreakIterator indexes;
93  * titlecase exactly the characters at breaks from the iterator.
94  * Option bit for titlecasing APIs that take an options bit set.
95  *
96  * By default, titlecasing takes each break iterator index,
97  * adjusts it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
98  * and titlecases that one.
99  *
100  * Other characters are lowercased by default (see U_TITLECASE_NO_LOWERCASE).
101  *
102  * It is an error to specify multiple titlecasing adjustment options together.
103  *
104  * @see U_TITLECASE_ADJUST_TO_CASED
105  * @see U_TITLECASE_NO_LOWERCASE
106  * @see UnicodeString::toTitle
107  * @see CaseMap::toTitle
108  * @see ucasemap_setOptions
109  * @see ucasemap_toTitle
110  * @see ucasemap_utf8ToTitle
111  * @stable ICU 3.8
112  */
113 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
114 
115 #ifndef U_HIDE_DRAFT_API
116 
117 /**
118  * Adjust each titlecasing BreakIterator index to the next cased character.
119  * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
120  * Option bit for titlecasing APIs that take an options bit set.
121  *
122  * This used to be the default index adjustment in ICU.
123  * Since ICU 60, the default index adjustment is to the next character that is
124  * a letter, number, symbol, or private use code point.
125  * (Uncased modifier letters are skipped.)
126  * The difference in behavior is small for word titlecasing,
127  * but the new adjustment is much better for whole-string and sentence titlecasing:
128  * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
129  *
130  * It is an error to specify multiple titlecasing adjustment options together.
131  *
132  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
133  * @draft ICU 60
134  */
135 #define U_TITLECASE_ADJUST_TO_CASED 0x400
136 
137 /**
138  * Option for string transformation functions: Do not reset the Edits object first.
139  * Used, for example, in some case-mapping and normalization functions.
140  *
141  * @see CaseMap
142  * @see Edits
143  * @see Normalizer2
144  * @draft ICU 60
145  */
146 #define U_EDITS_NO_RESET 0x2000
147 
148 /**
149  * Omit unchanged text when recording how source substrings
150  * relate to changed and unchanged result substrings.
151  * Used, for example, in some case-mapping and normalization functions.
152  *
153  * @see CaseMap
154  * @see Edits
155  * @see Normalizer2
156  * @draft ICU 60
157  */
158 #define U_OMIT_UNCHANGED_TEXT 0x4000
159 
160 #endif  // U_HIDE_DRAFT_API
161 
162 /**
163  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.:
164  * Compare strings in code point order instead of code unit order.
165  * @stable ICU 2.2
166  */
167 #define U_COMPARE_CODE_POINT_ORDER  0x8000
168 
169 /**
170  * Option bit for unorm_compare:
171  * Perform a case-insensitive comparison.
172  * @stable ICU 2.2
173  */
174 #define U_COMPARE_IGNORE_CASE       0x10000
175 
176 /**
177  * Option bit for unorm_compare:
178  * Assume that both input strings fulfill the FCD conditions.
179  * @stable ICU 2.2
180  */
181 #define UNORM_INPUT_IS_FCD          0x20000
182 
183 // Related definitions elsewhere.
184 // Options that are not meaningful in the same functions
185 // can share the same bits.
186 //
187 // Public:
188 // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
189 //
190 // Internal: (may change or be removed)
191 // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
192 // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
193 // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
194 // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
195 // ustr_imp.h #define _STRNCMP_STYLE 0x1000
196 // unormcmp.cpp #define _COMPARE_EQUIV 0x80000
197 
198 #endif  // __STRINGOPTIONS_H__
199