• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // casemap.h
5 // created: 2017jan12 Markus W. Scherer
6 
7 #ifndef __CASEMAP_H__
8 #define __CASEMAP_H__
9 
10 #include "unicode/utypes.h"
11 #include "unicode/stringpiece.h"
12 #include "unicode/uobject.h"
13 
14 /**
15  * \file
16  * \brief C++ API: Low-level C++ case mapping functions.
17  */
18 
19 U_NAMESPACE_BEGIN
20 
21 #ifndef U_HIDE_DRAFT_API
22 
23 class BreakIterator;  // Forward declarations: used below only as pointer or reference parameters.
24 class ByteSink;
25 class Edits;
26 
27 /**
28  * Low-level, static-only C++ case mapping functions for UTF-16 and UTF-8 strings.
29  *
30  * @draft ICU 59
31  */
32 class U_COMMON_API CaseMap U_FINAL : public UMemory {
33 public:
34     /**
35      * Lowercases a UTF-16 string and optionally records edits.
36      * Casing is locale-dependent and context-sensitive.
37      * The result may be longer or shorter than the original.
38      * The source string and the destination buffer must not overlap.
39      *
40      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
41      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
42      * @param src       The original string.
43      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
44      * @param dest      A buffer for the result string. The result will be NUL-terminated if
45      *                  the buffer is large enough.
46      *                  The contents are undefined in case of failure.
47      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
48      *                  dest may be NULL and the function will only return the length of the result
49      *                  without writing any of the result string.
50      * @param edits     Records edits for index mapping, working with styled text,
51      *                  and getting only changes (if any).
52      *                  The Edits contents are undefined if any error occurs.
53      *                  This function calls edits->reset() first unless
54      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
55      * @param errorCode Reference to an in/out error code value
56      *                  which must not indicate a failure before the function call.
57      * @return The length of the result string, if successful.
58      *         When the result would be longer than destCapacity,
59      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
60      *
61      * @see u_strToLower
62      * @draft ICU 59
63      */
64      static int32_t toLower(
65             const char *locale, uint32_t options,
66             const char16_t *src, int32_t srcLength,
67             char16_t *dest, int32_t destCapacity, Edits *edits,
68             UErrorCode &errorCode);
69 
70     /**
71      * Uppercases a UTF-16 string and optionally records edits.
72      * Casing is locale-dependent and context-sensitive.
73      * The result may be longer or shorter than the original.
74      * The source string and the destination buffer must not overlap.
75      *
76      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
77      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
78      * @param src       The original string.
79      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
80      * @param dest      A buffer for the result string. The result will be NUL-terminated if
81      *                  the buffer is large enough.
82      *                  The contents are undefined in case of failure.
83      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
84      *                  dest may be NULL and the function will only return the length of the result
85      *                  without writing any of the result string.
86      * @param edits     Records edits for index mapping, working with styled text,
87      *                  and getting only changes (if any).
88      *                  The Edits contents are undefined if any error occurs.
89      *                  This function calls edits->reset() first unless
90      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
91      * @param errorCode Reference to an in/out error code value
92      *                  which must not indicate a failure before the function call.
93      * @return The length of the result string, if successful.
94      *         When the result would be longer than destCapacity,
95      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
96      *
97      * @see u_strToUpper
98      * @draft ICU 59
99      */
100     static int32_t toUpper(
101             const char *locale, uint32_t options,
102             const char16_t *src, int32_t srcLength,
103             char16_t *dest, int32_t destCapacity, Edits *edits,
104             UErrorCode &errorCode);
105 
106 #if !UCONFIG_NO_BREAK_ITERATION
107 
108     /**
109      * Titlecases a UTF-16 string and optionally records edits.
110      * Casing is locale-dependent and context-sensitive.
111      * The result may be longer or shorter than the original.
112      * The source string and the destination buffer must not overlap.
113      *
114      * Titlecasing uses a break iterator to find the first characters of words
115      * that are to be titlecased. It titlecases those characters and lowercases
116      * all others. (This can be modified with options bits.)
117      *
118      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
119      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
120      *                  U_TITLECASE_NO_LOWERCASE,
121      *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
122      *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
123      * @param iter      A break iterator to find the first characters of words that are to be titlecased.
124      *                  It is set to the source string (setText())
125      *                  and used one or more times for iteration (first() and next()).
126      *                  If NULL, then a word break iterator for the locale is used
127      *                  (or something equivalent).
128      * @param src       The original string.
129      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
130      * @param dest      A buffer for the result string. The result will be NUL-terminated if
131      *                  the buffer is large enough.
132      *                  The contents are undefined in case of failure.
133      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
134      *                  dest may be NULL and the function will only return the length of the result
135      *                  without writing any of the result string.
136      * @param edits     Records edits for index mapping, working with styled text,
137      *                  and getting only changes (if any).
138      *                  The Edits contents are undefined if any error occurs.
139      *                  This function calls edits->reset() first unless
140      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
141      * @param errorCode Reference to an in/out error code value
142      *                  which must not indicate a failure before the function call.
143      * @return The length of the result string, if successful.
144      *         When the result would be longer than destCapacity,
145      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
146      *
147      * @see u_strToTitle
148      * @see ucasemap_toTitle
149      * @draft ICU 59
150      */
151     static int32_t toTitle(
152             const char *locale, uint32_t options, BreakIterator *iter,
153             const char16_t *src, int32_t srcLength,
154             char16_t *dest, int32_t destCapacity, Edits *edits,
155             UErrorCode &errorCode);
156 
157 #endif  // UCONFIG_NO_BREAK_ITERATION
158 
159     /**
160      * Case-folds a UTF-16 string and optionally records edits.
161      *
162      * Case folding is locale-independent and not context-sensitive,
163      * but there is an option for whether to include or exclude mappings for dotted I
164      * and dotless i that are marked with 'T' in CaseFolding.txt.
165      *
166      * The result may be longer or shorter than the original.
167      * The source string and the destination buffer must not overlap.
168      *
169      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
170      *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
171      * @param src       The original string.
172      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
173      * @param dest      A buffer for the result string. The result will be NUL-terminated if
174      *                  the buffer is large enough.
175      *                  The contents are undefined in case of failure.
176      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
177      *                  dest may be NULL and the function will only return the length of the result
178      *                  without writing any of the result string.
179      * @param edits     Records edits for index mapping, working with styled text,
180      *                  and getting only changes (if any).
181      *                  The Edits contents are undefined if any error occurs.
182      *                  This function calls edits->reset() first unless
183      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
184      * @param errorCode Reference to an in/out error code value
185      *                  which must not indicate a failure before the function call.
186      * @return The length of the result string, if successful.
187      *         When the result would be longer than destCapacity,
188      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
189      *
190      * @see u_strFoldCase
191      * @draft ICU 59
192      */
193     static int32_t fold(
194             uint32_t options,
195             const char16_t *src, int32_t srcLength,
196             char16_t *dest, int32_t destCapacity, Edits *edits,
197             UErrorCode &errorCode);
198 
199     /**
200      * Lowercases a UTF-8 string and optionally records edits.
201      * Casing is locale-dependent and context-sensitive.
202      * The result may be longer or shorter than the original.
203      *
204      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
205      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
206      * @param src       The original string.
207      * @param sink      A ByteSink to which the result string is written.
208      *                  sink.Flush() is called at the end.
209      * @param edits     Records edits for index mapping, working with styled text,
210      *                  and getting only changes (if any).
211      *                  The Edits contents are undefined if any error occurs.
212      *                  This function calls edits->reset() first unless
213      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
214      * @param errorCode Reference to an in/out error code value
215      *                  which must not indicate a failure before the function call.
216      *
217      * @see ucasemap_utf8ToLower
218      * @draft ICU 60
219      */
220     static void utf8ToLower(
221             const char *locale, uint32_t options,
222             StringPiece src, ByteSink &sink, Edits *edits,
223             UErrorCode &errorCode);
224 
225     /**
226      * Uppercases a UTF-8 string and optionally records edits.
227      * Casing is locale-dependent and context-sensitive.
228      * The result may be longer or shorter than the original.
229      *
230      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
231      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
232      * @param src       The original string.
233      * @param sink      A ByteSink to which the result string is written.
234      *                  sink.Flush() is called at the end.
235      * @param edits     Records edits for index mapping, working with styled text,
236      *                  and getting only changes (if any).
237      *                  The Edits contents are undefined if any error occurs.
238      *                  This function calls edits->reset() first unless
239      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
240      * @param errorCode Reference to an in/out error code value
241      *                  which must not indicate a failure before the function call.
242      *
243      * @see ucasemap_utf8ToUpper
244      * @draft ICU 60
245      */
246     static void utf8ToUpper(
247             const char *locale, uint32_t options,
248             StringPiece src, ByteSink &sink, Edits *edits,
249             UErrorCode &errorCode);
250 
251 #if !UCONFIG_NO_BREAK_ITERATION
252 
253     /**
254      * Titlecases a UTF-8 string and optionally records edits.
255      * Casing is locale-dependent and context-sensitive.
256      * The result may be longer or shorter than the original.
257      *
258      * Titlecasing uses a break iterator to find the first characters of words
259      * that are to be titlecased. It titlecases those characters and lowercases
260      * all others. (This can be modified with options bits.)
261      *
262      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
263      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
264      *                  U_TITLECASE_NO_LOWERCASE,
265      *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
266      *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
267      * @param iter      A break iterator to find the first characters of words that are to be titlecased.
268      *                  It is set to the source string (setUText())
269      *                  and used one or more times for iteration (first() and next()).
270      *                  If NULL, then a word break iterator for the locale is used
271      *                  (or something equivalent).
272      * @param src       The original string.
273      * @param sink      A ByteSink to which the result string is written.
274      *                  sink.Flush() is called at the end.
275      * @param edits     Records edits for index mapping, working with styled text,
276      *                  and getting only changes (if any).
277      *                  The Edits contents are undefined if any error occurs.
278      *                  This function calls edits->reset() first unless
279      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
280      * @param errorCode Reference to an in/out error code value
281      *                  which must not indicate a failure before the function call.
282      *
283      * @see ucasemap_utf8ToTitle
284      * @draft ICU 60
285      */
286     static void utf8ToTitle(
287             const char *locale, uint32_t options, BreakIterator *iter,
288             StringPiece src, ByteSink &sink, Edits *edits,
289             UErrorCode &errorCode);
290 
291 #endif  // UCONFIG_NO_BREAK_ITERATION
292 
293     /**
294      * Case-folds a UTF-8 string and optionally records edits.
295      *
296      * Case folding is locale-independent and not context-sensitive,
297      * but there is an option for whether to include or exclude mappings for dotted I
298      * and dotless i that are marked with 'T' in CaseFolding.txt.
299      *
300      * The result may be longer or shorter than the original.
301      *
302      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
303      * @param src       The original string.
304      * @param sink      A ByteSink to which the result string is written.
305      *                  sink.Flush() is called at the end.
306      * @param edits     Records edits for index mapping, working with styled text,
307      *                  and getting only changes (if any).
308      *                  The Edits contents are undefined if any error occurs.
309      *                  This function calls edits->reset() first unless
310      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
311      * @param errorCode Reference to an in/out error code value
312      *                  which must not indicate a failure before the function call.
313      *
314      * @see ucasemap_utf8FoldCase
315      * @draft ICU 60
316      */
317     static void utf8Fold(
318             uint32_t options,
319             StringPiece src, ByteSink &sink, Edits *edits,
320             UErrorCode &errorCode);
321 
322     /**
323      * Lowercases a UTF-8 string and optionally records edits.
324      * Casing is locale-dependent and context-sensitive.
325      * The result may be longer or shorter than the original.
326      * The source string and the destination buffer must not overlap.
327      *
328      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
329      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
330      * @param src       The original string.
331      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
332      * @param dest      A buffer for the result string. The result will be NUL-terminated if
333      *                  the buffer is large enough.
334      *                  The contents are undefined in case of failure.
335      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
336      *                  dest may be NULL and the function will only return the length of the result
337      *                  without writing any of the result string.
338      * @param edits     Records edits for index mapping, working with styled text,
339      *                  and getting only changes (if any).
340      *                  The Edits contents are undefined if any error occurs.
341      *                  This function calls edits->reset() first unless
342      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
343      * @param errorCode Reference to an in/out error code value
344      *                  which must not indicate a failure before the function call.
345      * @return The length of the result string, if successful.
346      *         When the result would be longer than destCapacity,
347      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
348      *
349      * @see ucasemap_utf8ToLower
350      * @draft ICU 59
351      */
352     static int32_t utf8ToLower(
353             const char *locale, uint32_t options,
354             const char *src, int32_t srcLength,
355             char *dest, int32_t destCapacity, Edits *edits,
356             UErrorCode &errorCode);
357 
358     /**
359      * Uppercases a UTF-8 string and optionally records edits.
360      * Casing is locale-dependent and context-sensitive.
361      * The result may be longer or shorter than the original.
362      * The source string and the destination buffer must not overlap.
363      *
364      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
365      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
366      * @param src       The original string.
367      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
368      * @param dest      A buffer for the result string. The result will be NUL-terminated if
369      *                  the buffer is large enough.
370      *                  The contents are undefined in case of failure.
371      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
372      *                  dest may be NULL and the function will only return the length of the result
373      *                  without writing any of the result string.
374      * @param edits     Records edits for index mapping, working with styled text,
375      *                  and getting only changes (if any).
376      *                  The Edits contents are undefined if any error occurs.
377      *                  This function calls edits->reset() first unless
378      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
379      * @param errorCode Reference to an in/out error code value
380      *                  which must not indicate a failure before the function call.
381      * @return The length of the result string, if successful.
382      *         When the result would be longer than destCapacity,
383      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
384      *
385      * @see ucasemap_utf8ToUpper
386      * @draft ICU 59
387      */
388     static int32_t utf8ToUpper(
389             const char *locale, uint32_t options,
390             const char *src, int32_t srcLength,
391             char *dest, int32_t destCapacity, Edits *edits,
392             UErrorCode &errorCode);
393 
394 #if !UCONFIG_NO_BREAK_ITERATION
395 
396     /**
397      * Titlecases a UTF-8 string and optionally records edits.
398      * Casing is locale-dependent and context-sensitive.
399      * The result may be longer or shorter than the original.
400      * The source string and the destination buffer must not overlap.
401      *
402      * Titlecasing uses a break iterator to find the first characters of words
403      * that are to be titlecased. It titlecases those characters and lowercases
404      * all others. (This can be modified with options bits.)
405      *
406      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
407      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
408      *                  U_TITLECASE_NO_LOWERCASE,
409      *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
410      *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
411      * @param iter      A break iterator to find the first characters of words that are to be titlecased.
412      *                  It is set to the source string (setUText())
413      *                  and used one or more times for iteration (first() and next()).
414      *                  If NULL, then a word break iterator for the locale is used
415      *                  (or something equivalent).
416      * @param src       The original string.
417      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
418      * @param dest      A buffer for the result string. The result will be NUL-terminated if
419      *                  the buffer is large enough.
420      *                  The contents are undefined in case of failure.
421      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
422      *                  dest may be NULL and the function will only return the length of the result
423      *                  without writing any of the result string.
424      * @param edits     Records edits for index mapping, working with styled text,
425      *                  and getting only changes (if any).
426      *                  The Edits contents are undefined if any error occurs.
427      *                  This function calls edits->reset() first unless
428      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
429      * @param errorCode Reference to an in/out error code value
430      *                  which must not indicate a failure before the function call.
431      * @return The length of the result string, if successful.
432      *         When the result would be longer than destCapacity,
433      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
434      *
435      * @see ucasemap_utf8ToTitle
436      * @draft ICU 59
437      */
438     static int32_t utf8ToTitle(
439             const char *locale, uint32_t options, BreakIterator *iter,
440             const char *src, int32_t srcLength,
441             char *dest, int32_t destCapacity, Edits *edits,
442             UErrorCode &errorCode);
443 
444 #endif  // UCONFIG_NO_BREAK_ITERATION
445 
446     /**
447      * Case-folds a UTF-8 string and optionally records edits.
448      *
449      * Case folding is locale-independent and not context-sensitive,
450      * but there is an option for whether to include or exclude mappings for dotted I
451      * and dotless i that are marked with 'T' in CaseFolding.txt.
452      *
453      * The result may be longer or shorter than the original.
454      * The source string and the destination buffer must not overlap.
455      *
456      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
457      *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
458      * @param src       The original string.
459      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
460      * @param dest      A buffer for the result string. The result will be NUL-terminated if
461      *                  the buffer is large enough.
462      *                  The contents are undefined in case of failure.
463      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
464      *                  dest may be NULL and the function will only return the length of the result
465      *                  without writing any of the result string.
466      * @param edits     Records edits for index mapping, working with styled text,
467      *                  and getting only changes (if any).
468      *                  The Edits contents are undefined if any error occurs.
469      *                  This function calls edits->reset() first unless
470      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
471      * @param errorCode Reference to an in/out error code value
472      *                  which must not indicate a failure before the function call.
473      * @return The length of the result string, if successful.
474      *         When the result would be longer than destCapacity,
475      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
476      *
477      * @see ucasemap_utf8FoldCase
478      * @draft ICU 59
479      */
480     static int32_t utf8Fold(
481             uint32_t options,
482             const char *src, int32_t srcLength,
483             char *dest, int32_t destCapacity, Edits *edits,
484             UErrorCode &errorCode);
485 
486 private:
487     CaseMap() = delete;  // All members are static; instances are never created.
488     CaseMap(const CaseMap &other) = delete;  // Not copyable.
489     CaseMap &operator=(const CaseMap &other) = delete;  // Not assignable.
490 };
491 
492 #endif  // U_HIDE_DRAFT_API
493 
494 U_NAMESPACE_END
495 
496 #endif  // __CASEMAP_H__
497