• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  ucasemap.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005may06
16 *   created by: Markus W. Scherer
17 *
18 *   Case mapping service object and functions using it.
19 */
20 
21 #ifndef __UCASEMAP_H__
22 #define __UCASEMAP_H__
23 
24 #include "unicode/utypes.h"
25 #include "unicode/stringoptions.h"
26 #include "unicode/ustring.h"
27 
28 #if U_SHOW_CPLUSPLUS_API
29 #include "unicode/localpointer.h"
30 #endif   // U_SHOW_CPLUSPLUS_API
31 
32 /**
33  * \file
34  * \brief C API: Unicode case mapping functions using a UCaseMap service object.
35  *
36  * The service object takes care of memory allocations, data loading, and setup
37  * for the attributes, as usual.
38  *
39  * Currently, the functionality provided here does not overlap with uchar.h
40  * and ustring.h, except for ucasemap_toTitle().
41  *
42  * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
43  */
44 
45 /**
46  * UCaseMap is an opaque service object for newer ICU case mapping functions;
47  * older functions did not use one. It is opened with ucasemap_open() and closed with ucasemap_close().
48  * @stable ICU 3.4
49  */
50 struct UCaseMap;
51 typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
52 
53 /**
54  * Open a UCaseMap service object for a locale and a set of options.
55  * The locale ID and options are preprocessed so that functions using the
56  * service object need not process them in each call.
57  *
58  * @param locale ICU locale ID, used for language-dependent
59  *               upper-/lower-/title-casing according to the Unicode standard.
60  *               Usual semantics: ""=root, NULL=default locale, etc.
61  * @param options Options bit set, used for case folding, string comparisons and titlecasing.
62  *                Same flags as for u_foldCase(), u_strFoldCase(),
63  *                u_strCaseCompare(), etc.
64  *                Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
65  * @param pErrorCode Must be a valid pointer to an error code value,
66  *                   which must not indicate a failure before the function call.
67  * @return Pointer to a UCaseMap service object, if successful; close it with ucasemap_close().
68  *
69  * @see U_FOLD_CASE_DEFAULT
70  * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
71  * @see U_TITLECASE_NO_LOWERCASE
72  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
73  * @stable ICU 3.4
74  */
75 U_CAPI UCaseMap * U_EXPORT2
76 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
77 
78 /**
79  * Close a UCaseMap service object that was opened with ucasemap_open().
80  * @param csm Object to be closed. A break iterator adopted via ucasemap_setBreakIterator() is released as well.
81  * @stable ICU 3.4
82  */
83 U_CAPI void U_EXPORT2
84 ucasemap_close(UCaseMap *csm);
85 
86 #if U_SHOW_CPLUSPLUS_API
87 
88 U_NAMESPACE_BEGIN
89 
90 /**
91  * \class LocalUCaseMapPointer
92  * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
93  * For most methods see the LocalPointerBase base class.
94  *
95  * @see LocalPointerBase
96  * @see LocalPointer
97  * @stable ICU 4.4
98  */
99 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
100 
101 U_NAMESPACE_END
102 
103 #endif   // U_SHOW_CPLUSPLUS_API
104 
105 /**
106  * Get the locale ID that is used for language-dependent case mappings.
107  * @param csm UCaseMap service object.
108  * @return Locale ID in effect for csm; see ucasemap_open() and ucasemap_setLocale().
109  * @stable ICU 3.4
110  */
111 U_CAPI const char * U_EXPORT2
112 ucasemap_getLocale(const UCaseMap *csm);
113 
114 /**
115  * Get the options bit set that is used for case folding and string comparisons.
116  * @param csm UCaseMap service object.
117  * @return Options bit set in effect for csm; see ucasemap_open() and ucasemap_setOptions().
118  * @stable ICU 3.4
119  */
120 U_CAPI uint32_t U_EXPORT2
121 ucasemap_getOptions(const UCaseMap *csm);
122 
123 /**
124  * Set the locale ID that is used for language-dependent case mappings.
125  *
126  * @param csm UCaseMap service object.
127  * @param locale Locale ID, with the same semantics as in ucasemap_open(): ""=root, NULL=default locale, etc.
128  * @param pErrorCode Must be a valid pointer to an error code value,
129  *                   which must not indicate a failure before the function call.
130  *
131  * @see ucasemap_open
132  * @stable ICU 3.4
133  */
134 U_CAPI void U_EXPORT2
135 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
136 
137 /**
138  * Set the options bit set that is used for case folding and string comparisons.
139  *
140  * @param csm UCaseMap service object.
141  * @param options Options bit set, same flags as in ucasemap_open(); use 0 or U_FOLD_CASE_DEFAULT for default behavior.
142  * @param pErrorCode Must be a valid pointer to an error code value,
143  *                   which must not indicate a failure before the function call.
144  *
145  * @see ucasemap_open
146  * @stable ICU 3.4
147  */
148 U_CAPI void U_EXPORT2
149 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
150 
151 #if !UCONFIG_NO_BREAK_ITERATION
152 
153 /**
154  * Get the break iterator that is used for titlecasing.
155  * Do not modify the returned break iterator; csm keeps ownership of it (see ucasemap_setBreakIterator()).
156  * @param csm UCaseMap service object.
157  * @return The break iterator that csm uses for titlecasing.
158  * @stable ICU 3.8
159  */
160 U_CAPI const UBreakIterator * U_EXPORT2
161 ucasemap_getBreakIterator(const UCaseMap *csm);
162 
163 /**
164  * Set the break iterator that is used for titlecasing.
165  * The UCaseMap service object releases a previously set break iterator
166  * and "adopts" this new one, taking ownership of it.
167  * The adopted iterator will be released in a subsequent call to
168  * ucasemap_setBreakIterator() or ucasemap_close().
169  *
170  * Break iterator operations are not thread-safe. Therefore, titlecasing
171  * functions use non-const UCaseMap objects. It is not possible to titlecase
172  * strings concurrently using the same UCaseMap.
173  *
174  * @param csm UCaseMap service object.
175  * @param iterToAdopt Break iterator to be adopted for titlecasing; ownership passes to csm.
176  * @param pErrorCode Must be a valid pointer to an error code value,
177  *                   which must not indicate a failure before the function call.
178  *
179  * @see ucasemap_toTitle
180  * @see ucasemap_utf8ToTitle
181  * @stable ICU 3.8
182  */
183 U_CAPI void U_EXPORT2
184 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
185 
186 /**
187  * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
188  * except that it takes ucasemap_setOptions() into account and has performance
189  * advantages from being able to use a UCaseMap object for multiple case mapping
190  * operations, saving setup time.
191  *
192  * Casing is locale-dependent and context-sensitive.
193  * Titlecasing uses a break iterator to find the first characters of words
194  * that are to be titlecased. It titlecases those characters and lowercases
195  * all others. (This can be modified with ucasemap_setOptions().)
196  *
197  * Note: This function takes a non-const UCaseMap pointer because it will
198  * open a default break iterator if no break iterator was set yet,
199  * and effectively call ucasemap_setBreakIterator();
200  * also because the break iterator is stateful and will be modified during
201  * the iteration.
202  *
203  * The titlecase break iterator can be provided to customize for arbitrary
204  * styles, using rules and dictionaries beyond the standard iterators.
205  * The standard titlecase iterator for the root locale implements the
206  * algorithm of Unicode TR 21.
207  *
208  * This function uses only the setText(), first() and next() methods of the
209  * provided break iterator.
210  *
211  * The result may be longer or shorter than the original.
212  * The source string and the destination buffer must not overlap.
213  *
214  * @param csm       UCaseMap service object. This pointer is non-const!
215  *                  See the note above for details.
216  * @param dest      A buffer for the result string. The result will be NUL-terminated if
217  *                  the buffer is large enough.
218  *                  The contents are undefined in case of failure.
219  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
220  *                  dest may be NULL and the function will only return the length of the result
221  *                  without writing any of the result string.
222  * @param src       The original string.
223  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
224  * @param pErrorCode Must be a valid pointer to an error code value,
225  *                  which must not indicate a failure before the function call.
226  * @return The length of the result string, both on success and in case of a buffer overflow;
227  *         in the overflow case it will be greater than destCapacity.
228  *
229  * @see u_strToTitle
230  * @stable ICU 3.8
231  */
232 U_CAPI int32_t U_EXPORT2
233 ucasemap_toTitle(UCaseMap *csm,
234                  UChar *dest, int32_t destCapacity,
235                  const UChar *src, int32_t srcLength,
236                  UErrorCode *pErrorCode);
237 
238 #endif  // UCONFIG_NO_BREAK_ITERATION
239 
240 /**
241  * Lowercase the characters in a UTF-8 string.
242  * Casing is locale-dependent and context-sensitive.
243  * The result may be longer or shorter than the original.
244  * The source string and the destination buffer must not overlap.
245  *
246  * @param csm       UCaseMap service object.
247  * @param dest      A buffer for the result string. The result will be NUL-terminated if
248  *                  the buffer is large enough.
249  *                  The contents are undefined in case of failure.
250  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
251  *                  dest may be NULL and the function will only return the length of the result
252  *                  without writing any of the result string.
253  * @param src       The original string.
254  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
255  * @param pErrorCode Must be a valid pointer to an error code value,
256  *                  which must not indicate a failure before the function call.
257  * @return The length of the result string, both on success and in case of a buffer overflow;
258  *         in the overflow case it will be greater than destCapacity.
259  *
260  * @see u_strToLower
261  * @stable ICU 3.4
262  */
263 U_CAPI int32_t U_EXPORT2
264 ucasemap_utf8ToLower(const UCaseMap *csm,
265                      char *dest, int32_t destCapacity,
266                      const char *src, int32_t srcLength,
267                      UErrorCode *pErrorCode);
268 
269 /**
270  * Uppercase the characters in a UTF-8 string.
271  * Casing is locale-dependent and context-sensitive.
272  * The result may be longer or shorter than the original.
273  * The source string and the destination buffer must not overlap.
274  *
275  * @param csm       UCaseMap service object.
276  * @param dest      A buffer for the result string. The result will be NUL-terminated if
277  *                  the buffer is large enough.
278  *                  The contents are undefined in case of failure.
279  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
280  *                  dest may be NULL and the function will only return the length of the result
281  *                  without writing any of the result string.
282  * @param src       The original string.
283  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
284  * @param pErrorCode Must be a valid pointer to an error code value,
285  *                  which must not indicate a failure before the function call.
286  * @return The length of the result string, both on success and in case of a buffer overflow;
287  *         in the overflow case it will be greater than destCapacity.
288  *
289  * @see u_strToUpper
290  * @stable ICU 3.4
291  */
292 U_CAPI int32_t U_EXPORT2
293 ucasemap_utf8ToUpper(const UCaseMap *csm,
294                      char *dest, int32_t destCapacity,
295                      const char *src, int32_t srcLength,
296                      UErrorCode *pErrorCode);
297 
298 #if !UCONFIG_NO_BREAK_ITERATION
299 
300 /**
301  * Titlecase a UTF-8 string.
302  * Casing is locale-dependent and context-sensitive.
303  * Titlecasing uses a break iterator to find the first characters of words
304  * that are to be titlecased. It titlecases those characters and lowercases
305  * all others. (This can be modified with ucasemap_setOptions().)
306  *
307  * Note: This function takes a non-const UCaseMap pointer because it will
308  * open a default break iterator if no break iterator was set yet,
309  * and effectively call ucasemap_setBreakIterator();
310  * also because the break iterator is stateful and will be modified during
311  * the iteration.
312  *
313  * The titlecase break iterator can be provided to customize for arbitrary
314  * styles, using rules and dictionaries beyond the standard iterators.
315  * The standard titlecase iterator for the root locale implements the
316  * algorithm of Unicode TR 21.
317  *
318  * This function uses only the setUText(), first(), next() and close() methods of the
319  * provided break iterator.
320  *
321  * The result may be longer or shorter than the original.
322  * The source string and the destination buffer must not overlap.
323  *
324  * @param csm       UCaseMap service object. This pointer is non-const!
325  *                  See the note above for details.
326  * @param dest      A buffer for the result string. The result will be NUL-terminated if
327  *                  the buffer is large enough.
328  *                  The contents are undefined in case of failure.
329  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
330  *                  dest may be NULL and the function will only return the length of the result
331  *                  without writing any of the result string.
332  * @param src       The original string.
333  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
334  * @param pErrorCode Must be a valid pointer to an error code value,
335  *                  which must not indicate a failure before the function call.
336  * @return The length of the result string, both on success and in case of a buffer overflow;
337  *         in the overflow case it will be greater than destCapacity.
338  *
339  * @see u_strToTitle
340  * @see U_TITLECASE_NO_LOWERCASE
341  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
342  * @stable ICU 3.8
343  */
344 U_CAPI int32_t U_EXPORT2
345 ucasemap_utf8ToTitle(UCaseMap *csm,
346                     char *dest, int32_t destCapacity,
347                     const char *src, int32_t srcLength,
348                     UErrorCode *pErrorCode);
349 
350 #endif  // UCONFIG_NO_BREAK_ITERATION
351 
352 /**
353  * Case-folds the characters in a UTF-8 string.
354  *
355  * Case-folding is locale-independent and not context-sensitive,
356  * but there is an option for whether to include or exclude mappings for dotted I
357  * and dotless i that are marked with 'T' in CaseFolding.txt (see U_FOLD_CASE_EXCLUDE_SPECIAL_I).
358  *
359  * The result may be longer or shorter than the original.
360  * The source string and the destination buffer must not overlap.
361  *
362  * @param csm       UCaseMap service object.
363  * @param dest      A buffer for the result string. The result will be NUL-terminated if
364  *                  the buffer is large enough.
365  *                  The contents are undefined in case of failure.
366  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
367  *                  dest may be NULL and the function will only return the length of the result
368  *                  without writing any of the result string.
369  * @param src       The original string.
370  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
371  * @param pErrorCode Must be a valid pointer to an error code value,
372  *                  which must not indicate a failure before the function call.
373  * @return The length of the result string, both on success and in case of a buffer overflow;
374  *         in the overflow case it will be greater than destCapacity.
375  *
376  * @see u_strFoldCase
377  * @see ucasemap_setOptions
378  * @see U_FOLD_CASE_DEFAULT
379  * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
380  * @stable ICU 3.8
381  */
382 U_CAPI int32_t U_EXPORT2
383 ucasemap_utf8FoldCase(const UCaseMap *csm,
384                       char *dest, int32_t destCapacity,
385                       const char *src, int32_t srcLength,
386                       UErrorCode *pErrorCode);
387 
388 #endif
389